Skip to content

Commit 5852bdd

Browse files
committed
vcffilter: Recognize '.' as a valid tag that can be used with
--keep-filter and --remove-filter.
1 parent 2cc1ae3 commit 5852bdd

8 files changed: 59 additions (+59) and 27 deletions (-27).
src/com/rtg/vcf/VcfFilterTask.java

+23-10
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,8 @@
2323
import java.util.TreeSet;
2424

2525
import com.reeltwo.jumble.annotations.TestClass;
26-
import com.rtg.util.intervals.ReferenceRegions;
2726
import com.rtg.util.diagnostic.NoTalkbackSlimException;
27+
import com.rtg.util.intervals.ReferenceRegions;
2828
import com.rtg.vcf.VcfFilterStatistics.Stat;
2929
import com.rtg.vcf.header.FilterField;
3030
import com.rtg.vcf.header.InfoField;
@@ -121,11 +121,10 @@ protected void checkHeaderFieldFilters(VcfHeader header) {
121121
}
122122
if (mKeepFilters.size() != 0 || mKeepInfos.size() != 0 || mRemoveFilters.size() != 0 || mRemoveInfos.size() != 0) {
123123
int index = 0;
124-
mFilterTags.put("PASS", index);
125-
index++;
124+
mFilterTags.put(VcfUtils.FILTER_PASS, index++);
125+
mFilterTags.put(VcfUtils.MISSING_FIELD, index++);
126126
for (final FilterField info : header.getFilterLines()) {
127-
mFilterTags.put(info.getId(), index);
128-
index++;
127+
mFilterTags.put(info.getId(), index++);
129128
}
130129
mVcfFilterStatistics.setFilterTags(mFilterTags);
131130
final Set<String> userFilterTags = new TreeSet<>();
@@ -223,10 +222,16 @@ boolean accept(VcfRecord record) {
223222
keep = true;
224223
}
225224
}
226-
for (final String tag : record.getFilters()) {
227-
if (mKeepFilters.contains(tag)) {
225+
if (record.getFilters().size() == 0) {
226+
if (mKeepFilters.contains(VcfUtils.MISSING_FIELD)) {
228227
keep = true;
229228
}
229+
} else {
230+
for (final String tag : record.getFilters()) {
231+
if (mKeepFilters.contains(tag)) {
232+
keep = true;
233+
}
234+
}
230235
}
231236
if (!keep) {
232237
mVcfFilterStatistics.increment(Stat.FAILED_KEEP_COUNT);
@@ -240,12 +245,20 @@ boolean accept(VcfRecord record) {
240245
return false;
241246
}
242247
}
243-
for (final String tag : record.getFilters()) {
244-
if (mRemoveFilters.contains(tag)) {
245-
mVcfFilterStatistics.incrementFilterTag(tag);
248+
if (record.getFilters().size() == 0) {
249+
if (mRemoveFilters.contains(VcfUtils.MISSING_FIELD)) {
250+
mVcfFilterStatistics.incrementFilterTag(VcfUtils.MISSING_FIELD);
246251
mNonSampleSpecificFailed = true;
247252
return false;
248253
}
254+
} else {
255+
for (final String tag : record.getFilters()) {
256+
if (mRemoveFilters.contains(tag)) {
257+
mVcfFilterStatistics.incrementFilterTag(tag);
258+
mNonSampleSpecificFailed = true;
259+
return false;
260+
}
261+
}
249262
}
250263
if (allSameAsRef(record)) {
251264
return false;

src/com/rtg/vcf/VcfUtils.java

+3
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,9 @@ private VcfUtils() { }
6363
/** pass filter flag */
6464
public static final String FILTER_PASS = "PASS";
6565

66+
/** missing field (e.g. filter/QUAL) */
67+
public static final String MISSING_FIELD = "" + MISSING_VALUE;
68+
6669
/** confidence interval for POS columns */
6770
public static final String CONFIDENCE_INTERVAL_POS = "CIPOS";
6871

test/com/rtg/vcf/VcfFilterCliTest.java

+17-6
Original file line number | Diff line number | Diff line change
@@ -397,12 +397,12 @@ public void testMoreComplexCase() throws IOException {
397397
final String o = FileUtils.fileToString(out);
398398
assertTrue(o.length() > 0);
399399
TestUtils.containsAll(o,
400-
"snp --max-ih=1 -m cg_errors --Xindels --max-as-mated=4 -o snp_mated_cgerrors_indels_as4 -t /rtgshare/data/human/sdf/hg18 map_GS000005015/mated.sam.gz map_GS000005016/mated.sam.gz",
401-
VcfHeader.HEADER_LINE,
402-
"chr1 44376 . N NCT . PASS . GT:DP:RE:GQ:RS 1/0:4:0.052:12.3:CT,1,0.022,i,3,0.030".replaceAll("\\s+", "\t"),
403-
"chr1 45418 . N NT . PASS . GT:DP:RE:GQ:RS 1/0:4:0.536:6.2:T,1,0.506,i,3,0.030".replaceAll("\\s+", "\t"),
404-
"chr1 46244 . T C . PASS . GT:DP:RE:GQ:RS 1/1:4:0.287:7.8:C,4,0.287".replaceAll("\\s+", "\t"),
405-
"chr1 82299 . N NA . PASS . GT:DP:RE:GQ:RS 1/0:4:0.071:8.6:A,1,0.041,i,3,0.030".replaceAll("\\s+", "\t"));
400+
"snp --max-ih=1 -m cg_errors --Xindels --max-as-mated=4 -o snp_mated_cgerrors_indels_as4 -t /rtgshare/data/human/sdf/hg18 map_GS000005015/mated.sam.gz map_GS000005016/mated.sam.gz",
401+
VcfHeader.HEADER_LINE,
402+
"chr1 44376 . N NCT . PASS . GT:DP:RE:GQ:RS 1/0:4:0.052:12.3:CT,1,0.022,i,3,0.030".replaceAll("\\s+", "\t"),
403+
"chr1 45418 . N NT . PASS . GT:DP:RE:GQ:RS 1/0:4:0.536:6.2:T,1,0.506,i,3,0.030".replaceAll("\\s+", "\t"),
404+
"chr1 46244 . T C . PASS . GT:DP:RE:GQ:RS 1/1:4:0.287:7.8:C,4,0.287".replaceAll("\\s+", "\t"),
405+
"chr1 82299 . N NA . PASS . GT:DP:RE:GQ:RS 1/0:4:0.071:8.6:A,1,0.041,i,3,0.030".replaceAll("\\s+", "\t"));
406406

407407
TestUtils.containsAll(output, "Total records : 27", "Filtered due to posterior : 19", "Filtered due to sample read depth : 4", "Remaining records : 4");
408408
} finally {
@@ -700,6 +700,17 @@ public void testVcfKeepAll() throws IOException {
700700
);
701701
}
702702

703+
public void testVcfFilterDotPass() throws IOException {
704+
final String[] args = {
705+
"--remove-filter", ".", "--remove-filter", "PASS"
706+
};
707+
runResourceTest(
708+
args,
709+
RESOURCES + "snpfiltertest5.vcf",
710+
"snpfiltertest5_DotPass_exp.vcf"
711+
);
712+
}
713+
703714
public void testVcfDensityWindow() throws IOException {
704715
final String[] args = {
705716
"--density-window", "10"

test/com/rtg/vcf/resources/snpfiltertest5.vcf

+4-4
Original file line number | Diff line number | Diff line change
@@ -29,22 +29,22 @@ g1 13 . C . 44.9 PASS . GT:DP:RE:AR:AB:GQ:RS 0/0:5:0.050:0.000:1.000:44.9:C,5,0.
2929
g1 14 . A . 47.2 PASS . GT:DP:RE:AR:AB:GQ:RS 0/0:5:0.050:0.000:1.000:47.2:A,5,0.050
3030
g1 15 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
3131
g1 16 . C . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
32-
g1 17 . T . 29.7 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
33-
g1 18 . A . 29.7 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
32+
g1 17 . T . 29.7 . . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
33+
g1 18 . A . 29.7 . . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
3434
g1 19 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
3535
g1 20 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
3636
g1 21 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/1:10:0.000:0.101:0.000:27.9
3737
g1 22 . G A,T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/2:10:0.000:0.101:0.000:27.9
3838
g1 23 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.101:0.500:27.9
3939
g1 24 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/0:10:0.000:0.101:0.500:27.9
40-
g1 25 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.101:1.000:27.9
40+
g1 25 . G . 27.9 . . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.101:1.000:27.9
4141
g1 26 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/1:10:0.000:0.000:0.260:27.9
4242
g1 27 . G A,T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/2:10:0.000:0.000:0.260:27.9
4343
g1 28 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.000:0.760:27.9
4444
g1 29 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.000:0.240:27.9
4545
g1 30 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/0:10:0.000:0.000:0.760:27.9
4646
g1 31 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/0:10:0.000:0.000:0.240:27.9
47-
g1 32 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.000:0.740:27.9
47+
g1 32 . G . 27.9 . . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.000:0.740:27.9
4848
g1 33 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/1:10:0.000:0.000:0.000:27.9
4949
g1 34 . G A,T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/2:10:0.000:0.000:0.000:27.9
5050
g1 35 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.000:0.500:27.9

test/com/rtg/vcf/resources/snpfiltertest5_DotPass_exp.vcf

Whitespace-only changes.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
2+
Total records : 42
3+
Filtered due to . : 4
4+
Filtered due to PASS : 38
5+
Remaining records : 0

test/com/rtg/vcf/resources/snpfiltertest5_exp.vcf

+3-3
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@ g1 13 . C . 44.9 PASS . GT:DP:RE:AR:AB:GQ:RS 0/0:5:0.050:0.000:1.000:44.9:C,5,0.
1414
g1 14 . A . 47.2 PASS . GT:DP:RE:AR:AB:GQ:RS 0/0:5:0.050:0.000:1.000:47.2:A,5,0.050
1515
g1 15 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
1616
g1 16 . C . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
17-
g1 17 . T . 29.7 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
18-
g1 18 . A . 29.7 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
17+
g1 17 . T . 29.7 . . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
18+
g1 18 . A . 29.7 . . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
1919
g1 19 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
2020
g1 20 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
2121
g1 26 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/1:10:0.000:0.000:0.260:27.9
@@ -24,7 +24,7 @@ g1 28 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.000:0.760:27.9
2424
g1 29 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.000:0.240:27.9
2525
g1 30 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/0:10:0.000:0.000:0.760:27.9
2626
g1 31 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/0:10:0.000:0.000:0.240:27.9
27-
g1 32 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.000:0.740:27.9
27+
g1 32 . G . 27.9 . . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.000:0.740:27.9
2828
g1 33 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/1:10:0.000:0.000:0.000:27.9
2929
g1 34 . G A,T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/2:10:0.000:0.000:0.000:27.9
3030
g1 35 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.000:0.500:27.9

test/com/rtg/vcf/resources/snpfiltertest5_exp_all.vcf

+4-4
Original file line number | Diff line number | Diff line change
@@ -14,22 +14,22 @@ g1 13 . C . 44.9 PASS . GT:DP:RE:AR:AB:GQ:RS 0/0:5:0.050:0.000:1.000:44.9:C,5,0.
1414
g1 14 . A . 47.2 PASS . GT:DP:RE:AR:AB:GQ:RS 0/0:5:0.050:0.000:1.000:47.2:A,5,0.050
1515
g1 15 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
1616
g1 16 . C . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
17-
g1 17 . T . 29.7 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
18-
g1 18 . A . 29.7 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
17+
g1 17 . T . 29.7 . . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
18+
g1 18 . A . 29.7 . . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:29.7
1919
g1 19 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
2020
g1 20 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:0:0.000:0.000:0.000:27.9
2121
g1 21 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/1:10:0.000:0.101:0.000:27.9
2222
g1 22 . G A,T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/2:10:0.000:0.101:0.000:27.9
2323
g1 23 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.101:0.500:27.9
2424
g1 24 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/0:10:0.000:0.101:0.500:27.9
25-
g1 25 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.101:1.000:27.9
25+
g1 25 . G . 27.9 . . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.101:1.000:27.9
2626
g1 26 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/1:10:0.000:0.000:0.260:27.9
2727
g1 27 . G A,T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/2:10:0.000:0.000:0.260:27.9
2828
g1 28 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.000:0.760:27.9
2929
g1 29 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.000:0.240:27.9
3030
g1 30 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/0:10:0.000:0.000:0.760:27.9
3131
g1 31 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/0:10:0.000:0.000:0.240:27.9
32-
g1 32 . G . 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.000:0.740:27.9
32+
g1 32 . G . 27.9 . . GT:DP:RE:AR:AB:GQ 0/0:10:0.000:0.000:0.740:27.9
3333
g1 33 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/1:10:0.000:0.000:0.000:27.9
3434
g1 34 . G A,T 27.9 PASS . GT:DP:RE:AR:AB:GQ 1/2:10:0.000:0.000:0.000:27.9
3535
g1 35 . G T 27.9 PASS . GT:DP:RE:AR:AB:GQ 0/1:10:0.000:0.000:0.500:27.9

0 commit comments

Comments (0)