Skip to content

Commit 67f723a

Browse files
committed
Merge pull request #12 from brentp/dev
Dev
2 parents 7e8f2ca + 9383b70 commit 67f723a

20 files changed

+869
-568
lines changed

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,11 @@ src/slivarpkg/groups
66
src/slivarpkg/pracode
77
src/slivarpkg/sl_gnotate
88
__tmp.groups
9+
sli.var
10+
*.zip
11+
*.bin
12+
src/slivarpkg/filter
13+
src/slivarpkg/make_gnotate
14+
src/slivarpkg/siset
15+
916

.travis.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
language: c
2+
services: docker
3+
before_install:
4+
- docker pull brentp/musl-hts-nim
5+
script:
6+
- docker run -w /test -v `pwd`:/test brentp/musl-hts-nim scripts/ci-tests.sh
7+
branches:
8+
except:
9+
- gh-pages
10+
before_deploy:
11+
- git config --local user.name "brentp"
12+
- git config --local user.email "[email protected]"
13+
- export TRAVIS_TAG=${TRAVIS_TAG:-$(date +'%Y%m%d%H%M%S')-$(git log --format=%h -1)}
14+
- git tag $TRAVIS_TAG || true
15+
deploy:
16+
# gem install travis
17+
# travis setup releases --force
18+
provider: releases
19+
api_key:
20+
secure: xIC7UCrPPB6lQqx9ZLcUP3OOdwF7NEClgE+HN3u0TfCbDkJvCdatrhezzRp09RhP1snaus56cl4U2oqDjJr+16E1jX/sHMfhF5tinreIRb3tgxlidtGEpiq8ljwK6cLB89Iq/yaihamqlG4Pw/R8AIlMCMyi99yHObTpaCYCptiOygUPkVtpDw5KkWoya9Ol7vrCyTVQK5rENEeivzpfPGtGNKHvNwCxI4f7njLI0VNHP1XLoAieb59ttDEgMIfjWI/T2mwjunIjgQtQKzVbBGUsG9M9eYhc5Jw0xZJQfoRiGn0p+ocxfVNX3H5oVM31sd/EEtuEXPfEO5UKxt5ro42ijd24kNjcVC8rVJxyPdqasjb7IQIXRX8CZqd9SF3boCGAbqSG39obdR2g7BoisL5VmZ/IcTCoj/zE5t9loyVLLJf24YViJwS39l9L51M4R4Oq5jHOtJGdrd/OhTYB9xAtc+DAEKiv1JkDmAJV34gAaZd12SnTCps2BtlttqqiTvXjgxIo62XgrXyBKtNzZ0+toYls2RBKdATBv615+zGQcxnXJkPuvl7DvaNJ2i1xCMNisPTSAFfVSciVulq1/k/IHrMYxY4ceRzrmkfUxzb4STETaNHAzq9cmF8MEa5LSLgSDCB0EqBHyll8JcBDlANBvgHUsUiwUbV0mKYUP6o=
21+
skip_cleanup: true
22+
draft: true
23+
file: "$TRAVIS_BUILD_DIR/slivar_static"
24+
on:
25+
repo: brentp/slivar
26+
tags: true

README.md

Lines changed: 67 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,22 @@
1-
# slivar: filter/annotate variants in VCF/BCF format with simple expressions
1+
# slivar: filter/annotate variants in VCF/BCF format with simple expressions [![Build Status](https://travis-ci.org/brentp/slivar.svg?branch=master)](https://travis-ci.org/brentp/slivar)
2+
3+
slivar is a set of command-line tools that enables rapid querying and filtering of VCF files.
4+
It facilitates operations on trios and [groups](#groups) and allows arbitrary expressions using simple javascript.
5+
6+
#### use-cases for `slivar`
7+
8+
+ annotate variants with combined exomes + whole genomes at > 30K variants/second using only a 1.5GB compressed annotation file
9+
+ call *denovo* variants with a simple expression that uses *mom*, *dad*, *kid* labels that is applied to each trio in a cohort (as inferred from a pedigree file).
10+
`kid.alts == 1 && mom.alts == 0 && dad.alts == 0 && kid.DP > 10 && mom.DP > 10 && dad.DP > 10`
11+
+ define and filter on arbitrary groups with labels. For example, 7 sets of samples each with 1 normal and 3 tumor time-points:
12+
`normal.AD[0] = 0 && tumor1.AB < tumor2.AB && tumor2.AB < tumor3.AB`
13+
+ filter variants with simple expressions:
14+
`variant.call_rate > 0.9 && variant.FILTER == "PASS" && INFO.AC < 22 && variant.num_hom_alt == 0`
215

316
slivar has sub-commands:
417
+ [expr](#expr): trio and group expressions and filtering
5-
+ [gnotate](#gnotate): rapidly annotate a VCF/BCF with gnomad
6-
+ filter: filter a VCF with an expression
18+
+ [gnotate](#gnotate): filter and/or annotate a VCF/BCF files
19+
+ [make-gnotate](#gnotate): make a compressed zip file of annotations for use by slivar
720

821
# Table of Contents
922

@@ -47,28 +60,35 @@ will be tested on each of those 200 trios.
4760
The expressions are javascript so the user can make these as complex as needed.
4861

4962

50-
```
63+
```bash
5164
slivar expr \
5265
--pass-only \ # output only variants that pass one of the filters (default is to output all variants)
5366
--vcf $vcf \
5467
--ped $ped \
55-
--gnomad $gnomad_zip \ # a compressed gnomad zip that allows fast annotation so that `gnomad_af` is available in the expressions below.
56-
--load functions.js \ # any valid javascript is allowed here.
68+
# compressed zip that allows fast annotation so that `gnomad_af` is available in the expressions below.
69+
--gnotate $gnomad_af.zip \
70+
# any valid javascript is allowed in a file here. provide functions to be used below.
71+
--js js/functions.js \
5772
--out-vcf annotated.bcf \
58-
--info "variant.call_rate > 0.9" \ # this filter is applied before the trio filters and can speed evaluation if it is stringent.
73+
# this filter is applied before the trio filters and can speed evaluation if it is stringent.
74+
--info "variant.call_rate > 0.9" \
5975
--trio "denovo:kid.alts == 1 && mom.alts == 0 && dad.alts == 0 \
6076
&& kid.AB > 0.25 && kid.AB < 0.75 \
6177
&& (mom.AD[1] + dad.AD[1]) == 0 \
6278
&& kid.GQ >= 20 && mom.GQ >= 20 && dad.GQ >= 20 \
6379
&& kid.DP >= 12 && mom.DP >= 12 && dad.DP >= 12" \
64-
--trio "informative:kid.GQ > 20 && dad.GQ > 20 && mom.GQ > 20 && kid.alts == 1 && ((mom.alts == 1 && dad.alts == 0) || (mom.alts == 0 && dad.alts == 1))" \
65-
--trio "recessive:recessive_func(kid, mom, dad)"
80+
--trio "informative:kid.GQ > 20 && dad.GQ > 20 && mom.GQ > 20 && kid.alts == 1 && \
81+
((mom.alts == 1 && dad.alts == 0) || (mom.alts == 0 && dad.alts == 1))" \
82+
--trio "recessive:trio_autosomal_recessive(kid, mom, dad)"
6683

6784
```
6885

6986
Note that `slivar` does not give direct access to the genotypes, instead exposing `alts` where 0 is homozygous reference, 1 is heterozygous, 2 is
7087
homozygous alternate and -1 when the genotype is unknown. It is recommended to **decompose** a VCF before sending to `slivar`
7188

89+
Here it is assumed that `trio_autosomal_recessive` is defined in `functions.js`; an example implementation of that
90+
and other useful functions is provided [here](https://github.com/brentp/slivar/blob/master/js/functions.js
91+
7292
#### Groups
7393

7494
A `trio` is a special-case of a `group` that can be inferred from a pedigree. For more specialized use-cases, a `group` can be
@@ -91,7 +111,7 @@ sample7 sample8 sample9 sample12
91111
```
92112

93113
where `sample10` will be available as "sibling" in the first family and an expression like:
94-
```
114+
```bash
95115
kid.alts == 1 && mom.alts == 0 && dad.alts == 0 and sibling.alts == 0
96116
```
97117
could be specified and it would automatically be applied to each of the 3 families.
@@ -106,7 +126,7 @@ ss3 ss16 ss17 ss18 ss19
106126

107127
where, again each row is a sample and the ID's (starting with "ss") will be injected for each sample to allow a single
108128
expression like:
109-
```
129+
```bash
110130
normal.alts == 0 && normal.DP > 10 \
111131
&& tumor1.AB > 0 \
112132
&& tumor1.AB < tumor2.AB \
@@ -117,18 +137,49 @@ normal.alts == 0 && normal.DP > 10 \
117137
to find a somatic variant that has increasing frequency (AB is allele balance) along the tumor time-points.
118138

119139

140+
More detail on groups is provided [here](https://github.com/brentp/slivar/wiki/groups-in-slivar)
141+
120142
### Gnotate
121143

122-
This uses a compressed, reduced representation of gnomad allele frequencies **and FILTERs** to reduce from the 600+ GB of data for the
123-
**whole genome and exome** to a 1.5GB file distributed [here](https://s3.amazonaws.com/gemini-annotations/gnomad-2.1.zip).
124-
The zip file encodes the popmax_AF (whichever is higher between whole genome and exome) and the FILTER for every variant in gnomad.
125-
It can annotate at faster than 10K variants per second.
144+
The `gnotate` sub-command allows filtering and/or annotating.
145+
More extensive documentation and justification for annotating with `gnotate` are [here](https://github.com/brentp/slivar/wiki/gnotate)
146+
147+
`gnotate` uses a compressed, reduced representation of a single value pulled from a (population VCF) along with a boolean that indicates a
148+
non-pass filter. This can, for example, reduce the 600+ GB of data for the **whole genome and exome** from gnomad to a 1.5GB file
149+
distributed [here](https://s3.amazonaws.com/gemini-annotations/gnomad-2.1.zip).
150+
The zip file encodes the popmax_AF (whichever is higher between whole genome and exome) and the presence of FILTER for every variant
151+
in gnomad.
152+
153+
It can annotate at faster than 30K variants per second (limited by speed of parsing the query VCF).
154+
155+
```
156+
slivar gnotate --vcf $input_vcf -o $output_bcf --threads 3 --gnotate encoded.zip
157+
```
158+
It's also possible to use `gnotate` as a filtering command without specifying any `--gnotate` arguments.
159+
160+
161+
#### make-gnotate
162+
163+
Users can make their own `gnotate` files like:
164+
165+
```bash
166+
slivar make-gnotate --prefix gnomad \
167+
--field AF_popmax:gnomad_popmax_af \
168+
--field nhomalt:gnomad_num_homalt \
169+
gnomad.exomes.r2.1.sites.vcf.gz gnomad.genomes.r2.1.sites.vcf.gz
170+
```
171+
172+
this will pull `AF_popmax` and `nhomalt` from the INFO field and put them into `gnomad.zip` as `gnomad_popmax_af` and `gnomad_num_homalt` respectively.
173+
The resulting zip file will contain the union of values seen in the exome and genomes files with the maximum value for any intersection.
174+
Note that the names (`gnomad_popmax_af` and `gnomad_num_homalt` in this case) should be chosen carefully as those will be the names added to the INFO of any file to be annotated with the resulting `gnomad.zip`
175+
176+
More information on `make-gnotate` is [in the wiki](https://github.com/brentp/slivar/wiki/make-gnotate)
126177

127-
slivar gnotate --vcf $input_vcf -o $output_bcf --threads 3 -g gnomad-2.1.zip
128178

129179
## Installation
130180

131181
get the latest binary from: https://github.com/brentp/slivar/releases/latest
182+
This will require libhts.so (from [htslib](https://htslib.org)) to be in the usual places or in a directory indicated in `LD_LIBRARY_PATH`.
132183

133184
or use via docker from: [brentp/slivar:latest](https://hub.docker.com/r/brentp/slivar)
134185

docker/Dockerfile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ RUN cd / && \
3030
echo 'PATH=/nim/bin:/root/.nimble/bin:$PATH' >> ~/.bashrc && \
3131
echo 'PATH=/nim/bin:/root/.nimble/bin:$PATH' >> ~/.bash_profile
3232

33-
RUN cd / && export PATH=/nim/bin:/root/.nimble/bin:$PATH && \
33+
RUN cd / \
34+
&& export PATH=/nim/bin:/root/.nimble/bin:$PATH && \
3435
source scl_source enable devtoolset-2 && \
3536
nimble install -y nimgen && \
3637
cd / && git clone -b dev https://github.com/brentp/duktape-nim && \
@@ -40,4 +41,4 @@ RUN cd / && export PATH=/nim/bin:/root/.nimble/bin:$PATH && \
4041
cd slivar && \
4142
nimble install --verbose -y && \
4243
nim c -d:release -o:/usr/bin/slivar --passC:-flto src/slivar && \
43-
rm -rf /slivar /duktape-nim && slivar trio -h
44+
rm -rf /slivar /duktape-nim && slivar expr -h

js/functions.js

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
function trio_denovo(kid, dad, mom) {
2+
// alts are 0:hom_ref, 1:het, 2:hom_alt, -1:unknown
3+
if(!(kid.alts == 1 && mom.alts == 0 && dad.alts == 0)){ return false}
4+
// sufficient depth in all
5+
if(kid.DP < 7 || mom.DP < 7 || dad.DP < 7) { return false; }
6+
// no evidence for alternate in the parents
7+
if((mom.AD[1] + dad.AD[1]) > 0) { return false; }
8+
// check the kid's allele balance.
9+
if(kid.AB < 0.2 || kid.AB > 0.8) { return false; }
10+
return true
11+
}
12+
13+
function trio_autosomal_dominant(kid, dad, mom) {
14+
// affected samples must be het.
15+
if(!(kid.affected == (kid.alts == 1) && mom.affected == (mom.alts == 1) && dad.affected == (dad.alts == 1))) { return false; }
16+
if(kid.DP < 7 || mom.DP < 7 || dad.DP < 7) { return false; }
17+
return kid.affected
18+
}
19+
20+
function trio_autosomal_recessive(kid, dad, mom) {
21+
if(!(kid.affected == (kid.alts == 2) && mom.affected == (mom.alts == 2) && dad.affected == (dad.alts == 2))) { return false; }
22+
if(kid.DP < 7 || mom.DP < 7 || dad.DP < 7) { return false; }
23+
if(kid.GQ < 10 || mom.GQ < 10 || dad.GQ < 10) { return false; }
24+
return kid.affected
25+
}

scripts/ci-tests.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/sh
2+
set -e
3+
apk add -q bash
4+
mkdir test-tmp && cd test-tmp
5+
git clone --depth 1 https://github.com/samtools/htslib
6+
git clone --depth 1 https://github.com/samtools/bcftools
7+
cd bcftools && make -j4 install && cd ../.. && rm -rf test-tmp
8+
nimble install -y && nimble test
9+
nim c -d:release -o:/test/slivar_static -d:nsb_static src/slivar

slivar.nimble

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ license = "MIT"
1818

1919
# Dependencies
2020

21-
requires "hts >= 0.2.7", "binaryheap", "zip", "https://github.com/brentp/duktape-nim#dev", "https://github.com/brentp/bpbio", "https://github.com/brentp/nim-minizip"
21+
requires "hts >= 0.2.7", "nimgen", "binaryheap", "zip", "https://github.com/brentp/duktape-nim#dev", "https://github.com/brentp/bpbio", "https://github.com/brentp/nim-minizip"
2222
srcDir = "src"
2323
installExt = @["nim"]
2424

src/slivar.nim

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import ./slivarpkg/evaluator
55
import ./slivarpkg/groups
66

77
import ./slivarpkg/gnotate
8-
import ./slivarpkg/sl_gnotate
8+
import ./slivarpkg/make_gnotate
99
import ./slivarpkg/filter
1010
import strutils
1111
import hts/vcf
@@ -19,7 +19,7 @@ proc expr_main*(dropfirst:bool=false) =
1919
let doc = """
2020
slivar -- variant expression for great good
2121
22-
Usage: slivar expr [options --pass-only --out-vcf <path> --vcf <path> --ped <path> --trio=<expression>... --group-expr=<expression>... --info=<expression>]
22+
Usage: slivar expr [options --pass-only --out-vcf <path> --vcf <path> --ped <path> --trio=<expression>... --group-expr=<expression>... --info=<expression> --gnotate=<path>...]
2323
2424
About:
2525
@@ -48,10 +48,10 @@ Options:
4848
--pass-only only output variants that pass at least one of the filters [default: false]
4949
--trio <string>... an expression applied to each trio where "mom", "dad", "kid" labels are available from trios inferred from
5050
a ped file.
51-
--group-expr <string>... expressions applied to the groups defined in the alias option.
51+
--group-expr <string>... expressions applied to the groups defined in the alias option [see: https://github.com/brentp/slivar/wiki/groups-in-slivar].
5252
--info <string> a filter expression using only variables from the info field and variant attributes. If this filter
5353
does not pass, the trio and alias expressions will not be applied.
54-
-g --gnomad <path> optional path compressed gnomad allele frequencies distributed at: https://github.com/brentp/slivar/releases
54+
-g --gnotate <path>... optional paths compressed gnote file (made with slivar make-gnotate)
5555
"""
5656

5757
var args: Table[string, docopt.Value]
@@ -74,7 +74,7 @@ Options:
7474
ivcf:VCF
7575
ovcf:VCF
7676
groups: seq[Group]
77-
gno:Gnotater
77+
gnos:seq[Gnotater]
7878
samples:seq[Sample]
7979

8080
if not open(ivcf, $args["--vcf"], threads=1):
@@ -96,21 +96,25 @@ Options:
9696
if $args["--alias"] != "nil":
9797
groups = parse_groups($args["--alias"], samples)
9898

99-
if $args["--gnomad"] != "nil":
100-
doAssert gno.open($args["--gnomad"])
101-
gno.update_header(ivcf)
99+
if $args["--gnotate"] != "nil":
100+
for p in @(args["--gnotate"]):
101+
var gno:Gnotater
102+
doAssert gno.open(p)
103+
gno.update_header(ivcf)
104+
gnos.add(gno)
102105

103106
ovcf.copy_header(ivcf.header)
104107
var
105108
trioTbl: TableRef[string,string]
106109
grpTbl: TableRef[string, string]
110+
iTbl: TableRef[string, string]
107111

108112
if $args["--trio"] != "nil":
109113
trioTbl = ovcf.getExpressionTable(@(args["--trio"]), $args["--vcf"])
110114
if $args["--group-expr"] != "nil":
111115
grpTbl = ovcf.getExpressionTable(@(args["--group-expr"]), $args["--vcf"])
112116
doAssert ovcf.write_header
113-
var ev = newEvaluator(samples, groups, trioTbl, grpTbl, $args["--info"], gno, field_names=id2names(ivcf.header))
117+
var ev = newEvaluator(samples, groups, iTbl, trioTbl, grpTbl, $args["--info"], gnos, field_names=id2names(ivcf.header))
114118

115119
if $args["--js"] != "nil":
116120
var js = $readFile($args["--js"])
@@ -157,8 +161,8 @@ proc main*() =
157161

158162
var dispatcher = {
159163
"expr": pair(f:expr_main, description:"trio and group expressions and filtering"),
160-
"gnotate": pair(f:sl_gnotate.main, description:"rapidly annotate a VCF/BCF with gnomad"),
161-
"filter": pair(f:filter.main, description:"filter a vcf with javascript expressions"),
164+
"gnotate": pair(f:filter.main, description:"filter and/or annotate a VCF/BCF"),
165+
"make-gnotate": pair(f:make_gnotate.main, description:"make a gnotate zip file for a given VCF"),
162166
}.toTable
163167

164168
stderr.write_line "slivar version: " & slivarVersion

src/slivarpkg/duko.nim

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,16 @@ proc check*(d:Dukexpr): bool {.inline.} =
8181
result = d.ctx.duk_get_boolean(-1)
8282
d.ctx.pop()
8383

84+
proc asfloat*(d:Dukexpr): float32 {.inline.} =
85+
## evaluate a (previously compiled) expression and return a float32
86+
discard d.ctx.duk_push_heapptr(d.vptr)
87+
if d.ctx.duk_pcall(0) != 0:
88+
var err = $d.ctx.duk_safe_to_string(-1)
89+
raise newException(ValueError, "error from duktape: " & $err & " for expression:" & d.expr & "\n")
90+
else:
91+
result = d.ctx.duk_get_number(-1).float
92+
d.ctx.pop()
93+
8494
proc check*(ctx: DTContext, expression: string): bool {.inline.} =
8595
## evaluate the expression in the current context
8696
if ctx.duk_peval_string(expression):

0 commit comments

Comments
 (0)