Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions modules/nf-core/sentieoncli/dnascopelongread/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
FROM debian:stable-20260112-slim AS downloader

# Release versions to fetch. ARG values are scoped to the build stage that
# declares them, so these are only visible inside this downloader stage.
ARG SENTIEON_VERSION=202503.02
ARG SENTIEON_CLI_VERSION=1.5.2

# Abort early if the version was overridden with an empty value
RUN test -n "$SENTIEON_VERSION"

# Labels declared here are attached to this intermediate stage's image
LABEL container.base.image="debian:stable-20260112-slim" \
      software.version="${SENTIEON_VERSION}" \
      software.website="https://www.sentieon.com/"

# Download the software from the permalink.
# --fail makes curl report an HTTP error itself instead of streaming the
# server's error page into tar.
RUN apt-get update && apt-get install -y curl
RUN mkdir -p /opt/sentieon/
RUN curl -fL "https://s3.amazonaws.com/sentieon-release/software/sentieon-genomics-${SENTIEON_VERSION}.tar.gz" | tar -zxf - -C /opt/sentieon/

# Build gnuplot from source with the default configure prefix
RUN apt-get update && apt-get install -y curl make gcc gzip libgd-dev libcairo2-dev libpango1.0-dev && \
    curl -fL "https://downloads.sourceforge.net/project/gnuplot/gnuplot/6.0.2/gnuplot-6.0.2.tar.gz" | \
    tar -zxf - && \
    cd gnuplot-6.0.2 && \
    ./configure && \
    make install

# Build the container
FROM debian:stable-20260112-slim

# ARG values do not carry over from one build stage to the next, so the version
# has to be declared again here; otherwise $SENTIEON_VERSION expands to an
# empty string and the COPY source path below does not exist.
# Keep this default identical to the one in the downloader stage.
ARG SENTIEON_VERSION=202503.02
ENV SENTIEON_VERSION=$SENTIEON_VERSION

# Labels must be set in the final stage to end up on the published image
LABEL container.base.image="debian:stable-20260112-slim" \
      software.version="${SENTIEON_VERSION}" \
      software.website="https://www.sentieon.com/"

# Bring over the unpacked Sentieon release and the gnuplot build artefacts
COPY --from=downloader /opt/sentieon/sentieon-genomics-${SENTIEON_VERSION} /opt/sentieon/sentieon-genomics-${SENTIEON_VERSION}
COPY --from=downloader /usr/local/bin/gnuplot /usr/local/bin/gnuplot
COPY --from=downloader /usr/local/libexec/gnuplot/6.0/gnuplot_x11 /usr/local/libexec/gnuplot/6.0/gnuplot_x11
COPY --from=downloader /usr/local/share/gnuplot/6.0 /usr/local/share/gnuplot/6.0
CMD ["/bin/bash"]

# Runtime packages:
#   - libjemalloc2: the recommended memory allocator, see https://support.sentieon.com/appnotes/jemalloc/
#   - procps: process monitoring
#   - locales: needed to generate en_US.UTF-8 below
# The apt lists and package cache are removed in the same layer.
RUN apt-get update && apt-get install -y \
        bzip2 \
        ca-certificates \
        curl \
        git \
        isal \
        libcairo2-dev \
        libgd-dev \
        libjemalloc2 \
        libpango1.0-dev \
        locales \
        procps \
    && sed -i 's/^# *\(en_US.UTF-8\)/\1/' /etc/locale.gen \
    && dpkg-reconfigure --frontend=noninteractive locales \
    && echo "LANG=en_US.UTF-8" | tee /etc/default/locale \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives

# Locale and allocator
ENV LC_ALL=en_US.UTF-8
ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2

# Put the Sentieon binaries on PATH
ENV SENTIEON_INSTALL_DIR=/opt/sentieon/sentieon-genomics-$SENTIEON_VERSION
ENV PATH=${SENTIEON_INSTALL_DIR}/bin:${PATH}

# Conda root used by micromamba; its bin directory is prepended to PATH
ENV MAMBA_ROOT_PREFIX=/opt/conda
ENV PATH=${MAMBA_ROOT_PREFIX}/bin:${PATH}

# Pinned micromamba release and target platform
ARG MICROMAMBA_VERSION=1.5.8-0
ARG MICROMAMBA_ARCH=linux-64

# Fetch the micromamba release binary into /usr/local/bin and create the root prefix
RUN set -eux; \
    curl -fsSL "https://github.com/mamba-org/micromamba-releases/releases/download/${MICROMAMBA_VERSION}/micromamba-${MICROMAMBA_ARCH}" -o /usr/local/bin/micromamba; \
    chmod 755 /usr/local/bin/micromamba; \
    mkdir -p "$MAMBA_ROOT_PREFIX"

# Create the pinned conda environment that sentieon-cli runs in, then drop the
# package caches in the same layer.
RUN set -eux; \
    micromamba create -y -n sentieon-cli -c conda-forge -c bioconda \
        python=3.11 \
        pip=24.0 \
        samtools=1.22 \
        bcftools=1.22 \
        htslib=1.22 \
        pysam=0.23.0 \
        click=8.1.7 \
        pyyaml=6.0.1 \
        pandas=2.2.2 \
        requests=2.32.3 \
        bedtools=2.31.1 \
        mosdepth=0.3.12 \
        hificnv=1.0.1 \
        packaging=24.0 \
        colorlog=6.9.0 \
        importlib_resources=6.4.5; \
    micromamba clean --all --yes

# Source repository and git ref of sentieon-cli.
# ARG values are scoped per build stage, so the CLI version must be declared in
# this stage as well; without it ${SENTIEON_CLI_VERSION} is unset here and the
# `set -u` below aborts the build. Keep the default identical to the one in the
# downloader stage.
ARG SENTIEON_CLI_REPO=https://github.com/Sentieon/sentieon-cli.git
ARG SENTIEON_CLI_VERSION=1.5.2

# Install sentieon-cli itself; --no-deps because its dependencies are pinned in
# the conda environment above.
RUN set -eux; \
    micromamba run -n sentieon-cli pip install --no-deps "git+${SENTIEON_CLI_REPO}@${SENTIEON_CLI_VERSION}"

# Expose the sentieon-cli environment without an explicit activation step.
# ${MAMBA_ROOT_PREFIX}/bin and ${SENTIEON_INSTALL_DIR}/bin were already added to
# PATH earlier in this stage, so only the environment's own bin directory is
# prepended here; the resulting lookup order is unchanged.
ENV SENTIEON_CLI_ENV=${MAMBA_ROOT_PREFIX}/envs/sentieon-cli
ENV PATH=${SENTIEON_CLI_ENV}/bin:${PATH}
ENV CONDA_DEFAULT_ENV=sentieon-cli

# A default jemalloc configuration that should work well for most use-cases, see http://jemalloc.net/jemalloc.3.html
ENV MALLOC_CONF=metadata_thp:auto,background_thread:true,dirty_decay_ms:30000,muzzy_decay_ms:30000
89 changes: 89 additions & 0 deletions modules/nf-core/sentieoncli/dnascopelongread/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Runs `sentieon-cli dnascope-longread` on one aligned long-read sample and
// emits the resulting VCF and/or gVCF together with their tabix indices.
process SENTIEONCLI_DNASCOPELONGREAD {

    tag "${meta.id}"
    label 'process_high'
    label 'sentieon'

    // NOTE(review): the secret is declared but the script below never references
    // it - confirm the license is picked up by some other mechanism.
    secret 'SENTIEON_LICENSE_BASE64'

    container "docker.io/clinicalgenomicslund/dnascope-longread:1.5.2"

    input:
    // Index files (bai, fai, *_tbi) are staged but not named on the command line;
    // presumably the tools locate them next to the indexed file.
    tuple val(meta), path(bam), path(bai), path(diploid_intervals_bed), path(haploid_intervals_bed)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fai)
    tuple val(meta4), path(model_bundle)
    tuple val(meta5), path(dbsnp)
    tuple val(meta6), path(dbsnp_tbi)
    tuple val(meta7), path(pop_vcf)
    tuple val(meta8), path(pop_vcf_tbi)
    tuple val(meta9), path(cnv_excluded_regions)
    val tech
    val emit_snvs
    val emit_gvcf
    val emit_svs
    val emit_cnvs
    val emit_qc

    output:
    // All result files are optional because the emit_* switches decide which exist
    tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf, optional: true
    tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: vcf_tbi, optional: true
    tuple val(meta), path("${prefix}.g.vcf.gz"), emit: gvcf, optional: true
    tuple val(meta), path("${prefix}.g.vcf.gz.tbi"), emit: gvcf_tbi, optional: true
    tuple val("${task.process}"), val("sentieon-cli"), eval("sentieon-cli --version"), topic: versions, emit: versions_sentieon_cli
    tuple val("${task.process}"), val("sentieon"), eval("sentieon driver --version 2>&1 | sed -e 's/sentieon-genomics-//g'"), topic: versions, emit: versions_sentieon
    tuple val("${task.process}"), val('bedtools'), eval("bedtools --version | sed -e 's/bedtools v//g'"), topic: versions, emit: versions_bedtools
    tuple val("${task.process}"), val("samtools"), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools
    tuple val("${task.process}"), val("bcftools"), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''

    // Deliberately assigned without `def`: the output block references
    // ${prefix}, and a `def` variable would be local to this script closure and
    // therefore not visible there. The stub block does the same.
    prefix = task.ext.prefix ?: "${meta.id}"

    // Optional inputs only contribute a flag when a file was supplied
    def diploid_bed_cmd = diploid_intervals_bed ? "--diploid_bed ${diploid_intervals_bed}" : ""
    def haploid_bed_cmd = haploid_intervals_bed ? "--haploid_bed ${haploid_intervals_bed}" : ""
    def dbsnp_cmd = dbsnp ? "--dbsnp ${dbsnp}" : ""
    def popvcf_cmd = pop_vcf ? "--pop_vcf ${pop_vcf}" : ""

    // emit_* switches: gVCF is opt-in, everything else is opt-out via --skip_*
    def gvcf_cmd = emit_gvcf ? "--gvcf" : ""
    def vcf_cmd = emit_snvs ? "" : "--skip_small_variants"
    def sv_cmd = emit_svs ? "" : "--skip_svs"
    def cnv_cmd = emit_cnvs ? "" : "--skip_cnv"
    // NOTE(review): a missing cnv_excluded_regions file maps to --skip_mosdepth,
    // same as qc_cmd, and the file itself is never passed on - confirm intent.
    def cnv_excluded_regions_cmd = cnv_excluded_regions ? "" : "--skip_mosdepth"
    def qc_cmd = emit_qc ? "" : "--skip_mosdepth"

    // NOTE(review): --skip_mosdepth, --skip_cnv and --skip_svs are appended
    // unconditionally at the end of the command, so emit_svs, emit_cnvs and
    // emit_qc currently cannot enable those steps.
    """
    sentieon-cli dnascope-longread \\
        ${args} \\
        --cores ${task.cpus} \\
        --tech ${tech} \\
        -r ${fasta} \\
        -i ${bam} \\
        -m ${model_bundle} \\
        ${diploid_bed_cmd} \\
        ${haploid_bed_cmd} \\
        ${dbsnp_cmd} \\
        ${popvcf_cmd} \\
        ${gvcf_cmd} \\
        ${vcf_cmd} \\
        ${sv_cmd} \\
        ${cnv_cmd} \\
        ${qc_cmd} \\
        ${cnv_excluded_regions_cmd} \\
        --skip_mosdepth \\
        --skip_cnv \\
        --skip_svs \\
        ${prefix}.vcf.gz
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    // Creates placeholder VCF and gVCF files plus empty index files
    """
    echo "" | gzip > ${prefix}.vcf.gz
    touch ${prefix}.vcf.gz.tbi
    echo "" | gzip > ${prefix}.g.vcf.gz
    touch ${prefix}.g.vcf.gz.tbi
    """
}
Loading