From 49bb7a470942eb141bb9509b24075df65ac2d674 Mon Sep 17 00:00:00 2001 From: ardydavari Date: Tue, 6 Oct 2020 19:10:17 +0000 Subject: [PATCH] changed base image --- Dockerfile | 36 +--- Dockerfile.bak | 38 ++++ battenberg_wgs.R | 416 +------------------------------------ refit.R | 528 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 573 insertions(+), 445 deletions(-) create mode 100755 Dockerfile.bak create mode 100755 refit.R diff --git a/Dockerfile b/Dockerfile index a6a9475..1b25f57 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,37 +1,5 @@ -FROM rocker/r-base:4.0.2 +FROM blcdsdockerregistry/docker-bb-base:latest -RUN apt-get update && apt-get upgrade -y -RUN apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev -RUN install2.r BiocManager -RUN installBioc.r XML httr openssl BiocFileCache Rsamtools GenomicFeatures Rhtslib GenomicRanges SummarizedExperiment rtracklayer BSgenome GenomeInfoDb -RUN installBioc.r devtools splines readr doParallel ggplot2 RColorBrewer gridExtra gtools parallel VariantAnnotation -RUN installGithub.r Crick-CancerGenomics/ascat/ASCAT -RUN installBioc.r VariantAnnotation -RUN installGithub.r Wedge-lab/battenberg@dev -RUN installBioc.r optparse - -RUN apt-get install -y curl default-jre - -RUN mkdir /tmp/downloads - -RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/samtools/htslib/archive/1.7.tar.gz && \ - mkdir /tmp/downloads/htslib && \ - tar -C /tmp/downloads/htslib --strip-components 1 -zxf tmp.tar.gz && \ - make -C /tmp/downloads/htslib && \ - rm -f /tmp/downloads/tmp.tar.gz - -ENV HTSLIB /tmp/downloads/htslib - -RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/cancerit/alleleCount/archive/v4.0.0.tar.gz && \ - mkdir /tmp/downloads/alleleCount && \ - tar -C /tmp/downloads/alleleCount --strip-components 1 -zxf tmp.tar.gz && \ - cd /tmp/downloads/alleleCount/c && \ - mkdir bin && \ - make && \ - cp /tmp/downloads/alleleCount/c/bin/alleleCounter /usr/local/bin/. && \ - cd /tmp/downloads && \ - rm -rf /tmp/downloads/alleleCount /tmp/downloads/tmp.tar.gz - -#COPY battenberg_wgs.R /usr/local/bin/battenberg_wgs.R +RUN installGithub.r Wedge-lab/battenberg@9537a58369 CMD ["/bin/bash"] diff --git a/Dockerfile.bak b/Dockerfile.bak new file mode 100755 index 0000000..894bcd7 --- /dev/null +++ b/Dockerfile.bak @@ -0,0 +1,38 @@ +FROM rocker/r-base:4.0.2 + +RUN apt-get update && apt-get upgrade -y +RUN apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev +RUN install2.r BiocManager +RUN installBioc.r XML httr openssl BiocFileCache Rsamtools GenomicFeatures Rhtslib GenomicRanges SummarizedExperiment rtracklayer BSgenome GenomeInfoDb +RUN installBioc.r devtools splines readr doParallel ggplot2 RColorBrewer gridExtra gtools parallel VariantAnnotation +RUN installGithub.r Crick-CancerGenomics/ascat/ASCAT +RUN installBioc.r VariantAnnotation +#RUN installGithub.r Wedge-lab/battenberg@dev +RUN installGithub.r Wedge-lab/battenberg@aa14170714 +RUN installBioc.r optparse + +RUN apt-get install -y curl default-jre + +RUN mkdir /tmp/downloads + +RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/samtools/htslib/archive/1.7.tar.gz && \ + mkdir /tmp/downloads/htslib && \ + tar -C /tmp/downloads/htslib --strip-components 1 -zxf tmp.tar.gz && \ + make -C /tmp/downloads/htslib && \ + rm -f /tmp/downloads/tmp.tar.gz + +ENV HTSLIB /tmp/downloads/htslib + +RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/cancerit/alleleCount/archive/v4.0.0.tar.gz && \ + mkdir /tmp/downloads/alleleCount && \ + tar -C /tmp/downloads/alleleCount --strip-components 1 -zxf tmp.tar.gz && \ + cd /tmp/downloads/alleleCount/c && \ + mkdir bin && \ + make && \ + cp /tmp/downloads/alleleCount/c/bin/alleleCounter /usr/local/bin/. && \ + cd /tmp/downloads && \ + rm -rf /tmp/downloads/alleleCount /tmp/downloads/tmp.tar.gz + +#COPY battenberg_wgs.R /usr/local/bin/battenberg_wgs.R + +CMD ["/bin/bash"] diff --git a/battenberg_wgs.R b/battenberg_wgs.R index d14aa0b..6045c54 100755 --- a/battenberg_wgs.R +++ b/battenberg_wgs.R @@ -1,398 +1,6 @@ library(Battenberg) library(optparse) - -#' @inheritParams battenberg -battenberg_refit = function(varusepresetrhopsi=F, varpresetrho=NA, varpresetpsi=NA, tumourname, normalname, tumour_data_file, normal_data_file, imputeinfofile, g1000prefix, problemloci, gccorrectprefix=NULL, - repliccorrectprefix=NULL, g1000allelesprefix=NA, ismale=NA, data_type="wgs", impute_exe="impute2", allelecounter_exe="alleleCounter", nthreads=8, platform_gamma=1, phasing_gamma=1, - segmentation_gamma=10, segmentation_kmin=3, phasing_kmin=1, clonality_dist_metric=0, ascat_dist_metric=1, min_ploidy=1.6, - max_ploidy=4.8, min_rho=0.1, min_goodness=0.63, uninformative_BAF_threshold=0.51, min_normal_depth=10, min_base_qual=20, - min_map_qual=35, calc_seg_baf_option=3, skip_allele_counting=F, skip_preprocessing=F, skip_phasing=F, externalhaplotypefile = NA, - usebeagle=FALSE, - beaglejar=NA, - beagleref.template=NA, - beagleplink.template=NA, - beaglemaxmem=10, - beaglenthreads=1, - beaglewindow=40, - beagleoverlap=4, - javajre="java", - write_battenberg_phasing = T, multisample_relative_weight_balanced = 0.25, multisample_maxlag = 100, segmentation_gamma_multisample = 5, - snp6_reference_info_file=NA, apt.probeset.genotype.exe="apt-probeset-genotype", apt.probeset.summarize.exe="apt-probeset-summarize", - norm.geno.clust.exe="normalize_affy_geno_cluster.pl", birdseed_report_file="birdseed.report.txt", heterozygousFilter="none", - prior_breakpoints_file=NULL) { - - requireNamespace("foreach") - requireNamespace("doParallel") - requireNamespace("parallel") - - if (data_type=="wgs" & is.na(ismale)) { - stop("Please provide a boolean denominator whether this sample represents a male donor") - } - - if (data_type=="wgs" & is.na(g1000allelesprefix)) { - stop("Please provide a path to 1000 Genomes allele reference files") - } - - if (data_type=="wgs" & is.null(gccorrectprefix)) { - stop("Please provide a path to GC content reference files") - } - - if (!file.exists(problemloci)) { - stop("Please provide a path to a problematic loci file") - } - - if (!file.exists(imputeinfofile)) { - stop("Please provide a path to an impute info file") - } - - # check whether the impute_info.txt file contains correct paths - check.imputeinfofile(imputeinfofile = imputeinfofile, is.male = ismale, usebeagle = usebeagle) - - # check whether multisample case - nsamples <- length(tumourname) - if (nsamples > 1) { - if (length(skip_allele_counting) < nsamples) { - skip_allele_counting = rep(skip_allele_counting[1], nsamples) - } - if (length(skip_preprocessing) < nsamples) { - skip_preprocessing = rep(skip_preprocessing[1], nsamples) - } - if (length(skip_phasing) < nsamples) { - skip_phasing = rep(skip_phasing[1], nsamples) - } - } - - - if (data_type=="wgs" | data_type=="WGS") { - if (nsamples > 1) { - print(paste0("Running Battenberg in multisample mode on ", nsamples, " samples: ", paste0(tumourname, collapse = ", "))) - } - chrom_names = get.chrom.names(imputeinfofile, ismale) - } else if (data_type=="snp6" | data_type=="SNP6") { - if (nsamples > 1) { - stop(paste0("Battenberg multisample mode has not been tested with SNP6 data")) - } - chrom_names = get.chrom.names(imputeinfofile, TRUE) - logr_file = paste(tumourname, "_mutantLogR.tab", sep="") - allelecounts_file = NULL - } - - for (sampleidx in 1:nsamples) { - - if (!skip_preprocessing[sampleidx]) { - if (data_type=="wgs" | data_type=="WGS") { - # Setup for parallel computing - clp = parallel::makeCluster(nthreads) - doParallel::registerDoParallel(clp) - - prepare_wgs(chrom_names=chrom_names, - tumourbam=tumour_data_file[sampleidx], - normalbam=normal_data_file, - tumourname=tumourname[sampleidx], - normalname=normalname, - g1000allelesprefix=g1000allelesprefix, - g1000prefix=g1000prefix, - gccorrectprefix=gccorrectprefix, - repliccorrectprefix=repliccorrectprefix, - min_base_qual=min_base_qual, - min_map_qual=min_map_qual, - allelecounter_exe=allelecounter_exe, - min_normal_depth=min_normal_depth, - nthreads=nthreads, - skip_allele_counting=skip_allele_counting[sampleidx], - skip_allele_counting_normal = (sampleidx > 1)) - - # Kill the threads - parallel::stopCluster(clp) - - } else if (data_type=="snp6" | data_type=="SNP6") { - - prepare_snp6(tumour_cel_file=tumour_data_file[sampleidx], - normal_cel_file=normal_data_file, - tumourname=tumourname[sampleidx], - chrom_names=chrom_names, - snp6_reference_info_file=snp6_reference_info_file, - apt.probeset.genotype.exe=apt.probeset.genotype.exe, - apt.probeset.summarize.exe=apt.probeset.summarize.exe, - norm.geno.clust.exe=norm.geno.clust.exe, - birdseed_report_file=birdseed_report_file) - - } else { - print("Unknown data type provided, please provide wgs or snp6") - q(save="no", status=1) - } - } - - if (data_type=="snp6" | data_type=="SNP6") { - # Infer what the gender is - WGS requires it to be specified - gender = infer_gender_birdseed(birdseed_report_file) - ismale = gender == "male" - } - - - if (!skip_phasing[sampleidx]) { - - # if external phasing data is provided (as a vcf), split into chromosomes for use in haplotype reconstruction - if (!is.na(externalhaplotypefile) && file.exists(externalhaplotypefile)) { - externalhaplotypeprefix <- paste0(normalname, "_external_haplotypes_chr") - - # if these files exist already, no need to split again - if (any(!file.exists(paste0(externalhaplotypeprefix, 1:length(chrom_names), ".vcf")))) { - - print(paste0("Splitting external phasing data from ", externalhaplotypefile)) - split_input_haplotypes(chrom_names = chrom_names, - externalhaplotypefile = externalhaplotypefile, - outprefix = externalhaplotypeprefix) - } else { - print("No need to split, external haplotype files per chromosome found") - } - } else { - externalhaplotypeprefix <- NA - } - - # Setup for parallel computing - clp = parallel::makeCluster(nthreads) - doParallel::registerDoParallel(clp) - - # Reconstruct haplotypes - # mclapply(1:length(chrom_names), function(chrom) { - foreach::foreach (chrom=1:length(chrom_names)) %dopar% { - print(chrom) - - run_haplotyping(chrom=chrom, - tumourname=tumourname[sampleidx], - normalname=normalname, - ismale=ismale, - imputeinfofile=imputeinfofile, - problemloci=problemloci, - impute_exe=impute_exe, - min_normal_depth=min_normal_depth, - chrom_names=chrom_names, - snp6_reference_info_file=snp6_reference_info_file, - heterozygousFilter=heterozygousFilter, - usebeagle=usebeagle, - beaglejar=beaglejar, - beagleref=gsub("CHROMNAME",if(chrom==23) "X" else chrom, beagleref.template), - beagleplink=gsub("CHROMNAME",if(chrom==23) "X" else chrom, beagleplink.template), - beaglemaxmem=beaglemaxmem, - beaglenthreads=beaglenthreads, - beaglewindow=beaglewindow, - beagleoverlap=beagleoverlap, - externalhaplotypeprefix=externalhaplotypeprefix, - use_previous_imputation=(sampleidx > 1)) - }#, mc.cores=nthreads) - - # Kill the threads as from here its all single core - parallel::stopCluster(clp) - - # Combine all the BAF output into a single file - combine.baf.files(inputfile.prefix=paste(tumourname[sampleidx], "_chr", sep=""), - inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", - outputfile=paste(tumourname[sampleidx], "_heterozygousMutBAFs_haplotyped.txt", sep=""), - no.chrs=length(chrom_names)) - } - - # Segment the phased and haplotyped BAF data - segment.baf.phased(samplename=tumourname[sampleidx], - inputfile=paste(tumourname[sampleidx], "_heterozygousMutBAFs_haplotyped.txt", sep=""), - outputfile=paste(tumourname[sampleidx], ".BAFsegmented.txt", sep=""), - prior_breakpoints_file=prior_breakpoints_file, - gamma=segmentation_gamma, - phasegamma=phasing_gamma, - kmin=segmentation_kmin, - phasekmin=phasing_kmin, - calc_seg_baf_option=calc_seg_baf_option) - - if (nsamples > 1 | write_battenberg_phasing) { - # Write the Battenberg phasing information to disk as a vcf - write_battenberg_phasing(tumourname = tumourname[sampleidx], - SNPfiles = paste0(tumourname[sampleidx], "_alleleFrequencies_chr", 1:length(chrom_names), ".txt"), - imputedHaplotypeFiles = paste0(tumourname[sampleidx], "_impute_output_chr", 1:length(chrom_names), "_allHaplotypeInfo.txt"), - bafsegmented_file = paste0(tumourname[sampleidx], ".BAFsegmented.txt"), - outprefix = paste0(tumourname[sampleidx], "_Battenberg_phased_chr"), - chrom_names = chrom_names, - include_homozygous = F) - } - - } - - - # if this is a multisample run, combine the battenberg phasing outputs, incorporate it and resegment - if (nsamples > 1) { - print("Constructing multisample phasing") - multisamplehaplotypeprefix <- paste0(normalname, "_multisample_haplotypes_chr") - - - # Setup for parallel computing - clp = parallel::makeCluster(nthreads) - doParallel::registerDoParallel(clp) - - # Reconstruct haplotypes - # mclapply(1:length(chrom_names), function(chrom) { - foreach::foreach (chrom=1:length(chrom_names)) %dopar% { - print(chrom) - - get_multisample_phasing(chrom = chrom, - bbphasingprefixes = paste0(tumourname, "_Battenberg_phased_chr"), - maxlag = multisample_maxlag, - relative_weight_balanced = multisample_relative_weight_balanced, - outprefix = multisamplehaplotypeprefix) - - } - - - # continue over all samples to incorporate the multisample phasing - for (sampleidx in 1:nsamples) { - - # rename the original files without multisample phasing info - MutBAFfiles <- paste0(tumourname[sampleidx], "_chr", 1:length(chrom_names), "_heterozygousMutBAFs_haplotyped.txt") - heterozygousdatafiles <- paste0(tumourname[sampleidx], "_chr", 1:length(chrom_names), "_heterozygousData.png") - raffiles <- paste0(tumourname[sampleidx], "_RAFseg_chr", chrom_names, ".png") - segfiles <- paste0(tumourname[sampleidx], "_segment_chr", chrom_names, ".png") - haplotypedandbafsegmentedfiles <- paste0(tumourname[sampleidx], c("_heterozygousMutBAFs_haplotyped.txt", ".BAFsegmented.txt")) - - file.copy(from = MutBAFfiles, to = gsub(pattern = ".txt$", replacement = "_noMulti.txt", x = MutBAFfiles), overwrite = T) - file.copy(from = heterozygousdatafiles, to = gsub(pattern = ".png$", replacement = "_noMulti.png", x = heterozygousdatafiles), overwrite = T) - file.copy(from = raffiles, to = gsub(pattern = ".png$", replacement = "_noMulti.png", x = raffiles), overwrite = T) - file.copy(from = segfiles, to = gsub(pattern = ".png$", replacement = "_noMulti.png", x = segfiles), overwrite = T) - file.copy(from = haplotypedandbafsegmentedfiles, to = gsub(pattern = ".txt$", replacement = "_noMulti.txt", x = haplotypedandbafsegmentedfiles), overwrite = T) - # done renaming, next sections will overwrite orignals - - - foreach::foreach (chrom=1:length(chrom_names)) %dopar% { - print(chrom) - - input_known_haplotypes(chrom = chrom, - chrom_names = chrom_names, - imputedHaplotypeFile = paste0(tumourname[sampleidx], "_impute_output_chr", chrom, "_allHaplotypeInfo.txt"), - externalHaplotypeFile = paste0(multisamplehaplotypeprefix, chrom, ".vcf"), - oldfilesuffix = "_noMulti.txt") - - GetChromosomeBAFs(chrom=chrom, - SNP_file=paste0(tumourname[sampleidx], "_alleleFrequencies_chr", chrom, ".txt"), - haplotypeFile=paste0(tumourname[sampleidx], "_impute_output_chr", chrom, "_allHaplotypeInfo.txt"), - samplename=tumourname[sampleidx], - outfile=paste0(tumourname[sampleidx], "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt"), - chr_names=chrom_names, - minCounts=min_normal_depth) - - # Plot what we have until this point - plot.haplotype.data(haplotyped.baf.file=paste0(tumourname[sampleidx], "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt"), - imageFileName=paste0(tumourname[sampleidx],"_chr",chrom,"_heterozygousData.png"), - samplename=tumourname[sampleidx], - chrom=chrom, - chr_names=chrom_names) - } - - } - - # Kill the threads as from here its single core - parallel::stopCluster(clp) - - for (sampleidx in 1:nsamples) { - - # Combine all the BAF output into a single file - combine.baf.files(inputfile.prefix=paste0(tumourname[sampleidx], "_chr"), - inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", - outputfile=paste0(tumourname[sampleidx], "_heterozygousMutBAFs_haplotyped.txt"), - no.chrs=length(chrom_names)) - - } - - # Segment the phased and haplotyped BAF data - segment.baf.phased.multisample(samplename=tumourname, - inputfile=paste(tumourname, "_heterozygousMutBAFs_haplotyped.txt", sep=""), - outputfile=paste(tumourname, ".BAFsegmented.txt", sep=""), - prior_breakpoints_file=prior_breakpoints_file, - gamma=segmentation_gamma_multisample, - calc_seg_baf_option=calc_seg_baf_option) - - } - - - # Setup for parallel computing - clp = parallel::makeCluster(min(nthreads, nsamples)) - doParallel::registerDoParallel(clp) - - # for (sampleidx in 1:nsamples) { - foreach::foreach (sampleidx=1:nsamples) %dopar% { - print(paste0("Fitting final copy number and calling subclones for sample ", tumourname[sampleidx])) - - if (data_type=="wgs" | data_type=="WGS") { - logr_file = paste(tumourname[sampleidx], "_mutantLogR_gcCorrected.tab", sep="") - allelecounts_file = paste(tumourname[sampleidx], "_alleleCounts.tab", sep="") - } - - # Fit a clonal copy number profile - fit.copy.number(samplename=tumourname[sampleidx], - outputfile.prefix=paste(tumourname[sampleidx], "_", sep=""), - inputfile.baf.segmented=paste(tumourname[sampleidx], ".BAFsegmented.txt", sep=""), - inputfile.baf=paste(tumourname[sampleidx],"_mutantBAF.tab", sep=""), - inputfile.logr=logr_file, - dist_choice=clonality_dist_metric, - ascat_dist_choice=ascat_dist_metric, - min.ploidy=min_ploidy, - max.ploidy=max_ploidy, - min.rho=min_rho, - min.goodness=min_goodness, - uninformative_BAF_threshold=uninformative_BAF_threshold, - gamma_param=platform_gamma, - use_preset_rho_psi=varusepresetrhopsi, - preset_rho=varpresetrho, - preset_psi=varpresetpsi, - read_depth=30) - - # Go over all segments, determine which segements are a mixture of two states and fit a second CN state - callSubclones(sample.name=tumourname[sampleidx], - baf.segmented.file=paste(tumourname[sampleidx], ".BAFsegmented.txt", sep=""), - logr.file=logr_file, - rho.psi.file=paste(tumourname[sampleidx], "_rho_and_psi.txt",sep=""), - output.file=paste(tumourname[sampleidx],"_subclones.txt", sep=""), - output.figures.prefix=paste(tumourname[sampleidx],"_subclones_chr", sep=""), - output.gw.figures.prefix=paste(tumourname[sampleidx],"_BattenbergProfile", sep=""), - masking_output_file=paste(tumourname[sampleidx], "_segment_masking_details.txt", sep=""), - prior_breakpoints_file=prior_breakpoints_file, - chr_names=chrom_names, - gamma=platform_gamma, - segmentation.gamma=NA, - siglevel=0.05, - maxdist=0.01, - noperms=1000, - calc_seg_baf_option=calc_seg_baf_option) - - # Make some post-hoc plots - make_posthoc_plots(samplename=tumourname[sampleidx], - logr_file=logr_file, - subclones_file=paste(tumourname[sampleidx], "_subclones.txt", sep=""), - rho_psi_file=paste(tumourname[sampleidx], "_rho_and_psi.txt", sep=""), - bafsegmented_file=paste(tumourname[sampleidx], ".BAFsegmented.txt", sep=""), - logrsegmented_file=paste(tumourname[sampleidx], ".logRsegmented.txt", sep=""), - allelecounts_file=allelecounts_file) - - # Save refit suggestions for a future rerun - cnfit_to_refit_suggestions(samplename=tumourname[sampleidx], - subclones_file=paste(tumourname[sampleidx], "_subclones.txt", sep=""), - rho_psi_file=paste(tumourname[sampleidx], "_rho_and_psi.txt", sep=""), - gamma_param=platform_gamma) - - } - - # Kill the threads as last part again is single core - parallel::stopCluster(clp) - - if (nsamples > 1) { - print("Assessing mirrored subclonal allelic imbalance (MSAI)") - call_multisample_MSAI(rdsprefix = multisamplehaplotypeprefix, - subclonesfiles = paste0(tumourname, "_subclones.txt"), - chrom_names = chrom_names, - tumournames = tumourname, - plotting = T) - } -} -environment(battenberg_refit) <- environment(battenberg) - - option_list = list( make_option(c("-t", "--tumourname"), type="character", default=NULL, help="Samplename of the tumour", metavar="character"), make_option(c("-n", "--normalname"), type="character", default=NULL, help="Samplename of the normal", metavar="character"), @@ -403,13 +11,6 @@ option_list = list( make_option(c("--skip_allelecount"), type="logical", default=FALSE, action="store_true", help="Provide when alleles don't have to be counted. This expects allelecount files on disk", metavar="character"), make_option(c("--skip_preprocessing"), type="logical", default=FALSE, action="store_true", help="Provide when pre-processing has previously completed. This expects the files on disk", metavar="character"), make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. This expects the files on disk", metavar="character"), - make_option(c("--min_ploidy"), type="numeric", default=1.6, help="Minimum ploidy to be considered (Default: 1.6)", metavar="character"), - make_option(c("--max_ploidy"), type="numeric", default=4.8, help="Maximum ploidy to be considered (Default: 4.8)", metavar="character"), - make_option(c("--min_rho"), type="numeric", default=0.1, help="Minimum purity to be considered (Default: 0.1", metavar="character"), - make_option(c("--min_goodness"), type="numeric", default=0.63, help="Minimum goodness of fit required for a purity/ploidy combination to be accepted as a solution (Default: 0.63)", metavar="character"), - make_option(c("--use_presetrhopsi"), type="logical", default=FALSE, help="Use a preset rho and psi for refitting", metavar="character"), - make_option(c("--preset_rho"), type="numeric", default=NULL, help="Preset rho", metavar="character"), - make_option(c("--preset_psi"), type="numeric", default=NULL, help="Preset psi", metavar="character"), make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character") ) @@ -428,13 +29,6 @@ SKIP_PREPROCESSING = opt$skip_preprocessing SKIP_PHASING = opt$skip_phasing NTHREADS = opt$cpu PRIOR_BREAKPOINTS_FILE = opt$bp -MIN_PLOIDY = opt$min_ploidy -MAX_PLOIDY = opt$max_ploidy -MIN_RHO = opt$min_rho -MIN_GOODNESS_OF_FIT = opt$min_goodness -VARUSEPRESETRHOPSI = opt$use_presetrhopsi -VARPRESETRHO = opt$preset_rho -VARPRESETPSI = opt$preset_psi ############################################################################### # 2018-11-01 @@ -457,6 +51,10 @@ SEGMENTATIIN_KMIN = 3 PHASING_KMIN = 1 CLONALITY_DIST_METRIC = 0 ASCAT_DIST_METRIC = 1 +MIN_PLOIDY = 1.6 +MAX_PLOIDY = 4.8 +MIN_RHO = 0.1 +MIN_GOODNESS_OF_FIT = 0.63 BALANCED_THRESHOLD = 0.51 MIN_NORMAL_DEPTH = 10 MIN_BASE_QUAL = 20 @@ -478,10 +76,7 @@ JAVAJRE = "java" # Change to work directory and load the chromosome information setwd(RUN_DIR) -battenberg_refit(varusepresetrhopsi=VARUSEPRESETRHOPSI, - varpresetrho=VARPRESETRHO, - varpresetpsi=VARPRESETPSI, - tumourname=TUMOURNAME, +battenberg(tumourname=TUMOURNAME, normalname=NORMALNAME, tumour_data_file=TUMOURBAM, normal_data_file=NORMALBAM, @@ -525,4 +120,3 @@ battenberg_refit(varusepresetrhopsi=VARUSEPRESETRHOPSI, skip_preprocessing=SKIP_PREPROCESSING, skip_phasing=SKIP_PHASING, prior_breakpoints_file=PRIOR_BREAKPOINTS_FILE) - diff --git a/refit.R b/refit.R new file mode 100755 index 0000000..d14aa0b --- /dev/null +++ b/refit.R @@ -0,0 +1,528 @@ +library(Battenberg) +library(optparse) + + +#' @inheritParams battenberg +battenberg_refit = function(varusepresetrhopsi=F, varpresetrho=NA, varpresetpsi=NA, tumourname, normalname, tumour_data_file, normal_data_file, imputeinfofile, g1000prefix, problemloci, gccorrectprefix=NULL, + repliccorrectprefix=NULL, g1000allelesprefix=NA, ismale=NA, data_type="wgs", impute_exe="impute2", allelecounter_exe="alleleCounter", nthreads=8, platform_gamma=1, phasing_gamma=1, + segmentation_gamma=10, segmentation_kmin=3, phasing_kmin=1, clonality_dist_metric=0, ascat_dist_metric=1, min_ploidy=1.6, + max_ploidy=4.8, min_rho=0.1, min_goodness=0.63, uninformative_BAF_threshold=0.51, min_normal_depth=10, min_base_qual=20, + min_map_qual=35, calc_seg_baf_option=3, skip_allele_counting=F, skip_preprocessing=F, skip_phasing=F, externalhaplotypefile = NA, + usebeagle=FALSE, + beaglejar=NA, + beagleref.template=NA, + beagleplink.template=NA, + beaglemaxmem=10, + beaglenthreads=1, + beaglewindow=40, + beagleoverlap=4, + javajre="java", + write_battenberg_phasing = T, multisample_relative_weight_balanced = 0.25, multisample_maxlag = 100, segmentation_gamma_multisample = 5, + snp6_reference_info_file=NA, apt.probeset.genotype.exe="apt-probeset-genotype", apt.probeset.summarize.exe="apt-probeset-summarize", + norm.geno.clust.exe="normalize_affy_geno_cluster.pl", birdseed_report_file="birdseed.report.txt", heterozygousFilter="none", + prior_breakpoints_file=NULL) { + + requireNamespace("foreach") + requireNamespace("doParallel") + requireNamespace("parallel") + + if (data_type=="wgs" & is.na(ismale)) { + stop("Please provide a boolean denominator whether this sample represents a male donor") + } + + if (data_type=="wgs" & is.na(g1000allelesprefix)) { + stop("Please provide a path to 1000 Genomes allele reference files") + } + + if (data_type=="wgs" & is.null(gccorrectprefix)) { + stop("Please provide a path to GC content reference files") + } + + if (!file.exists(problemloci)) { + stop("Please provide a path to a problematic loci file") + } + + if (!file.exists(imputeinfofile)) { + stop("Please provide a path to an impute info file") + } + + # check whether the impute_info.txt file contains correct paths + check.imputeinfofile(imputeinfofile = imputeinfofile, is.male = ismale, usebeagle = usebeagle) + + # check whether multisample case + nsamples <- length(tumourname) + if (nsamples > 1) { + if (length(skip_allele_counting) < nsamples) { + skip_allele_counting = rep(skip_allele_counting[1], nsamples) + } + if (length(skip_preprocessing) < nsamples) { + skip_preprocessing = rep(skip_preprocessing[1], nsamples) + } + if (length(skip_phasing) < nsamples) { + skip_phasing = rep(skip_phasing[1], nsamples) + } + } + + + if (data_type=="wgs" | data_type=="WGS") { + if (nsamples > 1) { + print(paste0("Running Battenberg in multisample mode on ", nsamples, " samples: ", paste0(tumourname, collapse = ", "))) + } + chrom_names = get.chrom.names(imputeinfofile, ismale) + } else if (data_type=="snp6" | data_type=="SNP6") { + if (nsamples > 1) { + stop(paste0("Battenberg multisample mode has not been tested with SNP6 data")) + } + chrom_names = get.chrom.names(imputeinfofile, TRUE) + logr_file = paste(tumourname, "_mutantLogR.tab", sep="") + allelecounts_file = NULL + } + + for (sampleidx in 1:nsamples) { + + if (!skip_preprocessing[sampleidx]) { + if (data_type=="wgs" | data_type=="WGS") { + # Setup for parallel computing + clp = parallel::makeCluster(nthreads) + doParallel::registerDoParallel(clp) + + prepare_wgs(chrom_names=chrom_names, + tumourbam=tumour_data_file[sampleidx], + normalbam=normal_data_file, + tumourname=tumourname[sampleidx], + normalname=normalname, + g1000allelesprefix=g1000allelesprefix, + g1000prefix=g1000prefix, + gccorrectprefix=gccorrectprefix, + repliccorrectprefix=repliccorrectprefix, + min_base_qual=min_base_qual, + min_map_qual=min_map_qual, + allelecounter_exe=allelecounter_exe, + min_normal_depth=min_normal_depth, + nthreads=nthreads, + skip_allele_counting=skip_allele_counting[sampleidx], + skip_allele_counting_normal = (sampleidx > 1)) + + # Kill the threads + parallel::stopCluster(clp) + + } else if (data_type=="snp6" | data_type=="SNP6") { + + prepare_snp6(tumour_cel_file=tumour_data_file[sampleidx], + normal_cel_file=normal_data_file, + tumourname=tumourname[sampleidx], + chrom_names=chrom_names, + snp6_reference_info_file=snp6_reference_info_file, + apt.probeset.genotype.exe=apt.probeset.genotype.exe, + apt.probeset.summarize.exe=apt.probeset.summarize.exe, + norm.geno.clust.exe=norm.geno.clust.exe, + birdseed_report_file=birdseed_report_file) + + } else { + print("Unknown data type provided, please provide wgs or snp6") + q(save="no", status=1) + } + } + + if (data_type=="snp6" | data_type=="SNP6") { + # Infer what the gender is - WGS requires it to be specified + gender = infer_gender_birdseed(birdseed_report_file) + ismale = gender == "male" + } + + + if (!skip_phasing[sampleidx]) { + + # if external phasing data is provided (as a vcf), split into chromosomes for use in haplotype reconstruction + if (!is.na(externalhaplotypefile) && file.exists(externalhaplotypefile)) { + externalhaplotypeprefix <- paste0(normalname, "_external_haplotypes_chr") + + # if these files exist already, no need to split again + if (any(!file.exists(paste0(externalhaplotypeprefix, 1:length(chrom_names), ".vcf")))) { + + print(paste0("Splitting external phasing data from ", externalhaplotypefile)) + split_input_haplotypes(chrom_names = chrom_names, + externalhaplotypefile = externalhaplotypefile, + outprefix = externalhaplotypeprefix) + } else { + print("No need to split, external haplotype files per chromosome found") + } + } else { + externalhaplotypeprefix <- NA + } + + # Setup for parallel computing + clp = parallel::makeCluster(nthreads) + doParallel::registerDoParallel(clp) + + # Reconstruct haplotypes + # mclapply(1:length(chrom_names), function(chrom) { + foreach::foreach (chrom=1:length(chrom_names)) %dopar% { + print(chrom) + + run_haplotyping(chrom=chrom, + tumourname=tumourname[sampleidx], + normalname=normalname, + ismale=ismale, + imputeinfofile=imputeinfofile, + problemloci=problemloci, + impute_exe=impute_exe, + min_normal_depth=min_normal_depth, + chrom_names=chrom_names, + snp6_reference_info_file=snp6_reference_info_file, + heterozygousFilter=heterozygousFilter, + usebeagle=usebeagle, + beaglejar=beaglejar, + beagleref=gsub("CHROMNAME",if(chrom==23) "X" else chrom, beagleref.template), + beagleplink=gsub("CHROMNAME",if(chrom==23) "X" else chrom, beagleplink.template), + beaglemaxmem=beaglemaxmem, + beaglenthreads=beaglenthreads, + beaglewindow=beaglewindow, + beagleoverlap=beagleoverlap, + externalhaplotypeprefix=externalhaplotypeprefix, + use_previous_imputation=(sampleidx > 1)) + }#, mc.cores=nthreads) + + # Kill the threads as from here its all single core + parallel::stopCluster(clp) + + # Combine all the BAF output into a single file + combine.baf.files(inputfile.prefix=paste(tumourname[sampleidx], "_chr", sep=""), + inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", + outputfile=paste(tumourname[sampleidx], "_heterozygousMutBAFs_haplotyped.txt", sep=""), + no.chrs=length(chrom_names)) + } + + # Segment the phased and haplotyped BAF data + segment.baf.phased(samplename=tumourname[sampleidx], + inputfile=paste(tumourname[sampleidx], "_heterozygousMutBAFs_haplotyped.txt", sep=""), + outputfile=paste(tumourname[sampleidx], ".BAFsegmented.txt", sep=""), + prior_breakpoints_file=prior_breakpoints_file, + gamma=segmentation_gamma, + phasegamma=phasing_gamma, + kmin=segmentation_kmin, + phasekmin=phasing_kmin, + calc_seg_baf_option=calc_seg_baf_option) + + if (nsamples > 1 | write_battenberg_phasing) { + # Write the Battenberg phasing information to disk as a vcf + write_battenberg_phasing(tumourname = tumourname[sampleidx], + SNPfiles = paste0(tumourname[sampleidx], "_alleleFrequencies_chr", 1:length(chrom_names), ".txt"), + imputedHaplotypeFiles = paste0(tumourname[sampleidx], "_impute_output_chr", 1:length(chrom_names), "_allHaplotypeInfo.txt"), + bafsegmented_file = paste0(tumourname[sampleidx], ".BAFsegmented.txt"), + outprefix = paste0(tumourname[sampleidx], "_Battenberg_phased_chr"), + chrom_names = chrom_names, + include_homozygous = F) + } + + } + + + # if this is a multisample run, combine the battenberg phasing outputs, incorporate it and resegment + if (nsamples > 1) { + print("Constructing multisample phasing") + multisamplehaplotypeprefix <- paste0(normalname, "_multisample_haplotypes_chr") + + + # Setup for parallel computing + clp = parallel::makeCluster(nthreads) + doParallel::registerDoParallel(clp) + + # Reconstruct haplotypes + # mclapply(1:length(chrom_names), function(chrom) { + foreach::foreach (chrom=1:length(chrom_names)) %dopar% { + print(chrom) + + get_multisample_phasing(chrom = chrom, + bbphasingprefixes = paste0(tumourname, "_Battenberg_phased_chr"), + maxlag = multisample_maxlag, + relative_weight_balanced = multisample_relative_weight_balanced, + outprefix = multisamplehaplotypeprefix) + + } + + + # continue over all samples to incorporate the multisample phasing + for (sampleidx in 1:nsamples) { + + # rename the original files without multisample phasing info + MutBAFfiles <- paste0(tumourname[sampleidx], "_chr", 1:length(chrom_names), "_heterozygousMutBAFs_haplotyped.txt") + heterozygousdatafiles <- paste0(tumourname[sampleidx], "_chr", 1:length(chrom_names), "_heterozygousData.png") + raffiles <- paste0(tumourname[sampleidx], "_RAFseg_chr", chrom_names, ".png") + segfiles <- paste0(tumourname[sampleidx], "_segment_chr", chrom_names, ".png") + haplotypedandbafsegmentedfiles <- paste0(tumourname[sampleidx], c("_heterozygousMutBAFs_haplotyped.txt", ".BAFsegmented.txt")) + + file.copy(from = MutBAFfiles, to = gsub(pattern = ".txt$", replacement = "_noMulti.txt", x = MutBAFfiles), overwrite = T) + file.copy(from = heterozygousdatafiles, to = gsub(pattern = ".png$", replacement = "_noMulti.png", x = heterozygousdatafiles), overwrite = T) + file.copy(from = raffiles, to = gsub(pattern = ".png$", replacement = "_noMulti.png", x = raffiles), overwrite = T) + file.copy(from = segfiles, to = gsub(pattern = ".png$", replacement = "_noMulti.png", x = segfiles), overwrite = T) + file.copy(from = haplotypedandbafsegmentedfiles, to = gsub(pattern = ".txt$", replacement = "_noMulti.txt", x = haplotypedandbafsegmentedfiles), overwrite = T) + # done renaming, next sections will overwrite orignals + + + foreach::foreach (chrom=1:length(chrom_names)) %dopar% { + print(chrom) + + input_known_haplotypes(chrom = chrom, + chrom_names = chrom_names, + imputedHaplotypeFile = paste0(tumourname[sampleidx], "_impute_output_chr", chrom, "_allHaplotypeInfo.txt"), + externalHaplotypeFile = paste0(multisamplehaplotypeprefix, chrom, ".vcf"), + oldfilesuffix = "_noMulti.txt") + + GetChromosomeBAFs(chrom=chrom, + SNP_file=paste0(tumourname[sampleidx], "_alleleFrequencies_chr", chrom, ".txt"), + haplotypeFile=paste0(tumourname[sampleidx], "_impute_output_chr", chrom, "_allHaplotypeInfo.txt"), + samplename=tumourname[sampleidx], + outfile=paste0(tumourname[sampleidx], "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt"), + chr_names=chrom_names, + minCounts=min_normal_depth) + + # Plot what we have until this point + plot.haplotype.data(haplotyped.baf.file=paste0(tumourname[sampleidx], "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt"), + imageFileName=paste0(tumourname[sampleidx],"_chr",chrom,"_heterozygousData.png"), + samplename=tumourname[sampleidx], + chrom=chrom, + chr_names=chrom_names) + } + + } + + # Kill the threads as from here its single core + parallel::stopCluster(clp) + + for (sampleidx in 1:nsamples) { + + # Combine all the BAF output into a single file + combine.baf.files(inputfile.prefix=paste0(tumourname[sampleidx], "_chr"), + inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", + outputfile=paste0(tumourname[sampleidx], "_heterozygousMutBAFs_haplotyped.txt"), + no.chrs=length(chrom_names)) + + } + + # Segment the phased and haplotyped BAF data + segment.baf.phased.multisample(samplename=tumourname, + inputfile=paste(tumourname, "_heterozygousMutBAFs_haplotyped.txt", sep=""), + outputfile=paste(tumourname, ".BAFsegmented.txt", sep=""), + prior_breakpoints_file=prior_breakpoints_file, + gamma=segmentation_gamma_multisample, + calc_seg_baf_option=calc_seg_baf_option) + + } + + + # Setup for parallel computing + clp = parallel::makeCluster(min(nthreads, nsamples)) + doParallel::registerDoParallel(clp) + + # for (sampleidx in 1:nsamples) { + foreach::foreach (sampleidx=1:nsamples) %dopar% { + print(paste0("Fitting final copy number and calling subclones for sample ", tumourname[sampleidx])) + + if (data_type=="wgs" | data_type=="WGS") { + logr_file = paste(tumourname[sampleidx], "_mutantLogR_gcCorrected.tab", sep="") + allelecounts_file = paste(tumourname[sampleidx], "_alleleCounts.tab", sep="") + } + + # Fit a clonal copy number profile + fit.copy.number(samplename=tumourname[sampleidx], + outputfile.prefix=paste(tumourname[sampleidx], "_", sep=""), + inputfile.baf.segmented=paste(tumourname[sampleidx], ".BAFsegmented.txt", sep=""), + inputfile.baf=paste(tumourname[sampleidx],"_mutantBAF.tab", sep=""), + inputfile.logr=logr_file, + dist_choice=clonality_dist_metric, + ascat_dist_choice=ascat_dist_metric, + min.ploidy=min_ploidy, + max.ploidy=max_ploidy, + min.rho=min_rho, + min.goodness=min_goodness, + uninformative_BAF_threshold=uninformative_BAF_threshold, + gamma_param=platform_gamma, + use_preset_rho_psi=varusepresetrhopsi, + preset_rho=varpresetrho, + preset_psi=varpresetpsi, + read_depth=30) + + # Go over all segments, determine which segements are a mixture of two states and fit a second CN state + callSubclones(sample.name=tumourname[sampleidx], + baf.segmented.file=paste(tumourname[sampleidx], ".BAFsegmented.txt", sep=""), + logr.file=logr_file, + rho.psi.file=paste(tumourname[sampleidx], "_rho_and_psi.txt",sep=""), + output.file=paste(tumourname[sampleidx],"_subclones.txt", sep=""), + output.figures.prefix=paste(tumourname[sampleidx],"_subclones_chr", sep=""), + output.gw.figures.prefix=paste(tumourname[sampleidx],"_BattenbergProfile", sep=""), + masking_output_file=paste(tumourname[sampleidx], "_segment_masking_details.txt", sep=""), + prior_breakpoints_file=prior_breakpoints_file, + chr_names=chrom_names, + gamma=platform_gamma, + segmentation.gamma=NA, + siglevel=0.05, + maxdist=0.01, + noperms=1000, + calc_seg_baf_option=calc_seg_baf_option) + + # Make some post-hoc plots + make_posthoc_plots(samplename=tumourname[sampleidx], + logr_file=logr_file, + subclones_file=paste(tumourname[sampleidx], "_subclones.txt", sep=""), + rho_psi_file=paste(tumourname[sampleidx], "_rho_and_psi.txt", sep=""), + bafsegmented_file=paste(tumourname[sampleidx], ".BAFsegmented.txt", sep=""), + logrsegmented_file=paste(tumourname[sampleidx], ".logRsegmented.txt", sep=""), + allelecounts_file=allelecounts_file) + + # Save refit suggestions for a future rerun + cnfit_to_refit_suggestions(samplename=tumourname[sampleidx], + subclones_file=paste(tumourname[sampleidx], "_subclones.txt", sep=""), + rho_psi_file=paste(tumourname[sampleidx], "_rho_and_psi.txt", sep=""), + gamma_param=platform_gamma) + + } + + # Kill the threads as last part again is single core + parallel::stopCluster(clp) + + if (nsamples > 1) { + print("Assessing mirrored subclonal allelic imbalance (MSAI)") + call_multisample_MSAI(rdsprefix = multisamplehaplotypeprefix, + subclonesfiles = paste0(tumourname, "_subclones.txt"), + chrom_names = chrom_names, + tumournames = tumourname, + plotting = T) + } +} +environment(battenberg_refit) <- environment(battenberg) + + +option_list = list( + make_option(c("-t", "--tumourname"), type="character", default=NULL, help="Samplename of the tumour", metavar="character"), + make_option(c("-n", "--normalname"), type="character", default=NULL, help="Samplename of the normal", metavar="character"), + make_option(c("--tb"), type="character", default=NULL, help="Tumour BAM file", metavar="character"), + make_option(c("--nb"), type="character", default=NULL, help="Normal BAM file", metavar="character"), + make_option(c("--sex"), type="character", default=NULL, help="Sex of the sample", metavar="character"), + make_option(c("-o", "--output"), type="character", default=NULL, help="Directory where output will be written", metavar="character"), + make_option(c("--skip_allelecount"), type="logical", default=FALSE, action="store_true", help="Provide when alleles don't have to be counted. This expects allelecount files on disk", metavar="character"), + make_option(c("--skip_preprocessing"), type="logical", default=FALSE, action="store_true", help="Provide when pre-processing has previously completed. This expects the files on disk", metavar="character"), + make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. This expects the files on disk", metavar="character"), + make_option(c("--min_ploidy"), type="numeric", default=1.6, help="Minimum ploidy to be considered (Default: 1.6)", metavar="character"), + make_option(c("--max_ploidy"), type="numeric", default=4.8, help="Maximum ploidy to be considered (Default: 4.8)", metavar="character"), + make_option(c("--min_rho"), type="numeric", default=0.1, help="Minimum purity to be considered (Default: 0.1", metavar="character"), + make_option(c("--min_goodness"), type="numeric", default=0.63, help="Minimum goodness of fit required for a purity/ploidy combination to be accepted as a solution (Default: 0.63)", metavar="character"), + make_option(c("--use_presetrhopsi"), type="logical", default=FALSE, help="Use a preset rho and psi for refitting", metavar="character"), + make_option(c("--preset_rho"), type="numeric", default=NULL, help="Preset rho", metavar="character"), + make_option(c("--preset_psi"), type="numeric", default=NULL, help="Preset psi", metavar="character"), + make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), + make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character") +) + +opt_parser = OptionParser(option_list=option_list) +opt = parse_args(opt_parser) + +TUMOURNAME = opt$tumourname +NORMALNAME = opt$normalname +NORMALBAM = opt$nb +TUMOURBAM = opt$tb +IS.MALE = opt$sex=="male" | opt$sex=="Male" +RUN_DIR = opt$output +SKIP_ALLELECOUNTING = opt$skip_allelecount +SKIP_PREPROCESSING = opt$skip_preprocessing +SKIP_PHASING = opt$skip_phasing +NTHREADS = opt$cpu +PRIOR_BREAKPOINTS_FILE = opt$bp +MIN_PLOIDY = opt$min_ploidy +MAX_PLOIDY = opt$max_ploidy +MIN_RHO = opt$min_rho +MIN_GOODNESS_OF_FIT = opt$min_goodness +VARUSEPRESETRHOPSI = opt$use_presetrhopsi +VARPRESETRHO = opt$preset_rho +VARPRESETPSI = opt$preset_psi + +############################################################################### +# 2018-11-01 +# A pure R Battenberg v2.2.9 WGS pipeline implementation. +# sd11 [at] sanger.ac.uk +############################################################################### + +# General static +IMPUTEINFOFILE = "/opt/battenberg_reference/imputation/impute_info_modified.txt" +G1000PREFIX = "/opt/battenberg_reference/1000G_loci_hg38/1kg.phase3.v5a_GRCh38nounref_allele_index_chr" +G1000PREFIX_AC = "/opt/battenberg_reference/1000G_loci_hg38/1kg.phase3.v5a_GRCh38nounref_loci_chrstring_chr" +GCCORRECTPREFIX = "/opt/battenberg_reference/GC_correction_hg38/1000G_GC_chr" +REPLICCORRECTPREFIX = "/opt/battenberg_reference/RT_correction_hg38/1000G_RT_chr" +IMPUTE_EXE = "impute2" + +PLATFORM_GAMMA = 1 +PHASING_GAMMA = 1 +SEGMENTATION_GAMMA = 10 +SEGMENTATIIN_KMIN = 3 +PHASING_KMIN = 1 +CLONALITY_DIST_METRIC = 0 +ASCAT_DIST_METRIC = 1 +BALANCED_THRESHOLD = 0.51 +MIN_NORMAL_DEPTH = 10 +MIN_BASE_QUAL = 20 +MIN_MAP_QUAL = 35 +CALC_SEG_BAF_OPTION = 1 + +# WGS specific static +ALLELECOUNTER = "alleleCounter" +PROBLEMLOCI = "/opt/battenberg_reference/probloci/probloci.txt" +USEBEAGLE = T +GENOME_VERSION = "hg38" +BEAGLE_BASEDIR = "/opt/battenberg_reference/beagle5" +BEAGLEJAR = file.path(BEAGLE_BASEDIR, "beagle.24Aug19.3e8.jar") +BEAGLEREF.template = file.path(BEAGLE_BASEDIR, GENOME_VERSION, "chrCHROMNAME.1kg.phase3.v5a_GRCh38nounref.vcf.gz") +BEAGLEPLINK.template = file.path(BEAGLE_BASEDIR, GENOME_VERSION, "plink.chrCHROMNAME.GRCh38.map") +JAVAJRE = "java" + + +# Change to work directory and load the chromosome information +setwd(RUN_DIR) + +battenberg_refit(varusepresetrhopsi=VARUSEPRESETRHOPSI, + varpresetrho=VARPRESETRHO, + varpresetpsi=VARPRESETPSI, + tumourname=TUMOURNAME, + normalname=NORMALNAME, + tumour_data_file=TUMOURBAM, + normal_data_file=NORMALBAM, + ismale=IS.MALE, + imputeinfofile=IMPUTEINFOFILE, + g1000prefix=G1000PREFIX, + g1000allelesprefix=G1000PREFIX_AC, + gccorrectprefix=GCCORRECTPREFIX, + repliccorrectprefix=REPLICCORRECTPREFIX, + problemloci=PROBLEMLOCI, + data_type="wgs", + impute_exe=IMPUTE_EXE, + allelecounter_exe=ALLELECOUNTER, + usebeagle=USEBEAGLE, + beaglejar=BEAGLEJAR, + beagleref=BEAGLEREF.template, + beagleplink=BEAGLEPLINK.template, + beaglemaxmem=10, + beaglenthreads=1, + beaglewindow=40, + beagleoverlap=4, + javajre=JAVAJRE, + nthreads=NTHREADS, + platform_gamma=PLATFORM_GAMMA, + phasing_gamma=PHASING_GAMMA, + segmentation_gamma=SEGMENTATION_GAMMA, + segmentation_kmin=SEGMENTATIIN_KMIN, + phasing_kmin=PHASING_KMIN, + clonality_dist_metric=CLONALITY_DIST_METRIC, + ascat_dist_metric=ASCAT_DIST_METRIC, + min_ploidy=MIN_PLOIDY, + max_ploidy=MAX_PLOIDY, + min_rho=MIN_RHO, + min_goodness=MIN_GOODNESS_OF_FIT, + uninformative_BAF_threshold=BALANCED_THRESHOLD, + min_normal_depth=MIN_NORMAL_DEPTH, + min_base_qual=MIN_BASE_QUAL, + min_map_qual=MIN_MAP_QUAL, + calc_seg_baf_option=CALC_SEG_BAF_OPTION, + skip_allele_counting=SKIP_ALLELECOUNTING, + skip_preprocessing=SKIP_PREPROCESSING, + skip_phasing=SKIP_PHASING, + prior_breakpoints_file=PRIOR_BREAKPOINTS_FILE) +