Skip to content

Commit ef7556c

Browse files
committed
Remove lowMemory as its part of bambu, add clustering
add main.nf
1 parent f957ea2 commit ef7556c

File tree

3 files changed

+95
-189
lines changed

3 files changed

+95
-189
lines changed

Diff for: main.nf

+66-187
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,20 @@ nextflow.enable.dsl=2
44

55
params.chemistry = "custom" //"10x3v3" "10x3v2" "10x5v2"
66
params.technology = 'ONT' //"ONT" "PacBio"
7-
params.whitelist = "null"
7+
params.whitelist = "NULL"
88
params.bambuPath = "bambu"
9-
params.lowMemory = false
9+
params.lowMemory = "FALSE"
1010
params.ncore = 4
11-
params.spatial = false
11+
params.spatial = "FALSE"
1212

1313
params.NDR = "NULL"
1414
params.cleanReads = "TRUE"
15-
params.keepChimericReads = false
15+
params.keepChimericReads = "FALSE"
1616
params.deduplicateUMIs = "TRUE"
1717
params.bambuParams = tuple(params.cleanReads, params.keepChimericReads, params.deduplicateUMIs)
1818
params.barcodeMap = "TRUE"
19+
params.clusters = "NULL"
20+
params.resolution = 0.8
1921
//params.reads = false
2022
//params.bams = false
2123

@@ -111,6 +113,9 @@ process bambu_discovery{
111113
tuple val(cleanReads), val(keepChimericReads), val(deduplicateUMIs)
112114
val(NDR)
113115
val(barcode_map)
116+
val(whitelist)
117+
val(lowMemory)
118+
114119

115120
output:
116121
tuple val ('combined'), path ('*readClassFile.rds'), path ('*quantData.rds')
@@ -154,7 +159,7 @@ process bambu_discovery{
154159
annotations <- prepareAnnotations("$annotation")
155160
156161
# Transcript discovery and generate readGrgList for each cell
157-
readClassFile = bambu(reads = samples, annotations = annotations, genome = "$genome", ncore = $params.ncore, discovery = FALSE, quant = FALSE, demultiplexed = barcode_maps, verbose = TRUE, assignDist = FALSE, lowMemory = TRUE, yieldSize = 10000000, sampleNames = ids, cleanReads = as.logical($cleanReads), dedupUMI = as.logical($deduplicateUMIs))
162+
readClassFile = bambu(reads = samples, annotations = annotations, genome = "$genome", ncore = $params.ncore, discovery = FALSE, quant = FALSE, demultiplexed = barcode_maps, verbose = TRUE, assignDist = FALSE, lowMemory = $params.lowMemory, yieldSize = 10000000, sampleNames = ids, cleanReads = as.logical($cleanReads), dedupUMI = as.logical($deduplicateUMIs))
158163
saveRDS(readClassFile, paste0(runName, "_readClassFile.rds"))
159164
if(isFALSE($NDR)){
160165
extendedAnno = bambu(reads = readClassFile, annotations = annotations, genome = "$genome", ncore = $params.ncore, discovery = TRUE, quant = FALSE, demultiplexed = TRUE, verbose = FALSE, assignDist = FALSE)
@@ -167,121 +172,12 @@ process bambu_discovery{
167172
writeToGTF(extendedAnno_NDR1, paste0(runName, "extended_annotations_NDR1.gtf"))
168173
rm(extendedAnno_NDR1)
169174
rm(annotations)
170-
se = bambu(reads = readClassFile, annotations = extendedAnno, genome = "$genome", ncore = $params.ncore, discovery = FALSE, quant = FALSE, demultiplexed = TRUE, verbose = FALSE, opt.em = list(degradationBias = FALSE), assignDist = TRUE)
175+
se = bambu(reads = readClassFile, annotations = extendedAnno, genome = "$genome", ncore = $params.ncore, discovery = FALSE, quant = FALSE, demultiplexed = TRUE, verbose = FALSE, opt.em = list(degradationBias = FALSE), assignDist = TRUE, spatial = $whitelist)
171176
saveRDS(se, paste0(runName, "_quantData.rds"))
172177
#write(runName, "runName.txt")
173178
"""
174179
}
175180

176-
process bambu_readClassConstruction{
177-
cpus params.ncore
178-
maxForks 1
179-
180-
input:
181-
tuple val(runName),path(bam)
182-
path(genome)
183-
path(annotation)
184-
val(bambuPath)
185-
tuple val(cleanReads),val(keepChimericReads), val(deduplicateUMIs)
186-
187-
output:
188-
tuple val(runName), path ('*readClassFile.rds')
189-
190-
script:
191-
"""
192-
#!/usr/bin/env Rscript
193-
194-
library(devtools)
195-
if("$bambuPath" == "bambu") {
196-
load_all("/mnt/software/bambu")
197-
} else {
198-
load_all("$bambuPath")
199-
}
200-
annotations <- prepareAnnotations("$annotation")
201-
readClassFile = bambu(reads = "$bam", annotations = annotations, genome = "$genome", ncore = $params.ncore, discovery = FALSE, quant = FALSE, demultiplexed = TRUE, verbose = TRUE, assignDist = FALSE, lowMemory = TRUE, yieldSize = 10000000, cleanReads = as.logical($cleanReads), dedupUMI = as.logical($deduplicateUMIs))
202-
saveRDS(readClassFile, paste0("$runName", "_readClassFile.rds"))
203-
"""
204-
205-
}
206-
207-
process bambu_extend{
208-
publishDir "$params.outdir", mode: 'copy'
209-
210-
cpus params.ncore
211-
maxForks params.ncore
212-
213-
input:
214-
path(readClassFile)
215-
path(genome)
216-
path(annotation)
217-
val(bambuPath)
218-
val(NDR)
219-
220-
output:
221-
path ('*extended_annotations.rds')
222-
path ('*extended_annotations_NDR1.rds')
223-
path ('extended_annotations_NDR1.gtf')
224-
225-
script:
226-
"""
227-
#!/usr/bin/env Rscript
228-
229-
samples = "$readClassFile"
230-
samples = gsub("[][]","", gsub(' ','', samples))
231-
samples = unlist(strsplit(samples, ','))
232-
print(samples)
233-
234-
library(devtools)
235-
if("$bambuPath" == "bambu") {
236-
load_all("/mnt/software/bambu")
237-
} else {
238-
load_all("$bambuPath")
239-
}
240-
annotations <- prepareAnnotations("$annotation")
241-
if(isFALSE($NDR)){
242-
extendedAnno = bambu(reads = samples, annotations = annotations, genome = "$genome", ncore = $params.ncore, discovery = TRUE, quant = FALSE, demultiplexed = TRUE, verbose = FALSE, assignDist = FALSE)
243-
} else{
244-
extendedAnno = bambu(reads = samples, annotations = annotations, genome = "$genome", ncore = $params.ncore, discovery = TRUE, quant = FALSE, demultiplexed = TRUE, verbose = FALSE, assignDist = FALSE, NDR = $NDR)
245-
}
246-
saveRDS(extendedAnno, "_extended_annotations.rds")
247-
extendedAnno_NDR1 = bambu(reads = samples, annotations = annotations, genome = "$genome", NDR = 1, ncore = $params.ncore, discovery = TRUE, quant = FALSE, demultiplexed = TRUE, verbose = FALSE, assignDist = FALSE)
248-
saveRDS(extendedAnno_NDR1, "_extended_annotations_NDR1.rds")
249-
writeToGTF(extendedAnno_NDR1, "extended_annotations_NDR1.gtf")
250-
"""
251-
252-
}
253-
254-
process bambu_assignDist{
255-
cpus params.ncore
256-
maxForks params.ncore
257-
258-
input:
259-
tuple val(runName),path(readClassFile)
260-
path(extendedAnno)
261-
path(extendedAnno_NDR1)
262-
path(genome)
263-
val(bambuPath)
264-
265-
output:
266-
tuple val(runName), path ('*quantData.rds'), emit: quantData
267-
268-
script:
269-
"""
270-
#!/usr/bin/env Rscript
271-
272-
library(devtools)
273-
if("$bambuPath" == "bambu") {
274-
load_all("/mnt/software/bambu")
275-
} else {
276-
library(devtools)
277-
load_all("$bambuPath")
278-
}
279-
extendedAnno <- readRDS("$extendedAnno")
280-
se = bambu(reads = "$readClassFile", annotations = extendedAnno, genome = "$genome", ncore = $params.ncore, discovery = FALSE, quant = FALSE, demultiplexed = TRUE, verbose = FALSE, opt.em = list(degradationBias = FALSE), assignDist = TRUE)
281-
saveRDS(se, paste0("$runName", "_quantData.rds"))
282-
"""
283-
}
284-
285181
process bambu_quant{
286182

287183
publishDir "$params.outdir", mode: 'copy'
@@ -296,7 +192,8 @@ process bambu_quant{
296192
path(extended_annotations_NDR1_gtf)
297193
path(genome)
298194
val(bambuPath)
299-
val(whitelist)
195+
val(clusters)
196+
val(resolution)
300197

301198
output:
302199
path ('*.rds')
@@ -307,25 +204,50 @@ process bambu_quant{
307204
script:
308205
"""
309206
#!/usr/bin/env Rscript
207+
#.libPaths("/usr/local/lib/R/site-library")
310208
library(devtools)
311209
if("$bambuPath" == "bambu") {
312210
load_all("/mnt/software/bambu")
313211
} else {
314-
library(devtools)
315212
load_all("$bambuPath")
316213
}
317214
if(".txt" %in% "$sample"){runName = readLines("$sample")
318215
} else{runName = "$sample"}
319216
320217
extendedAnno <- readRDS("$extendedAnno")
321-
quantData = readRDS("$quantData")
322-
if("$whitelist" == FALSE){
323-
se = bambu(reads = "test.rds", annotations = extendedAnno, genome = "$genome", quantData = quantData, assignDist = FALSE, ncore = $params.ncore, discovery = FALSE, quant = TRUE, demultiplexed = TRUE, verbose = FALSE, opt.em = list(degradationBias = FALSE))
324-
} else{
325-
se = bambu(reads = "test.rds", annotations = extendedAnno, genome = "$genome", quantData = quantData, assignDist = FALSE, ncore = $params.ncore, spatial = "$whitelist", discovery = FALSE, quant = TRUE, demultiplexed = TRUE, verbose = FALSE, opt.em = list(degradationBias = FALSE))
218+
quantDatas = readRDS("$quantData")
219+
220+
#if no clustering provided, automatically cluster
221+
if(is.null($clusters)){
222+
clusters = list()
223+
cellMixs = list()
224+
source("${projectDir}/utilityFunctions.R")
225+
for(quantData in quantDatas){
226+
quantData.gene = transcriptToGeneExpression(quantData)
227+
counts = assays(quantData.gene)\$counts
228+
cellMix = clusterCells(counts, resolution = $resolution)
229+
x = setNames(names([email protected]), [email protected])
230+
clusters = c(clusters, splitAsList(unname(x), names(x)))
231+
cellMixs = c(cellMixs, cellMix)
232+
}
233+
saveRDS(clusters, paste0(runName, "_clusters.rds"))
234+
saveRDS(cellMixs, paste0(runName, "_cellMixs.rds"))
326235
}
327-
saveRDS(se, paste0(runName, "_se.rds"))
328236
237+
se = bambu( reads = "test.rds",
238+
annotations = extendedAnno,
239+
genome = "$genome",
240+
quantData = quantData,
241+
assignDist = FALSE,
242+
ncore = $params.ncore,
243+
discovery = FALSE,
244+
quant = TRUE,
245+
demultiplexed = TRUE,
246+
verbose = FALSE,
247+
opt.em = list(degradationBias = FALSE),
248+
clusters = clusters)
249+
250+
saveRDS(se, paste0(runName, "_se.rds"))
329251
writeBambuOutput(se, path = ".", prefix = paste0(runName, "_sparse_"))
330252
"""
331253

@@ -345,7 +267,6 @@ process fusion_mode{
345267
// This is the workflow to execute the process
346268
workflow {
347269
if (params.reads) {
348-
349270
//User can provide either 1 .fastq file or a .csv with .fastq files
350271
fastq = file(params.reads, checkIfExists:true)
351272
if(fastq.getExtension() == "csv") {
@@ -373,10 +294,10 @@ workflow {
373294
minimap_out_ch = minimap(flexiplex_out_ch, "$params.genome")
374295
//params.bams = minimap_out_ch
375296
}
376-
377-
}
378-
379-
if(!params.reads) {
297+
sampleIds = minimap_out_ch.collect{it[0]}
298+
bamsFiles = minimap_out_ch.collect{it[1]}
299+
}
300+
else{ //When starting from bam
380301
bam = file(params.bams, checkIfExists:true)
381302
if(bam.getExtension() == "csv") {
382303
bamsChannel = Channel.fromPath(params.bams) \
@@ -391,68 +312,26 @@ workflow {
391312
bamsChannel = bamsChannel
392313
.map {["Bambu", it]}
393314
}
394-
if(params.lowMemory){
395-
//bambuTxDisc_out_ch = bambu_discovery(bamsChannel, "$params.genome", "$params.annotation")
396-
//bambuQuant_out_ch = bambu_quant(bambuTxDisc_out_ch, "$params.genome")
397-
readClassConstruction_out_ch = bambu_readClassConstruction(bamsChannel, "$params.genome", "$params.annotation", "$params.bambuPath", params.bambuParams)
398-
extend_out_ch = bambu_extend(readClassConstruction_out_ch.collect{it[1]},"$params.genome","$params.annotation", "$params.bambuPath", "$params.NDR")
399-
assignDist_out_ch = bambu_assignDist(readClassConstruction_out_ch, extend_out_ch, "$params.genome", "$params.bambuPath")
400-
readClassConstruction_out_ch
401-
.join(assignDist_out_ch, by:0)
402-
.set { bambuQuant_inputs }
403-
bambuQuant_out_ch = bambu_quant(bambuQuant_inputs, extend_out_ch, "$params.genome", "$params.bambuPath", "$params.spatial")
404-
}
405-
else{
406-
sampleIds = bamsChannel.collect{it[0]}
407-
bamsFiles = bamsChannel.collect{it[1]}
408-
barcodeMaps = bamsChannel.collect{it[2]}
409-
whiteLists = bamsChannel.collect{it[3]}
410-
411-
barcodeMaps2 = barcodeMaps.map { it == null ? it : "TRUE" }
412-
413-
if(params.whitelist == "null" & whiteLists.any { it == '' }){
414-
echo "Missing whitelist entries in samplesheet or no --whitelist provided"
415-
exit 1
416-
}
417-
whiteLists2 = params.whitelist
418-
if(whiteLists2 != "null"){
419-
whiteLists2 = whiteLists { it == '' ? params.whitelist : it }
420-
}
421-
422-
423-
bambuTxDisc_out_ch = bambu_discovery(sampleIds, bamsFiles, "$params.genome", "$params.annotation", "$params.bambuPath", params.bambuParams,"$params.NDR",barcodeMaps2)
424-
if(params.spatial){
425-
bambuQuant_out_ch = bambu_quant(bambuTxDisc_out_ch, "$params.genome", "$params.bambuPath", whiteLists2)
426-
}
427-
else{
428-
bambuQuant_out_ch = bambu_quant(bambuTxDisc_out_ch, "$params.genome", "$params.bambuPath", "FALSE")
429-
}
430-
}
315+
sampleIds = bamsChannel.collect{it[0]}
316+
bamsFiles = bamsChannel.collect{it[1]}
317+
barcodeMaps = bamsChannel.collect{it[2]}
318+
whiteLists = bamsChannel.collect{it[3]}
431319

432-
}
433-
else{ //When preprocessing steps were run
434-
if(params.lowMemory){
435-
//bambuTxDisc_out_ch = bambu_discovery(params.bams, "$params.genome", "$params.annotation")
436-
//bambuQuant_out_ch = bambu_quant(bambuTxDisc_out_ch, "$params.genome")
437-
readClassConstruction_out_ch = bambu_readClassConstruction(params.bams, "$params.genome", "$params.annotation", "$params.bambuPath", params.bambuParams)
438-
extend_out_ch = bambu_extend(readClassConstruction_out_ch.collect{it[1]},"$params.genome","$params.annotation", "$params.bambuPath", "$params.NDR")
439-
assignDist_out_ch = bambu_assignDist(readClassConstruction_out_ch, extend_out_ch, "$params.genome", "$params.bambuPath")
440-
readClassConstruction_out_ch
441-
.join(assignDist_out_ch, by:0)
442-
.set { bambuQuant_inputs }
443-
bambuQuant_out_ch = bambu_quant(bambuQuant_inputs, extend_out_ch, "$params.genome", "$params.bambuPath", "$params.spatial") }
444-
else{
445-
sampleIds = minimap_out_ch.collect{it[0]}
446-
bamsFiles = minimap_out_ch.collect{it[1]}
447-
bambuTxDisc_out_ch = bambu_discovery(sampleIds, bamsFiles, "$params.genome", "$params.annotation", "$params.bambuPath", params.bambuParams,"$params.NDR", params.barcodeMap)
448-
if(params.spatial){
449-
bambuQuant_out_ch = bambu_quant(bambuTxDisc_out_ch, "$params.genome", "$params.bambuPath", params.whitelist)
450-
}
451-
else{
452-
bambuQuant_out_ch = bambu_quant(bambuTxDisc_out_ch, "$params.genome", "$params.bambuPath", "FALSE")
453-
}
320+
barcodeMaps2 = barcodeMaps.map { it == null ? it : "TRUE" }
321+
322+
if(params.whitelist == "null" & whiteLists.any { it == '' }){
323+
echo "Missing whitelist entries in samplesheet or no --whitelist provided"
324+
exit 1
454325
}
326+
whiteLists2 = params.whitelist
327+
//if(whiteLists2 != "null"){
328+
// whiteLists2 = whiteLists { it == '' ? params.whitelist : it }
329+
//} else{
330+
// whileLists2 = "FALSE"
331+
//}
455332
}
333+
bambuTxDisc_out_ch = bambu_discovery(sampleIds, bamsFiles, "$params.genome", "$params.annotation", "$params.bambuPath", params.bambuParams,"$params.NDR",barcodeMaps2, whiteLists2, "$params.lowMemory")
334+
bambuQuant_out_ch = bambu_quant(bambuTxDisc_out_ch, "$params.genome", "$params.bambuPath", "$params.clusters", "$params.resolution")
456335
}
457336

458337

Diff for: nextflow.config

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
singularity {
22
enabled = true
3-
singularity.enabled = true
4-
singularity.autoMounts = true
3+
autoMounts = true
4+
runOptions = "--bind $PWD"
5+
singularity.envWhitelist = "JAVA_HOME"
6+
SINGULARITYENV_R_LIBS_USER="/usr/local/lib/R/site-library"
7+
SINGULARITYENV_R_ENVIRON_USER="NULL"
58
}

Diff for: utilityFunctions.R

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
if("Seurat" %in% rownames(installed.packages()) == FALSE) {remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE)}
2+
install.packages("SeuratObject")
3+
install.packages("Seurat")
4+
library(Seurat)
5+
6+
clusterCells = function(counts, resolution = 0.8, dims = 1:15){
7+
8+
cellMix <- CreateSeuratObject(counts = counts,
9+
project = "cellMix", min.cells = 1)#, min.features = 200)
10+
#cellMix <- subset(cellMix, subset = nFeature_RNA > nFeature_RNA_threshold & nFeature_RNA < nFeature_RNA_threshold_max)
11+
#nFeature_RNA_threshold = 1000, nFeature_RNA_threshold_max = 9000,
12+
cellMix <- NormalizeData(cellMix, normalization.method = "LogNormalize", scale.factor = 10000)
13+
cellMix <- FindVariableFeatures(cellMix, selection.method = "vst", nfeatures = 2500)
14+
all.genes <- rownames(cellMix)
15+
cellMix <- ScaleData(cellMix, features = all.genes)
16+
cellMix <- RunPCA(cellMix, features = VariableFeatures(object = cellMix))
17+
dim = dim(cellMix@reductions$pca)[2]
18+
if(dim < 15){ dims = 1:dim}
19+
cellMix <- FindNeighbors(cellMix, dims = dims)
20+
cellMix <- FindClusters(cellMix, resolution = resolution)
21+
cellMix <- RunUMAP(cellMix, dims = dims)
22+
23+
return(cellMix)
24+
}

0 commit comments

Comments
 (0)