Skip to content

Commit e3143b4

Browse files
committed
add pipeline dir to organize execution
1 parent 6e88ef2 commit e3143b4

39 files changed

+152
-138
lines changed

.gitignore

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
# Directories to ignore
22
.nextflow/
3-
data/
4-
results/
5-
results-backups/
3+
**/data/
4+
**/results/
65
work/
76

87
# Files to ignore

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ The pipeline takes FASTQ files as input, performs initial MD5 checks, quality co
2727
* 5c. Produce heatmap visualizations
2828
* 6a. Execute Wormcat Batch (`wormcat_batch.nf`)
2929

30-
30+
Note: when running, use `nextflow run PIPELINE.nf -bg -N [email protected]`, which will run in the background and send an email when the process terminates (success or failure)
3131

3232
## Pipeline Outputs
3333

bin/expression_summary.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,20 @@ def extract_salmon_experiment_name(file_path):
1616
experiment_name = directory_path[index:]
1717
return experiment_name
1818

19-
def find_files_with_suffix(directory, suffix):
19+
def find_files_with_suffix(directory, dir_prefix, file_suffix):
2020
matching_files = []
21-
for root, dirs, files in os.walk(directory):
22-
for file in files:
23-
if file.endswith(suffix):
24-
matching_files.append(os.path.join(root, file))
21+
for root, dirs, files in os.walk(directory,followlinks=True):
22+
if dir_prefix in root:
23+
print(f"{root=}")
24+
for file in files:
25+
if file.endswith(file_suffix):
26+
matching_files.append(os.path.join(root, file))
2527
return matching_files
2628

2729
def aggregate_expression_counts(input_path, execution_variables):
2830
experiment_data_dfs = []
2931

30-
results_files = find_files_with_suffix(input_path, execution_variables['file_suffix'])
32+
results_files = find_files_with_suffix(input_path, execution_variables['dir_prefix'], execution_variables['file_suffix'])
3133
# Read in all the individual results from RSEM
3234
for result_file in results_files:
3335
df = pd.read_csv(result_file, delimiter='\t')
@@ -59,12 +61,14 @@ def main():
5961
cmd_line_msg = "expression_summary.py --expression-type [rsem | salmon] --input-path [<base_directory>]"
6062
execution_variables = {
6163
'rsem':{'output_file':"genes_expression_expected_count.tsv",
64+
'dir_prefix':'rsem_expression_',
6265
'file_suffix':'genes.results',
6366
'columns_to_keep':['gene_id', 'expected_count'],
6467
'expression_type' : 'rsem',
6568
'extract_experiment_name': extract_rsem_experiment_name
6669
},
6770
'salmon':{'output_file':"transcript_expression_counts.tsv",
71+
'dir_prefix':'salmon_expression_',
6872
'file_suffix':'quant.sf',
6973
'columns_to_keep':['Name', 'NumReads'],
7074
'expression_type' : 'salmon',

bin/get_dropbox_data.sh

-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ length=${#directories[@]}
2020
# Reset the IFS to its default value (space)
2121
IFS=" "
2222

23-
2423
# Iterate over the directories and provide the time it takes to copy
2524
# also provide info on the number of files to copy and how many have been copied so far
2625
counter=0

create_salmon_index.nf

-35
This file was deleted.

get_dropbox_data-mike.nf

-42
This file was deleted.

modules/bedtools/main.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
params.outdir = 'results'
21

32
process DECOY_TRANSCRIPTOME {
43
container 'danhumassmed/samtools-bedtools:1.0.1'

modules/de-seq-tools/main.nf

+14-14
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
params.outdir = 'results'
21

32
process TX2GENE {
43
tag "$transcriptome.simpleName"
@@ -34,41 +33,42 @@ process TXIMPORT_COUNTS {
3433
script:
3534
"""
3635
mkdir -p salmon_summary
37-
tx_import.R --input-path ${input_path} --output-path salmon_summary --tx2gene ${tx2gene} --counts-method ${count_method}
36+
${launchDir}/bin/tx_import.R --input-path ${input_path} --output-path salmon_summary --tx2gene ${tx2gene} --counts-method ${count_method}
3837
"""
3938
}
4039

4140
process GET_DROPBOX_DATA {
4241
container 'danhumassmed/de-seq-tools:1.0.1'
43-
publishDir baseDir, mode:'move'
42+
publishDir params.outdir, mode:'copy'
4443

4544
input:
4645
val data_remote
4746
val data_local
48-
49-
output:
50-
path "${data_local}"
51-
val "${data_local}", emit: data_local_dir
5247

5348
script:
5449
"""
5550
mkdir -p "${data_local}"
56-
get_dropbox_data.sh "${data_remote}" "${data_local}"
51+
${launchDir}/bin/get_dropbox_data.sh "${data_remote}" "${data_local}"
5752
"""
53+
54+
output:
55+
path "${data_local}"
56+
5857
}
5958

6059
process CHECK_MD5 {
6160
container 'danhumassmed/de-seq-tools:1.0.1'
62-
publishDir params.outdir, mode:'copy'
61+
publishDir params.reportdir, mode:'copy'
6362

6463
input:
65-
val data_local
64+
path data_local
6665

67-
output:
68-
path "md5_report.html"
69-
7066
script:
7167
"""
72-
check_md5.py "${baseDir}/${data_local}"
68+
${launchDir}/bin/check_md5.py "${data_local}"
7369
"""
70+
71+
output:
72+
path "md5_report.html"
73+
7474
}

modules/fastqc/main.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
params.outdir = 'results'
21

32
process FASTQC {
43
tag "FASTQC on $sample_id"

modules/multiqc/main.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
params.outdir = 'results'
21

32
process MULTIQC {
43
container 'danhumassmed/fastqc-multiqc:1.0.1'

modules/rsem/main.nf

+2-2
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ process RSEM_SUMMARY {
8181
publishDir params.outdir, mode:'copy'
8282

8383
input:
84-
path('*')
84+
path('rsem_expression_*')
8585

8686
output:
8787
path "rsem_summary"
@@ -90,6 +90,6 @@ process RSEM_SUMMARY {
9090
"""
9191
mkdir -p rsem_summary
9292
cd rsem_summary
93-
expression_summary.py --expression-type rsem --input-path "${baseDir}/${params.outdir}"
93+
${launchDir}/bin/expression_summary.py --expression-type rsem --input-path ..
9494
"""
9595
}

modules/salmon/main.nf

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ process SALMON_QUANTIFY_SINGLE {
2626
path reads
2727

2828
output:
29-
path "./salmon_expression_${reads.getName().split("\\.")[0]}"
29+
path "salmon_expression_${reads.getName().split("\\.")[0]}"
3030

3131
script:
3232
"""
@@ -44,7 +44,7 @@ process SALMON_QUANTIFY{
4444
tuple val(pair_id), path(reads)
4545

4646
output:
47-
path "./salmon_expression_${pair_id}"
47+
path "salmon_expression_${pair_id}"
4848

4949
script:
5050
"""

modules/star/main.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
params.outdir = 'results'
21

32
process STAR_INDEX {
43
container "danhumassmed/star-rsem:1.0.1"

modules/sub-workflow/index-salmon.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
params.outdir = 'results'
21

32
// import modules
43
include { DECOY_TRANSCRIPTOME } from '../bedtools'

modules/sub-workflow/index-star-rsem.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
params.outdir = 'results'
21

32
// import modules
43
include { STAR_INDEX } from '../star'

modules/sub-workflow/rnaseq-salmon.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
params.outdir = 'results'
21

32
include { FASTQC; FASTQC_SINGLE } from '../fastqc'
43
include { SALMON_QUANTIFY; SALMON_QUANTIFY_SINGLE; SALMON_SUMMARY } from '../salmon'

modules/sub-workflow/rnaseq-star-rsem.nf

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
params.outdir = 'results'
2-
31

42
include { FASTQC; FASTQC_SINGLE } from '../fastqc'
53
include { STAR_ALIGN; STAR_ALIGN_SINGLE } from '../star'

modules/trimmomatic/main.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
params.outdir = 'results'
21

32
//http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf
43

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
* enables modules
5+
*/
6+
nextflow.enable.dsl = 2
7+
8+
/*
9+
* RNA SEQ Pipeline optimized for Alex Byrne
10+
*/
11+
12+
// rclone lsd remote:"Francis lab_KB_wholeworm RNAseq data_March 2023_Share AW lab"
13+
// rclone lsd remote:"SamLiu_ Francis lab September 2023"
14+
15+
//params.data_remote="Francis lab_KB_wholeworm RNAseq data_March 2023_Share AW lab/August 2023 experiment"
16+
//params.data_remote="Francis lab_KB_wholeworm RNAseq data_March 2023_Share AW lab/March 2023 experiment"
17+
params.data_remote="SamLiu_ Francis lab September 2023"
18+
params.data_local="Experiment3"
19+
params.outdir = "${projectDir}/data"
20+
params.reportdir = "${params.outdir}/${params.data_local}"
21+
22+
23+
24+
log.info """\
25+
R N A S E Q - N F P I P E L I N E
26+
===================================
27+
data_remote : ${params.data_remote}
28+
outdir : ${params.outdir}
29+
project_dir : ${projectDir}
30+
launch_dir : ${launchDir}
31+
"""
32+
33+
/*
34+
* main script flow
35+
*/
36+
37+
include { GET_DROPBOX_DATA } from "${launchDir}/modules/de-seq-tools"
38+
include { CHECK_MD5 } from "${launchDir}/modules/de-seq-tools"
39+
40+
workflow {
41+
GET_DROPBOX_DATA(params.data_remote, params.data_local)
42+
//CHECK_MD5(GET_DROPBOX_DATA.out.collect())
43+
}
44+
45+
workflow.onComplete {
46+
log.info ( workflow.success ? "\nDone! The data is avialable --> ${params.reportdir}\n" : "Oops .. something went wrong" )
47+
}
48+
49+

rnaseq-rsem-mike.nf pipelines/mike_francis/rnaseq-rsem-mike.nf

+11-9
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ nextflow.enable.dsl = 2
1111
* RNA SEQ Pipeline optimized for Alex Byrne
1212
*/
1313

14-
//params.reads = "${baseDir}/data/mike_francis/**/*_{1,2}.fq.gz"
15-
params.reads = "${baseDir}/results/trimmed_sliding_window/**/*_{1,2}.fq.gz"
16-
params.star_index_dir="${baseDir}/results/star_index"
17-
params.rsem_reference_dir = "${baseDir}/results/rsem_index"
18-
params.outdir = "results"
14+
//params.reads = "${projectDir}/data/mike_francis/**/*_{1,2}.fq.gz"
15+
params.reads = "${projectDir}/data/Experiment3/**/*_{1,2}.fq.gz"
16+
params.star_index_dir="${launchDir}/pipelines/shared/results/star_index"
17+
params.rsem_reference_dir = "${launchDir}/pipelines/shared/results/rsem_index"
18+
params.outdir = "${projectDir}/results"
19+
1920

2021
log.info """\
2122
R N A S E Q - N F P I P E L I N E
@@ -24,12 +25,13 @@ log.info """\
2425
star_index_dir : ${params.star_index_dir}
2526
rsem_reference_dir : ${params.rsem_reference_dir}
2627
outdir : ${params.outdir}
27-
base_dir : ${baseDir}
28+
project_dir : ${projectDir}
29+
launch_dir : ${launchDir}
2830
"""
2931

3032
// import modules
31-
include { RNASEQ_STAR_RSEM } from './modules/sub-workflow/rnaseq-star-rsem'
32-
include { MULTIQC } from './modules/multiqc'
33+
include { RNASEQ_STAR_RSEM } from "${launchDir}/modules/sub-workflow/rnaseq-star-rsem"
34+
include { MULTIQC } from "${launchDir}/modules/multiqc"
3335

3436
/*
3537
* main script flow
@@ -42,5 +44,5 @@ workflow {
4244
}
4345

4446
workflow.onComplete {
45-
log.info ( workflow.success ? "\nDone! Open the following report in your browser --> ${baseDir}/$params.outdir/multiqc_rsem_report.html\n" : "Oops .. something went wrong" )
47+
log.info ( workflow.success ? "\nDone! Open the following report in your browser --> ${params.outdir}/multiqc_rsem_report.html\n" : "Oops .. something went wrong" )
4648
}
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)