-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsv_to_yaml.py
executable file
·122 lines (109 loc) · 5.13 KB
/
csv_to_yaml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python3
# This script allows easy generation of .yaml configuration files
# from your bash prompt. Please note that all directory paths must be
# specified following the <directory_name>/ format, and so need to
# be in the working directory. Please note that one and only one
# reference sequence (and gff for RNAseq) is accepted at a time!
# The reference must have the extension ".fna", ".fa" or ".fasta".
# The annotation must have the extension ".gff".
# If one or more of these conditions are not met, this script
# will exit with an error message.
import sys
import csv
import os
from os.path import expanduser
tab = ' '
cwd = os.getcwd() + "/"
bind = expanduser("~")  # default bind point, used when the prompt is left empty

# File extensions recognised inside the reference directory.
REFERENCE_EXTENSIONS = (".fna", ".fa", ".fasta")
ANNOTATION_EXTENSIONS = (".gff", ".gtf")

# Answer to the "DNAseq or RNAseq" prompt ->
# (TECH value, INDEXER script, ALLIGNER script template).
TECH_SETTINGS = {
    'd': ("DNAseq", "bwa_index_building.py", "bwa_{}.py"),
    'r': ("RNAseq", "star_index_building.py", "star_{}_FP.py"),
}

# Answer to the "paired or single end" prompt -> (MODE value, script suffix).
MODE_SETTINGS = {
    'p': ("paired", "pe"),
    's': ("single", "se"),
}

# AllMine !
BANNER = """\33[1m\33[36m
##################################################
# AllMine, a flexible pipeline for Allele Mining #
# This software is under the MIT License #
# Copyright Thomas Bersez 2019 #
# INRA-GAFL / Paris Saclay university #
# contact: [email protected] #
##################################################
\33[0m
"""


def _fail(message):
    """Print *message* in red and stop the script with a non-zero status."""
    print("\33[31m" + message + "\n\33[0m")
    sys.exit(1)


def _ask(question):
    """Prompt the user and return the answer without surrounding blanks."""
    return input(question).strip()


def as_directory(path):
    """Return *path* with a trailing "/" so file names can be appended to it.

    An empty answer is returned unchanged; appending it to the working
    directory then designates the working directory itself.
    """
    if path and not path.endswith("/"):
        return path + "/"
    return path


def find_reference_files(directory):
    """Locate the reference sequence and the annotation in *directory*.

    Files whose extension is neither a reference nor an annotation extension
    (index files, logs, ...) are ignored.

    Returns:
        A ``(genome, annotation)`` tuple of file names; ``annotation`` is
        ``None`` when the directory holds no annotation file.

    Raises:
        OSError: if *directory* cannot be listed.
        ValueError: if there is not exactly one reference sequence, or if
            there is more than one annotation file.
    """
    names = sorted(os.listdir(directory))
    # str.endswith needs a tuple to test several suffixes; the expression
    # `".fna" or ".fa"` would only ever test the first one.
    genomes = [name for name in names if name.endswith(REFERENCE_EXTENSIONS)]
    annotations = [name for name in names if name.endswith(ANNOTATION_EXTENSIONS)]
    if len(genomes) != 1:
        raise ValueError(
            "expected exactly one reference sequence, found "
            + str(len(genomes))
        )
    if len(annotations) > 1:
        raise ValueError(
            "expected at most one annotation file, found "
            + str(len(annotations))
        )
    return genomes[0], (annotations[0] if annotations else None)


def render_config(tech, mode, genome, annotation, bind_path, genome_dir,
                  reads_dir, regions, container_path, samples, workdir=None):
    """Build the content of config.yaml and return it as a string.

    Args:
        tech: 'd' (DNAseq) or 'r' (RNAseq).
        mode: 'p' (paired end) or 's' (single end).
        genome: file name of the reference sequence.
        annotation: file name of the annotation, or None to omit ANO.
        bind_path: value written under BIND.
        genome_dir: reference directory, relative to *workdir*.
        reads_dir: raw reads directory, relative to *workdir*.
        regions: bed file path, relative to *workdir*.
        container_path: value written under CONT.
        samples: iterable of mappings with the keys "filename", "R1_ext",
            "platform", "date(mm/dd/yy)" and, in paired mode, "R2_ext".
        workdir: prefix for every relative path; defaults to the module
            level ``cwd``.

    Raises:
        KeyError: if *tech* or *mode* is unknown, or a sample lacks a key.
    """
    if workdir is None:
        workdir = cwd
    tech_label, indexer, aligner_template = TECH_SETTINGS[tech]
    mode_label, suffix = MODE_SETTINGS[mode]

    # Trailing blanks are kept so the output matches earlier versions.
    lines = ["TECH: " + tech_label + " ", "GENOME: " + genome]
    if annotation is not None:
        lines.append("ANO: " + annotation)
    lines += [
        "MODE: " + mode_label + " ",
        "QC: fastqc_" + suffix + ".py ",
        "FASTP: fastp_" + suffix + ".py ",
        "INDEXER: " + indexer + " ",
        "ALLIGNER: " + aligner_template.format(suffix) + " ",
        # Paths
        "BIND: " + bind_path,
        "REF: " + workdir + genome_dir,
        "RAW: " + workdir + reads_dir,
        "TRIMMED: " + workdir + "trimmed/ ",
        "MAP: " + workdir + "mapped/ ",
        "VAR: " + workdir + "variant/ ",
        "REGIONS: " + workdir + regions,
        "CONT: " + container_path,
        # SAMPLE objects creation
        "samples:",
    ]
    for row in samples:
        name = row["filename"]
        reads_prefix = workdir + reads_dir + name
        lines.append(tab + name + ": ")
        lines.append(2 * tab + "name: " + name)
        lines.append(2 * tab + "R1: " + reads_prefix + row["R1_ext"])
        if mode == 'p':
            lines.append(2 * tab + "R2: " + reads_prefix + row["R2_ext"])
        lines.append(2 * tab + "platform: " + row["platform"])
        lines.append(2 * tab + "date: " + row["date(mm/dd/yy)"])
    return "\n".join(lines) + "\n"


def main(argv=None):
    """Ask for the run settings and write config.yaml in the working directory.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the samples sheet (csv).
    Every input is validated before config.yaml is opened, so a failed run
    never leaves a truncated configuration behind.
    """
    argv = sys.argv if argv is None else argv
    print(BANNER)
    if len(argv) < 2:
        _fail("Usage: csv_to_yaml.py <samples_sheet.csv>")

    # User interface
    print("\33[93mPlease answer to the following questions to configure your AllMine run.")
    print("Paths must end with a /. Press enter to confirm.\33[0m\n\n")
    reads_dir = as_directory(_ask('Path to raw read directory : '))
    genome_dir = as_directory(_ask('Path to the reference genome_dir : '))
    regions = _ask('Path to bed file with regions of interest : ')
    tech = _ask('DNAseq or RNAseq data (enter d or r) : ')
    mode = _ask('Paired end or single end reads (enter p or s) : ')
    # An empty answer falls back to the user's home directory.
    bind_path = _ask('Bind path point to your home directory (absolute path) : ') or bind
    container_path = _ask('Absolute path to AllMine container : ')

    if tech not in TECH_SETTINGS:
        _fail("Unknown data type '" + tech + "', expected d or r !")
    if mode not in MODE_SETTINGS:
        _fail("Unknown read mode '" + mode + "', expected p or s !")

    # Only one fasta and one gff are accepted !
    try:
        genome, annotation = find_reference_files(cwd + genome_dir)
    except (OSError, ValueError) as error:
        _fail("No valid reference provided in " + genome_dir + " ! (" + str(error) + ")")

    # Parse the samples sheet in csv format
    try:
        # newline='' lets the csv module handle line endings itself.
        with open(argv[1], 'r', newline='') as sheet:
            samples = list(csv.DictReader(sheet, delimiter=',', quotechar='"'))
    except OSError as error:
        _fail("Cannot read the samples sheet " + argv[1] + " ! (" + str(error) + ")")

    try:
        content = render_config(tech, mode, genome, annotation, bind_path,
                                genome_dir, reads_dir, regions,
                                container_path, samples)
    except KeyError as missing:
        _fail("Column " + str(missing) + " is missing from the samples sheet !")

    with open("config.yaml", 'w') as config:
        config.write(content)
    print('\n\n\33[32mDone. Configuration file \"config.yaml\" created in the current directory.\33[0m\n')


if __name__ == "__main__":
    main()