Skip to content

Commit 742861d

Browse files
authored
Create simulator.cpp
1 parent 432a3c1 commit 742861d

File tree

1 file changed

+300
-0
lines changed

1 file changed

+300
-0
lines changed

Diff for: src/simulator.cpp

+300
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
#include<stdio.h>
2+
#include<stdlib.h>
3+
#include<iostream>
4+
#include<string>
5+
#include<fstream>
6+
#include <regex>
7+
#include <map>
8+
#include <getopt.h>
9+
#include "project/simulator.h"
10+
#include "project/brute_force.h"
11+
#include "project/utils.h"
12+
13+
std::string modification_type;
14+
15+
std::string mode;
16+
17+
std::string output_path;
18+
19+
std::string fixed_hairpin = "hairpin.fixed.fa";
20+
21+
std::string fixed_hairpin_species_spec = "hairpin.fixed.species.spec.fa";
22+
23+
int times; /* Number of generated sequences of the same type */
24+
25+
std::string species = "hsa"; /* Default species for which the simulation will be performed */
26+
27+
std::vector <std::pair<string, string>> mi_mimat;
28+
29+
void helpMessage (std::string program_name) { //FIXME
30+
printf("\nUsage: %s [OPTIONS]\n", program_name.c_str());
31+
printf(" -a <file> : Path to mature.fa file. \n");
32+
printf(" -b <file> : Path to hairpin.fa file. \n");
33+
printf(" -g <file> : Path to miRBase gtf file. \n");
34+
printf(" -o <path> : Output path. \n");
35+
printf(" -s <string> : Species for the generated sequences (e.g. hsa). \n");
36+
printf(" -m <int> : Mode for the brute force execution. \n");
37+
printf(" -t <int> : Number of identical modified sequences sequences. \n");
38+
printf(" Available modes:\n");
39+
printf(" 0: Canonical miRNAs \n");
40+
printf(" 1: 5' templated addition \n");
41+
printf(" 2: 3' templated addition \n");
42+
printf(" 3: 5' non-templated addition (A and Ts) \n");
43+
printf(" 4: 3' non-templated addition (A and Ts)\n");
44+
printf(" 5: SNPs \n");
45+
printf(" 6: Indels \n");
46+
printf(" 7: 3' templated losses \n");
47+
printf(" 8: 5' templated losses \n");
48+
printf(" 9: Options 0-8 \n");
49+
printf("\n");
50+
exit(1);
51+
52+
}
53+
54+
/* Concatenate sequences lines in hairpin file. */
55+
void fixHairpinFile (std::string input_file, std::string output_file) {
56+
57+
std::ifstream input;
58+
std::ofstream output;
59+
60+
input.open(input_file.c_str());
61+
output.open(output_file.c_str());
62+
int start = 0;
63+
std::string hairpin;
64+
65+
for( std::string line; getline( input, line ); )
66+
{
67+
if( std::regex_match (line, std::regex("(>)(.*)")) ){
68+
69+
if(start) output << hairpin << "\n";
70+
71+
output << line << "\n";
72+
73+
hairpin = "";
74+
75+
}else{
76+
77+
hairpin = hairpin + line;
78+
start = 1;
79+
80+
}
81+
82+
}
83+
84+
output << hairpin << "\n"; //Write the last line
85+
output.close();
86+
}
87+
88+
//Create mapping of MI and MIMAT IDs form GTF file.
89+
void mapMItoMIMAT(std::string input, std::vector <std::pair<std::string, std::string>> &mi_mimat){
90+
91+
std::ifstream fp;
92+
fp.open(input.c_str());
93+
std::smatch match;
94+
for( std::string line; getline( fp, line ); )
95+
{
96+
if (std::regex_match (line, std::regex("(.*)(Derives_from)(.*)")))
97+
{
98+
std::vector<std::string> tab = split(line, '\t');
99+
std::vector<std::string> semicolon = split(tab[8], ';');
100+
std::string mimat = semicolon[0].substr(3,semicolon[0].size());
101+
std::string mi = semicolon[3].substr(13,semicolon[3].size());
102+
mi_mimat.emplace_back(mi, mimat);
103+
}
104+
105+
}
106+
107+
108+
}
109+
110+
void line_by_species(std::string input_file, std::string output_file){
111+
112+
int flag = 0;
113+
int counter = 1;
114+
std::ifstream input;
115+
std::ofstream output;
116+
input.open(input_file.c_str());
117+
output.open(output_file.c_str());
118+
119+
for( std::string line; getline( input, line ); )
120+
{
121+
if (line.find('>'+species) != std::string::npos) {
122+
output << line << "\n";
123+
flag = 1;
124+
}
125+
if(counter % 2 == 0 && flag == 1){ //Sequence line
126+
output << line << "\n";
127+
flag = 0;
128+
}
129+
counter++;
130+
}
131+
output.close();
132+
133+
}
134+
135+
int main(int argc, char **argv)
136+
{
137+
138+
int aflag = 0;
139+
int bflag = 0;
140+
char *cvalue = NULL;
141+
std::string fasta;
142+
int index;
143+
144+
//int opt;
145+
const char * short_opt = "hm:t:a:b:g:o:s:";
146+
struct option long_opt[] =
147+
{
148+
{"help", no_argument, nullptr, 'h'},
149+
{"mature", required_argument, nullptr, 'a'},
150+
{"hairpin", required_argument, nullptr, 'b'},
151+
{"gtf", required_argument, nullptr, 'g'},
152+
{"output", required_argument, nullptr, 'o'},
153+
{"mode", required_argument, nullptr, 'm'},
154+
{"times", required_argument, nullptr, 't'},
155+
{nullptr, no_argument, nullptr, 0 }
156+
};
157+
158+
if(argc == 1) helpMessage(argv[0]); //If no arguments have been passed
159+
160+
std::string input;
161+
std::string mature;
162+
std::string hairpin;
163+
std::string gtf;
164+
165+
int mode;
166+
167+
168+
// while((opt = getopt_long(argc, argv, short_opt, long_opt, NULL)) != -1)
169+
// {
170+
int flag = 0;
171+
while(true)
172+
{
173+
const auto opt = getopt_long(argc, argv, short_opt, long_opt, NULL);
174+
175+
if (-1 == opt)
176+
break;
177+
178+
switch(opt) //FIXME Add modes for BF, MonteCarlo +++
179+
{
180+
// case -1: /* no more arguments */
181+
// case 0: /* long options toggles */
182+
// break;
183+
184+
case 'a':
185+
mature = optarg;
186+
flag++;
187+
break;
188+
189+
case 'b':
190+
hairpin = optarg;
191+
flag++;
192+
break;
193+
194+
case 'g':
195+
gtf = optarg;
196+
flag++;
197+
break;
198+
199+
case 'o':
200+
output_path = optarg;
201+
flag++;
202+
break;
203+
204+
case 'm':
205+
mode = atoi(optarg);
206+
flag++;
207+
break;
208+
209+
case 't':
210+
times = atoi(optarg);
211+
flag++;
212+
break;
213+
214+
case 'h':
215+
helpMessage(argv[0]);
216+
break;
217+
218+
//case ':':
219+
case '?':
220+
// if(optopt
221+
fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]);
222+
return(-2);
223+
224+
//default:
225+
// fprintf(stderr, "%s: invalid option -- %c\n", argv[0], opt);
226+
// fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]);
227+
// return(-2);
228+
}
229+
// };
230+
//};
231+
}
232+
233+
//FIXME write better way to communicate with user.
234+
if(flag != 6) helpMessage(argv[0]); //If not enough arguments have been passed.
235+
236+
path_check(output_path);
237+
238+
fixHairpinFile(hairpin, fixed_hairpin);
239+
240+
mapMItoMIMAT(gtf, mi_mimat);
241+
242+
std::string mature_species_spec = "mature.species.spec.fa";
243+
line_by_species(mature, mature_species_spec); //Select lines with specific species.
244+
245+
line_by_species(hairpin, fixed_hairpin_species_spec); //Select lines with specific species.
246+
247+
std::string output = output_path + "simulated_miRNA_BF.fa";
248+
249+
std::ofstream output_fp; //Output FASTA file.
250+
output_fp.open(output.c_str());
251+
252+
input = mature_species_spec;
253+
254+
if(mode == 1){//FIXME Better calls?
255+
modification_type = "5prime_templated_addition";
256+
}else if (mode == 2){
257+
modification_type = "3prime_templated_addition";
258+
}else if (mode == 3){
259+
modification_type = "5prime_addition";
260+
}else if (mode == 4){
261+
modification_type = "3prime_addition";
262+
}else if (mode == 5){
263+
modification_type = "SNP";
264+
}else if (mode == 6){
265+
modification_type = "indel";
266+
}else if (mode == 7){
267+
modification_type = "3prime_loss";
268+
}else if (mode == 8){
269+
modification_type = "5prime_loss";
270+
}
271+
272+
if(mode == 9){
273+
modification_type = "no_modification";
274+
bruteForce(input, output_fp, "fasta", 0, times);
275+
modification_type = "5prime_templated_addition";
276+
bruteForce(input, output_fp, "fasta", 1, times);
277+
modification_type = "3prime_templated_addition";
278+
bruteForce(input, output_fp, "fasta", 2, times);
279+
modification_type = "5prime_addition";
280+
bruteForce(input, output_fp, "fasta", 3, times);
281+
modification_type = "3prime_addition";
282+
bruteForce(input, output_fp, "fasta", 4, times);
283+
modification_type = "SNP";
284+
bruteForce(input, output_fp, "fasta", 5, times);
285+
modification_type = "indel";
286+
bruteForce(input, output_fp, "fasta", 6, times);
287+
modification_type = "5prime_loss";
288+
bruteForce(input, output_fp, "fasta", 7, times);
289+
modification_type = "3prime_loss";
290+
bruteForce(input, output_fp, "fasta", 8, times);
291+
}else{
292+
bruteForce(input, output_fp, "fasta", mode, times);
293+
}
294+
output_fp.close();
295+
296+
delete_file(mature_species_spec);
297+
delete_file(fixed_hairpin);
298+
delete_file(fixed_hairpin_species_spec);
299+
300+
}

0 commit comments

Comments
 (0)