|
| 1 | +#include<stdio.h> |
| 2 | +#include<stdlib.h> |
| 3 | +#include<iostream> |
| 4 | +#include<string> |
| 5 | +#include<fstream> |
| 6 | +#include <regex> |
| 7 | +#include <map> |
| 8 | +#include <getopt.h> |
| 9 | +#include "project/simulator.h" |
| 10 | +#include "project/brute_force.h" |
| 11 | +#include "project/utils.h" |
| 12 | + |
| 13 | +std::string modification_type; |
| 14 | + |
| 15 | +std::string mode; |
| 16 | + |
| 17 | +std::string output_path; |
| 18 | + |
| 19 | +std::string fixed_hairpin = "hairpin.fixed.fa"; |
| 20 | + |
| 21 | +std::string fixed_hairpin_species_spec = "hairpin.fixed.species.spec.fa"; |
| 22 | + |
| 23 | +int times; /* Number of generated sequences of the same type */ |
| 24 | + |
| 25 | +std::string species = "hsa"; /* Default species for which the simulation will be performed */ |
| 26 | + |
| 27 | +std::vector <std::pair<string, string>> mi_mimat; |
| 28 | + |
| 29 | +void helpMessage (std::string program_name) { //FIXME |
| 30 | + printf("\nUsage: %s [OPTIONS]\n", program_name.c_str()); |
| 31 | + printf(" -a <file> : Path to mature.fa file. \n"); |
| 32 | + printf(" -b <file> : Path to hairpin.fa file. \n"); |
| 33 | + printf(" -g <file> : Path to miRBase gtf file. \n"); |
| 34 | + printf(" -o <path> : Output path. \n"); |
| 35 | + printf(" -s <string> : Species for the generated sequences (e.g. hsa). \n"); |
| 36 | + printf(" -m <int> : Mode for the brute force execution. \n"); |
| 37 | + printf(" -t <int> : Number of identical modified sequences sequences. \n"); |
| 38 | + printf(" Available modes:\n"); |
| 39 | + printf(" 0: Canonical miRNAs \n"); |
| 40 | + printf(" 1: 5' templated addition \n"); |
| 41 | + printf(" 2: 3' templated addition \n"); |
| 42 | + printf(" 3: 5' non-templated addition (A and Ts) \n"); |
| 43 | + printf(" 4: 3' non-templated addition (A and Ts)\n"); |
| 44 | + printf(" 5: SNPs \n"); |
| 45 | + printf(" 6: Indels \n"); |
| 46 | + printf(" 7: 3' templated losses \n"); |
| 47 | + printf(" 8: 5' templated losses \n"); |
| 48 | + printf(" 9: Options 0-8 \n"); |
| 49 | + printf("\n"); |
| 50 | + exit(1); |
| 51 | + |
| 52 | +} |
| 53 | + |
| 54 | +/* Concatenate sequences lines in hairpin file. */ |
| 55 | +void fixHairpinFile (std::string input_file, std::string output_file) { |
| 56 | + |
| 57 | + std::ifstream input; |
| 58 | + std::ofstream output; |
| 59 | + |
| 60 | + input.open(input_file.c_str()); |
| 61 | + output.open(output_file.c_str()); |
| 62 | + int start = 0; |
| 63 | + std::string hairpin; |
| 64 | + |
| 65 | + for( std::string line; getline( input, line ); ) |
| 66 | + { |
| 67 | + if( std::regex_match (line, std::regex("(>)(.*)")) ){ |
| 68 | + |
| 69 | + if(start) output << hairpin << "\n"; |
| 70 | + |
| 71 | + output << line << "\n"; |
| 72 | + |
| 73 | + hairpin = ""; |
| 74 | + |
| 75 | + }else{ |
| 76 | + |
| 77 | + hairpin = hairpin + line; |
| 78 | + start = 1; |
| 79 | + |
| 80 | + } |
| 81 | + |
| 82 | + } |
| 83 | + |
| 84 | + output << hairpin << "\n"; //Write the last line |
| 85 | + output.close(); |
| 86 | +} |
| 87 | + |
| 88 | +//Create mapping of MI and MIMAT IDs form GTF file. |
| 89 | +void mapMItoMIMAT(std::string input, std::vector <std::pair<std::string, std::string>> &mi_mimat){ |
| 90 | + |
| 91 | + std::ifstream fp; |
| 92 | + fp.open(input.c_str()); |
| 93 | + std::smatch match; |
| 94 | + for( std::string line; getline( fp, line ); ) |
| 95 | + { |
| 96 | + if (std::regex_match (line, std::regex("(.*)(Derives_from)(.*)"))) |
| 97 | + { |
| 98 | + std::vector<std::string> tab = split(line, '\t'); |
| 99 | + std::vector<std::string> semicolon = split(tab[8], ';'); |
| 100 | + std::string mimat = semicolon[0].substr(3,semicolon[0].size()); |
| 101 | + std::string mi = semicolon[3].substr(13,semicolon[3].size()); |
| 102 | + mi_mimat.emplace_back(mi, mimat); |
| 103 | + } |
| 104 | + |
| 105 | + } |
| 106 | + |
| 107 | + |
| 108 | +} |
| 109 | + |
| 110 | +void line_by_species(std::string input_file, std::string output_file){ |
| 111 | + |
| 112 | + int flag = 0; |
| 113 | + int counter = 1; |
| 114 | + std::ifstream input; |
| 115 | + std::ofstream output; |
| 116 | + input.open(input_file.c_str()); |
| 117 | + output.open(output_file.c_str()); |
| 118 | + |
| 119 | + for( std::string line; getline( input, line ); ) |
| 120 | + { |
| 121 | + if (line.find('>'+species) != std::string::npos) { |
| 122 | + output << line << "\n"; |
| 123 | + flag = 1; |
| 124 | + } |
| 125 | + if(counter % 2 == 0 && flag == 1){ //Sequence line |
| 126 | + output << line << "\n"; |
| 127 | + flag = 0; |
| 128 | + } |
| 129 | + counter++; |
| 130 | + } |
| 131 | + output.close(); |
| 132 | + |
| 133 | +} |
| 134 | + |
| 135 | +int main(int argc, char **argv) |
| 136 | +{ |
| 137 | + |
| 138 | + int aflag = 0; |
| 139 | + int bflag = 0; |
| 140 | + char *cvalue = NULL; |
| 141 | + std::string fasta; |
| 142 | + int index; |
| 143 | + |
| 144 | + //int opt; |
| 145 | + const char * short_opt = "hm:t:a:b:g:o:s:"; |
| 146 | + struct option long_opt[] = |
| 147 | + { |
| 148 | + {"help", no_argument, nullptr, 'h'}, |
| 149 | + {"mature", required_argument, nullptr, 'a'}, |
| 150 | + {"hairpin", required_argument, nullptr, 'b'}, |
| 151 | + {"gtf", required_argument, nullptr, 'g'}, |
| 152 | + {"output", required_argument, nullptr, 'o'}, |
| 153 | + {"mode", required_argument, nullptr, 'm'}, |
| 154 | + {"times", required_argument, nullptr, 't'}, |
| 155 | + {nullptr, no_argument, nullptr, 0 } |
| 156 | + }; |
| 157 | + |
| 158 | + if(argc == 1) helpMessage(argv[0]); //If no arguments have been passed |
| 159 | + |
| 160 | + std::string input; |
| 161 | + std::string mature; |
| 162 | + std::string hairpin; |
| 163 | + std::string gtf; |
| 164 | + |
| 165 | + int mode; |
| 166 | + |
| 167 | + |
| 168 | +// while((opt = getopt_long(argc, argv, short_opt, long_opt, NULL)) != -1) |
| 169 | +// { |
| 170 | + int flag = 0; |
| 171 | + while(true) |
| 172 | + { |
| 173 | + const auto opt = getopt_long(argc, argv, short_opt, long_opt, NULL); |
| 174 | + |
| 175 | + if (-1 == opt) |
| 176 | + break; |
| 177 | + |
| 178 | + switch(opt) //FIXME Add modes for BF, MonteCarlo +++ |
| 179 | + { |
| 180 | +// case -1: /* no more arguments */ |
| 181 | +// case 0: /* long options toggles */ |
| 182 | +// break; |
| 183 | + |
| 184 | + case 'a': |
| 185 | + mature = optarg; |
| 186 | + flag++; |
| 187 | + break; |
| 188 | + |
| 189 | + case 'b': |
| 190 | + hairpin = optarg; |
| 191 | + flag++; |
| 192 | + break; |
| 193 | + |
| 194 | + case 'g': |
| 195 | + gtf = optarg; |
| 196 | + flag++; |
| 197 | + break; |
| 198 | + |
| 199 | + case 'o': |
| 200 | + output_path = optarg; |
| 201 | + flag++; |
| 202 | + break; |
| 203 | + |
| 204 | + case 'm': |
| 205 | + mode = atoi(optarg); |
| 206 | + flag++; |
| 207 | + break; |
| 208 | + |
| 209 | + case 't': |
| 210 | + times = atoi(optarg); |
| 211 | + flag++; |
| 212 | + break; |
| 213 | + |
| 214 | + case 'h': |
| 215 | + helpMessage(argv[0]); |
| 216 | + break; |
| 217 | + |
| 218 | + //case ':': |
| 219 | + case '?': |
| 220 | + // if(optopt |
| 221 | + fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]); |
| 222 | + return(-2); |
| 223 | + |
| 224 | + //default: |
| 225 | +// fprintf(stderr, "%s: invalid option -- %c\n", argv[0], opt); |
| 226 | +// fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]); |
| 227 | +// return(-2); |
| 228 | + } |
| 229 | + // }; |
| 230 | + //}; |
| 231 | + } |
| 232 | + |
| 233 | + //FIXME write better way to communicate with user. |
| 234 | + if(flag != 6) helpMessage(argv[0]); //If not enough arguments have been passed. |
| 235 | + |
| 236 | + path_check(output_path); |
| 237 | + |
| 238 | + fixHairpinFile(hairpin, fixed_hairpin); |
| 239 | + |
| 240 | + mapMItoMIMAT(gtf, mi_mimat); |
| 241 | + |
| 242 | + std::string mature_species_spec = "mature.species.spec.fa"; |
| 243 | + line_by_species(mature, mature_species_spec); //Select lines with specific species. |
| 244 | + |
| 245 | + line_by_species(hairpin, fixed_hairpin_species_spec); //Select lines with specific species. |
| 246 | + |
| 247 | + std::string output = output_path + "simulated_miRNA_BF.fa"; |
| 248 | + |
| 249 | + std::ofstream output_fp; //Output FASTA file. |
| 250 | + output_fp.open(output.c_str()); |
| 251 | + |
| 252 | + input = mature_species_spec; |
| 253 | + |
| 254 | + if(mode == 1){//FIXME Better calls? |
| 255 | + modification_type = "5prime_templated_addition"; |
| 256 | + }else if (mode == 2){ |
| 257 | + modification_type = "3prime_templated_addition"; |
| 258 | + }else if (mode == 3){ |
| 259 | + modification_type = "5prime_addition"; |
| 260 | + }else if (mode == 4){ |
| 261 | + modification_type = "3prime_addition"; |
| 262 | + }else if (mode == 5){ |
| 263 | + modification_type = "SNP"; |
| 264 | + }else if (mode == 6){ |
| 265 | + modification_type = "indel"; |
| 266 | + }else if (mode == 7){ |
| 267 | + modification_type = "3prime_loss"; |
| 268 | + }else if (mode == 8){ |
| 269 | + modification_type = "5prime_loss"; |
| 270 | + } |
| 271 | + |
| 272 | + if(mode == 9){ |
| 273 | + modification_type = "no_modification"; |
| 274 | + bruteForce(input, output_fp, "fasta", 0, times); |
| 275 | + modification_type = "5prime_templated_addition"; |
| 276 | + bruteForce(input, output_fp, "fasta", 1, times); |
| 277 | + modification_type = "3prime_templated_addition"; |
| 278 | + bruteForce(input, output_fp, "fasta", 2, times); |
| 279 | + modification_type = "5prime_addition"; |
| 280 | + bruteForce(input, output_fp, "fasta", 3, times); |
| 281 | + modification_type = "3prime_addition"; |
| 282 | + bruteForce(input, output_fp, "fasta", 4, times); |
| 283 | + modification_type = "SNP"; |
| 284 | + bruteForce(input, output_fp, "fasta", 5, times); |
| 285 | + modification_type = "indel"; |
| 286 | + bruteForce(input, output_fp, "fasta", 6, times); |
| 287 | + modification_type = "5prime_loss"; |
| 288 | + bruteForce(input, output_fp, "fasta", 7, times); |
| 289 | + modification_type = "3prime_loss"; |
| 290 | + bruteForce(input, output_fp, "fasta", 8, times); |
| 291 | + }else{ |
| 292 | + bruteForce(input, output_fp, "fasta", mode, times); |
| 293 | + } |
| 294 | + output_fp.close(); |
| 295 | + |
| 296 | + delete_file(mature_species_spec); |
| 297 | + delete_file(fixed_hairpin); |
| 298 | + delete_file(fixed_hairpin_species_spec); |
| 299 | + |
| 300 | +} |
0 commit comments