-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathfasta2taxonomy.pl
executable file
·69 lines (64 loc) · 1.83 KB
/
fasta2taxonomy.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/perl
# Script: fasta2taxonomy.pl
# Description: Generates a taxonomy file from species in a Fasta description line
# Author: Steven Ahrendt
# email: [email protected]
# Date: 03.13.2014
##################################
# Standard 7:
# Kingdom, Phylum, Class, Order, Family, Genus, Species
# Extended:
# Superkingdom, Kingdom, Phylum, Subphylum, Superclass, Class, Superorder, Order, Superfamily, Family, Subfamily, Genus, Species
####
use warnings;
use strict;
use lib '/rhome/sahrendt/Scripts';
use Bio::Seq;
use Bio::SeqIO;
use SeqAnalysis;
use Getopt::Long;
use Data::Dumper;
#####-----Global Variables-----#####
my $input;                  # -i: path to a text file, one "Genus_species[_...]" entry per line
my ($help,$verb);           # -h prints usage and exits; -v echoes each entry and name to STDOUT
my $hash_ref;               # results of getTaxonomy, keyed by "Genus species"
my $db_form = "flatfile";   # -d: database format string passed through to getTaxonomy
my @ranks = qw(Kingdom Phylum Class Order Family Genus Species); # Standard 7 taxonomic rankings

# Defined before option parsing so that a malformed command line can show it.
my $usage = "Usage: fasta2taxonomy.pl -i input [-d database_format]\nDB format default is \"flatfile\"\nOutput to STDOUT\n";

# GetOptions returns false on an unknown or malformed option; stop instead of
# silently running with a partially parsed command line.
GetOptions ('i|input=s'   => \$input,
            'd|db=s'      => \$db_form,
            'h|help'      => \$help,
            'v|verbose'   => \$verb)
  or die $usage;
die $usage if $help;
die "No input.\n$usage" if (!$input);

#####-----Main-----#####
# Three-argument open with a lexical handle: the file name is never parsed
# for mode characters, and the handle cannot clash with another package's IN.
open(my $in_fh, '<', $input) or die "Can't open $input: $!\n";
while(my $accno = <$in_fh>)
{
  chomp $accno;
  next unless $accno =~ /\S/;   # blank lines carry no name to look up

  print "<$accno>" if $verb;

  # Only the first two underscore-separated fields are used; anything after
  # them is ignored for the lookup.
  my ($genus,$species) = split(/_/,$accno);
  $genus   = '' unless defined $genus;
  $species = '' unless defined $species;   # entry without an underscore: same name as before, no "uninitialized" warning
  my $name = join(" ",$genus,$species);
  print "<$name>" if $verb;

  # getTaxonomy and printTaxonomy are defined outside this section.
  # NOTE(review): the check below implies getTaxonomy returns a hash ref whose
  # "kingdom" entry is the string "NULL" when the lookup fails -- confirm.
  $hash_ref->{$name} = getTaxonomy($name,$db_form,$verb);
  if($hash_ref->{$name}{"kingdom"} ne "NULL")
  {
    printTaxonomy($hash_ref,\@ranks,$name,$accno);
  }
  else
  {
    # Record the unresolved entry in the file "Failed" in the current
    # directory. The file is appended to, so entries from earlier runs are
    # kept, and it is only created when at least one lookup fails.
    open(my $fh,'>>','Failed') or die "Can't open Failed: $!\n";
    print $fh "$accno\n";
    # Buffered write errors only surface at close, so check it.
    close($fh) or die "Can't write Failed: $!\n";
  }
}
close($in_fh);

warn "Done.\n";
exit(0);
#####-----Subroutines-----#####