-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathfastaSubset.pl
executable file
·53 lines (45 loc) · 1.36 KB
/
fastaSubset.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/perl
# Script: fastaSubset.pl
# Description: Splits a large FASTA file into several smaller FASTA files, each containing at most the desired number of sequences
# Author: Steven Ahrendt
# email: [email protected]
# Date: 09.17.2014
##################################
use warnings;
use strict;
use Getopt::Long;
use Bio::Seq;
use Bio::SeqIO;
use lib '/rhome/sahrendt/Scripts';
#####-----Global Variables-----#####
# Command-line configuration.
#   -i | --input    path of the FASTA file to split (required)
#   -s | --size     maximum number of sequences per output file (default 1000)
#   -v | --verbose  verbose flag
#   -h | --help     print the usage message and exit
my $input;
my $size = 1000;
my ($help,$verb);
my $usage = "Usage: fastaSubset.pl -i input [-s size] [-v] [-h]\n"
          . "Takes a large fasta file and parses it into several smaller fasta files containing the desired number of sequences\n"
          . "  -i, --input    input fasta file (required)\n"
          . "  -s, --size     sequences per output file (default: 1000)\n"
          . "  -v, --verbose  verbose output\n"
          . "  -h, --help     show this message\n";

# "=i" makes Getopt::Long reject non-integer sizes; a false return value means
# an option was unknown or malformed, so stop instead of running with defaults.
GetOptions ('i|input=s'  => \$input,
            's|size=i'   => \$size,
            'h|help'     => \$help,
            'v|verbose'  => \$verb) or die $usage;

die $usage if $help;
# Test definedness/emptiness rather than truthiness so a file named "0" is accepted.
die "No input.\n$usage" if (!defined $input || $input eq '');
# The chunk size is compared numerically against a running counter, so it
# must be at least 1 for the input to be split at all.
die "Size must be a positive integer.\n$usage" if ($size < 1);
#####-----Main-----#####
# Read the input FASTA record by record and distribute the records, in input
# order, over numbered output files "$input.0", "$input.1", ... with $size
# records per file (the last file holds whatever remains).
my $seqin_obj = Bio::SeqIO->new(-file   => $input,
                                -format => "fasta");
my $seq_c  = 0;   # sequences written to the current output file
my $file_c = 0;   # index of the current output file
my $seqout_obj;   # writer for the current output file, opened on demand

while(my $seq_obj = $seqin_obj->next_seq)
{
  # The current output file is full: close it and move on to the next index.
  if($seq_c == $size)
  {
    $seq_c = 0;
    $file_c++;
    $seqout_obj->close if $seqout_obj;
    $seqout_obj = undef;
  }

  # Open one writer per output file instead of one per sequence.  The file is
  # truncated (">") rather than appended to, so re-running the script does not
  # pile duplicate records onto the output of an earlier run.
  if(!$seqout_obj)
  {
    my $out_name = "$input.$file_c";
    warn "Writing $out_name\n" if $verb;
    $seqout_obj = Bio::SeqIO->new(-file   => ">$out_name",
                                  -format => "fasta");
  }

  $seqout_obj->write_seq($seq_obj);
  $seq_c++;
}

# Close the last writer explicitly so buffered records are flushed before exit.
$seqout_obj->close if $seqout_obj;
warn "Done.\n";
exit(0);
#####-----Subroutines-----#####