-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathswitch_letter_cases_in_fasta.pl
executable file
·100 lines (81 loc) · 2.42 KB
/
switch_letter_cases_in_fasta.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Long;
##############################################################
# script: switch_letter_cases_in_fasta.pl
# author: Jia-Xing Yue (GitHub ID: yjx1217)
# last edited: 2018.01.11
# description: convert all sequences in a FASTA file to uppercase or to lowercase
# example: perl switch_letter_cases_in_fasta.pl -i input.fa(.gz) -o output.fa(.gz) -c upper # to transform all lowercases to uppercases
# example: perl switch_letter_cases_in_fasta.pl -i input.fa(.gz) -o output.fa(.gz) -c lower # to transform all uppercases to lowercases
##############################################################
# Main script: parse the command line, load every FASTA record from the input
# file, then write the records back out in their original order with each
# sequence converted to the requested letter case.
my ($input, $output, $case);
$case = "upper"; # default: convert every sequence to uppercase.
GetOptions('input|i:s' => \$input,
           'output|o:s' => \$output,
           'case|c:s' => \$case); # case can be 'upper' (default) or 'lower'.

# Fail early, with a clear message, when a required option is missing.
die "please specify an input fasta file with -i!\n"
    unless defined $input && length $input;
die "please specify an output fasta file with -o!\n"
    unless defined $output && length $output;

# Validate the case option once, before any file is opened, so that a typo
# cannot truncate the output file or go unnoticed when the input is empty.
my $to_upper;
if ($case =~ /^upper/) {
    $to_upper = 1;
} elsif ($case =~ /^lower/) {
    $to_upper = 0;
} else {
    die "unrecognized case (-c) option: $case! Please only use 'upper' or 'lower'!\n";
}

my $input_fh = read_file($input);
my @input = (); # sequence ids in input order
my %input = (); # sequence id => sequence
parse_fasta_file($input_fh, \%input, \@input);
# A failing close on the input (e.g. gunzip exiting non-zero) means the data
# read may be incomplete; report it instead of staying silent.
close($input_fh)
    or warn "warning: problem while reading $input (status $?); output may be incomplete\n";

my $output_fh = write_file($output);
foreach my $id (@input) {
    my $seq = $to_upper ? uc $input{$id} : lc $input{$id};
    print {$output_fh} ">$id\n$seq\n"
        or die "can't write to $output: $!\n";
}
# Buffered write errors and a failing gzip child only surface at close time.
close($output_fh)
    or die "can't finish writing $output: $! (status $?)\n";
# Open a file for reading and return the filehandle.
#   $file - path of the file to read. A name ending in ".gz" is decompressed
#           on the fly through an external `gunzip -c`; "-" means standard
#           input (the meaning it had under the former two-argument open).
# Returns the opened filehandle.
# Dies when no file name is given or when the file/pipe cannot be opened.
sub read_file {
    my $file = shift @_;
    die "no input file given\n" unless defined $file && length $file;
    my $fh;
    if ($file eq '-') {
        open($fh, '<&', \*STDIN) or die "can't open STDIN: $!";
    } elsif ($file =~ /\.gz$/) {
        # List-form pipe open: the file name reaches gunzip as one argument
        # and is never interpreted by a shell; "--" guards names starting
        # with a dash.
        open($fh, '-|', 'gunzip', '-c', '--', $file)
            or die "can't open pipe to $file: $!";
    } else {
        # Three-argument open: characters such as ">", "|" or surrounding
        # whitespace in the name cannot change what gets opened.
        open($fh, '<', $file) or die "can't open $file: $!";
    }
    return $fh;
}
# Open a file for writing (truncating it) and return the filehandle.
#   $file - path of the file to write. A name ending in ".gz" is compressed
#           on the fly through an external `gzip -c`; "-" means standard
#           output (the meaning it had under the former two-argument open).
# Returns the opened filehandle. For ".gz" output the caller should check the
# result of close(), because a gzip failure is only reported there.
# Dies when no file name is given or when the file/pipe cannot be opened.
sub write_file {
    my $file = shift @_;
    die "no output file given\n" unless defined $file && length $file;
    my $fh;
    if ($file eq '-') {
        open($fh, '>&', \*STDOUT) or die "can't open STDOUT: $!\n";
    } elsif ($file =~ /\.gz$/) {
        # The dot is escaped so that only a real ".gz" suffix is compressed.
        # The file name is passed to the shell as positional parameter $1
        # (the Perl string is single-quoted, so "$1" is expanded by sh, not
        # by Perl) and is therefore never parsed as shell syntax.
        open($fh, '|-', 'sh', '-c', 'exec gzip -c > "$1"', 'sh', $file)
            or die "can't open $file: $!\n";
    } else {
        open($fh, '>', $file) or die "can't open $file: $!\n";
    }
    return $fh;
}
# Read a list file, one entry per line, and count the occurrences of each entry.
#   $fh - open filehandle to read the list from
# Returns a hash (as a flat key/value list) that maps every line, with its
# line ending and trailing whitespace removed, to the number of times it was
# seen. Blank lines are counted under the empty-string key.
sub parse_list_file {
    my $fh = shift @_;
    my %list = ();
    # A lexical line variable is used because `while (<$fh>)` would overwrite
    # the caller's global $_.
    while (my $line = <$fh>) {
        chomp $line;
        $line =~ s/\s+$//g;
        $list{$line}++;
    }
    return %list;
}
# Parse FASTA records from a filehandle.
#   $fh             - open filehandle to read from
#   $input_hashref  - hash reference filled with: header text => sequence
#   $input_arrayref - array reference filled with the header texts in the
#                     order they appear
# The header text is everything after ">" on the header line. Sequence lines
# of a record are concatenated. Blank lines and lines whose first
# non-whitespace character is "#" are skipped.
# NOTE: sequence lines seen before the first header are collected under the
# empty-string key, which is never added to the array. A header text that
# occurs twice resets its sequence and is pushed onto the array twice.
# Returns nothing; results are delivered through the two references.
sub parse_fasta_file {
    my ($fh, $input_hashref, $input_arrayref) = @_;
    my $seq_name = "";
    # A lexical line variable is used because `while (<$fh>)` would overwrite
    # the caller's global $_.
    while (my $line = <$fh>) {
        chomp $line;
        # chomp leaves the carriage return of CRLF (Windows) line endings;
        # strip it so it does not end up inside ids or sequences.
        $line =~ s/\r\z//;
        if ($line =~ /^\s*$/) {
            next;
        } elsif ($line =~ /^\s*#/) {
            next;
        } elsif ($line =~ /^>(.*)/) {
            $seq_name = $1;
            push @$input_arrayref, $seq_name;
            $$input_hashref{$seq_name} = "";
        } else {
            $$input_hashref{$seq_name} .= $line;
        }
    }
    return;
}