-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmatrix_from_list.pl
executable file
·109 lines (102 loc) · 2.36 KB
/
matrix_from_list.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#! /usr/bin/perl -w
# Command-line handling.
#
# Positional arguments:
#   1. matfile - matrix file to read
#   2. otulist - OTU list file to read
#   3. type    - layout of otulist: "eco", "phylo" or "mothur"
#
# With no arguments at all the usage text is shown; with too few, or with a
# false type, the short "follow command line arg" message is shown.
# The three variables are declared at file scope so the rest of the script
# can read them.
my $usage = "Use this to filter a list an make OTUs from the list
Usage: matfile otulist eco/phylo/mothur > redirect\n";
die $usage unless (@ARGV);

# Check the count before touching the values, so chomp never runs on an
# undefined argument (which only produced warnings under -w).
die "Please follow command line arg\n" unless (@ARGV >= 3);
my ($mat, $otulist, $type) = @ARGV;
chomp($mat, $otulist, $type);
die "Please follow command line arg\n" unless ($type);

# Fail fast on an unsupported type instead of discovering it only after the
# whole matrix has been parsed.
die "I don't recognize the type\n"
    unless ($type eq "eco" or $type eq "phylo" or $type eq "mothur");
# Load the tab-delimited matrix named by $mat.
#
# The first non-empty line is the header row: its first field is discarded
# and the remaining fields become @headers. On every later line the first
# field is the row label and the remaining fields are that row's cells,
# matched to @headers by position.
#
# A label that starts with "ID" and contains a later "M" is shortened to the
# text from the leading "ID" through the last "M" (the match is greedy);
# any other label is used unchanged.
#
# Fills these file-scoped structures, read later in the script:
#   @headers                 column names from the header row
#   $mathash{row}{col}       cell value
#   $abundhash{row}          sum of the row's cells
#   $mathashgot{row}{col}    true once the cell has been seen
#
# NOTE(review): if two rows shorten to the same key, $mathash keeps only the
# later row's cells while $abundhash adds both rows together - confirm
# whether that is intended.
my (@headers, %mathash, %abundhash, %mathashgot);

# Three-argument open with a lexical handle: the filename can no longer be
# interpreted as a mode or command, and $! explains why an open failed.
open(my $mat_fh, '<', $mat) or die "Can't open $mat: $!\n";

my $header_pending = 1;
while (my $line = <$mat_fh>) {
    chomp $line;
    # Skip only truly empty lines; a bare truth test would also skip "0".
    next unless length $line;

    my ($label, @cells) = split /\t/, $line;

    if ($header_pending) {
        @headers        = @cells;
        $header_pending = 0;
        next;
    }

    # A line made only of tabs splits to nothing; there is no row to store.
    next unless defined $label;

    my $otu = ($label =~ /^(ID.+M)/) ? $1 : $label;

    for my $col (0 .. $#cells) {
        my $head = $headers[$col];
        $mathash{$otu}{$head} = $cells[$col];
        $abundhash{$otu} += $cells[$col];
        $mathashgot{$otu}{$head}++;
    }
}
close($mat_fh);
# Merge matrix rows into OTUs as listed in the file named by $otulist.
#
# Each non-empty line describes one OTU; its layout depends on $type:
#   phylo  : OTU number, then one member name per tab-separated field
#   eco    : one member name per tab-separated field (no OTU number)
#   mothur : OTU number, a tab, then the member names separated by commas
#
# For every OTU the member with the greatest total abundance (%abundhash) is
# picked as its representative; on a tie the earliest member is kept. The
# output label is the representative's name for eco and the OTU number for
# phylo and mothur. The members' matrix rows (%mathash) are then summed
# column by column into $finalhash{label}{header}.
#
# Dies with "Missing <name> from mathash" when a member has no value for one
# of the matrix columns, and with "I don't recognize the type" for any other
# $type.
my (%transhash, %finalhash);

# Turn a member name into the key used for matrix rows: the text from a
# leading "ID" through the last "M" when the name has that form, otherwise
# the name unchanged. The pattern is anchored, matching how the matrix keys
# are built, so selection and summation always look up the same key.
sub otu_key {
    my ($raw) = @_;
    return ($raw =~ /^(ID.+M)/) ? $1 : $raw;
}

open(my $list_fh, '<', $otulist) or die "Can't open $otulist: $!\n";

while (my $line = <$list_fh>) {
    chomp $line;
    # Skip only truly empty lines; a bare truth test would also drop an eco
    # OTU whose single member is named "0".
    next unless length $line;

    my ($otunumber, @groups);
    if ($type eq "phylo") {
        ($otunumber, @groups) = split /\t/, $line;
    }
    elsif ($type eq "eco") {
        @groups = split /\t/, $line;
    }
    elsif ($type eq "mothur") {
        my $groupline;
        ($otunumber, $groupline) = split /\t/, $line;
        @groups = defined $groupline ? split(/,/, $groupline) : ();
    }
    else {
        die "I don't recognize the type\n";
    }

    # An OTU without members contributes nothing to the output.
    next unless @groups;

    my @members = map { otu_key($_) } @groups;

    # Call the OTU by its most abundant member. "Nothing chosen yet" is
    # tracked with defined() so that a genuine abundance of 0 is not
    # mistaken for it. A member absent from the matrix counts as 0 here and
    # is reported by the summation loop below.
    my ($rep, $rep_abund);
    for my $member (@members) {
        my $abund = defined $abundhash{$member} ? $abundhash{$member} : 0;
        if (!defined $rep or $abund > $rep_abund) {
            ($rep, $rep_abund) = ($member, $abund);
        }
    }

    # mothur lines carry an OTU number just like phylo lines; without a label
    # for them every mothur OTU would be summed under one undefined key.
    my $label = ($type eq "eco") ? $rep : $otunumber;
    $transhash{$rep} = $label;

    # Now get the overall distribution from the matrix.
    for my $member (@members) {
        for my $head (@headers) {
            die "Missing $member from mathash\n"
                unless $mathashgot{$member}{$head};
            $finalhash{$label}{$head} += $mathash{$member}{$head};
        }
    }
}
close($list_fh);
# Write the merged table to standard output as tab-separated text:
# a header line ("OTU" followed by the matrix column names), then one line
# per OTU label in string-sorted order with its summed value per column.
print join("\t", "OTU", @headers), "\n";

for my $row_id (sort keys %finalhash) {
    my $row = $finalhash{$row_id};
    print join("\t", $row_id, map { $row->{$_} } @headers), "\n";
}