forked from andand/DEGEPRIME
-
Notifications
You must be signed in to change notification settings - Fork 7
/
MakeSilvaTaxonomy.pl
62 lines (41 loc) · 1.39 KB
/
MakeSilvaTaxonomy.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/perl -w
=head1 NAME
MakeSilvaTaxonomy.pl - Extracts taxonomic information from a Silva (http://www.arb-silva.de/) file in fasta format and outputs a tab-separated file with sequence id in first column and taxonomy in second column (with the different taxonomic levels separated by semicolon).
=head1 USAGE
perl MakeSilvaTaxonomy.pl -i SILVA_TAX_FASTA_FILE -o OUTPUT_FILE [-h]
=head1 REQUIRED ARGUMENTS
-i SILVA_TAX_FASTA_FILE Specify SILVA file in fasta format with taxonomies in sequence headers
-o OUTPUT_FILE Specify output file name (overwrites existing file with same name)
=head1 OPTIONAL ARGUMENTS
-h Print this help message
=cut
use Getopt::Long;
# Command-line state. These are file-scoped lexicals, so the sub defined
# further down in this file can still read $infile and $outfile directly.
my $infile  = undef;
my $outfile = undef;
my $help    = 0;

# GetOptions returns false when it meets an unknown or malformed option;
# treat that like a usage error instead of silently carrying on.
my $options_ok = GetOptions('i=s' => \$infile, 'o=s' => \$outfile, 'h!' => \$help);
if (!$options_ok or !$infile or !$outfile or $help) {
    # Show the POD embedded in this script as the help text.
    system('perldoc', $0);
    # Only an explicit, well-formed help request counts as success; missing
    # or invalid arguments give a non-zero status so callers can detect them.
    exit(($help && $options_ok) ? 0 : 1);
}
#####
print"\nRunning MakeSilvaTaxonomy\n";
# Called with explicit (empty) parentheses: the legacy "&name;" form would
# implicitly forward the caller's @_ to the sub.
extract_taxonomy_from_silvafasta();
print"Finnished MakeSilvaTaxonomy succesfully\n\n";
#####
# Write an "id<TAB>taxonomy" table from the header lines of a fasta file.
#
# Arguments (both optional, so existing argument-less calls keep working):
#   1. path of the input fasta file   - defaults to the file-level $infile
#   2. path of the output table file  - defaults to the file-level $outfile
#
# Every line whose first character is ">" is treated as a header. The text up
# to the first run of whitespace (minus the leading ">") is the sequence id;
# everything after it is written unchanged as the taxonomy. A header without
# any text after the id yields an empty taxonomy column. All other lines are
# ignored.
#
# Dies if the input cannot be opened for reading, or if the output cannot be
# opened for writing or closed cleanly. An existing output file is overwritten.
sub extract_taxonomy_from_silvafasta {
    my ($fasta_path, $table_path) = @_;
    $fasta_path = $infile  unless defined $fasta_path;
    $table_path = $outfile unless defined $table_path;

    # Three-argument open with lexical handles: the file names are taken
    # literally (no mode characters or whitespace trimming are read out of
    # them) and the handles are private to this call.
    open(my $in_fh, '<', $fasta_path)
        or die("Can't open $fasta_path: $!\n");
    open(my $out_fh, '>', $table_path)
        or die("Can't write to $table_path: $!\n");

    while (my $row = <$in_fh>) {
        # Strip the line ending, accepting both LF and CRLF so that a stray
        # carriage return never ends up inside the taxonomy column.
        $row =~ s/\r?\n\z//;
        next unless substr($row, 0, 1) eq ">";

        # Split at the first run of whitespace only; the taxonomy string may
        # itself contain spaces.
        my ($id, $tax) = split(/\s+/, $row, 2);
        $id  = substr($id, 1);             # drop the leading ">"
        $tax = "" unless defined $tax;     # header with an id but no taxonomy

        print {$out_fh} "$id\t$tax\n";
    }

    close($in_fh);
    # Buffered write errors (e.g. a full disk) only surface at close time.
    close($out_fh)
        or die("Can't finish writing $table_path: $!\n");
    return;
}