-
Notifications
You must be signed in to change notification settings - Fork 2
/
EMBL_parser.pm
111 lines (81 loc) · 1.89 KB
/
EMBL_parser.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
package EMBL_parser;
use strict;
use warnings;
use Carp;
sub new {
my $packagename = shift;
my $filename = shift;
my $fh;
if ($filename =~ /\.gz$/) {
open ($fh, "gunzip -c $filename | ");
}
else {
open ($fh, $filename) or confess "Error, cannot open file $filename";
}
my $self = { fh => $fh,
filename => $filename,
};
bless ($self, $packagename);
return($self);
}
sub next {
my $self = shift;
my $fh = $self->{fh};
my $record_text = "";
while (my $line = <$fh>) {
$record_text .= $line;
if ($line =~ m|^//|) {
last;
}
}
if ($record_text) {
my $embl_record = EMBL_record->new($record_text);
return($embl_record);
}
else {
return undef;
}
}
######################
######################
package EMBL_record;
use strict;
use warnings;
use Carp;
####
sub new {
my $packagename = shift;
my $record_text = shift;
unless ($record_text) {
confess "Error, need record text to build record obj";
}
my $self = {
record => $record_text,
sections => {}, # key => text separated by newlines for those entries with the same token
};
bless ($self, $packagename);
$self->init($record_text);
return($self);
}
####
sub init {
my $self = shift;
my $record_text = shift;
my @lines = split(/\n/, $record_text);
my $prev_tok = "";
foreach my $line (@lines) {
$line .= "\n";
if ($line =~ /^(\S{2})\s+(.*)$/s) {
$prev_tok = $1;
$self->{sections}->{$prev_tok} .= $2;
}
else {
if (! $prev_tok) {
confess "Error, have line $line but no prev_tok";
}
$self->{sections}->{$prev_tok} .= $line;
}
}
return;
}
1; #EOM