-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_preps.pl
81 lines (59 loc) · 1.62 KB
/
find_preps.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/perl
use strict;
my $dirname = shift;
my @flist = <$dirname/*.txt>;
foreach my $fname ( @flist ) {
open FP, $fname or die "Cannot open $fname\n";
my $str = "";
while( <FP> ) {
$str .= $_;
}
# each @lines index contains a line of CoNLL formatted text
my @lines = split "\n", $str;
my @output = ();
my @heads = ();
my $prepcount = 0;
my $matchcount = 0;
for( my $i = 0; $i < @lines; $i++ ) {
# divide CoNLL formatted text into columns
my @cols = split "\t", $lines[$i];
push @output, $lines[$i]; # output buffer
# ----- denotes the end of an utterance "block"
if( $lines[$i] =~ /\-\-\-\-\-/ ) {
# if there are 2+ prepositional phrases in a block
# iterate over their heads and look for at least one match
# display utterance info. and update counter if match found
if ( $prepcount > 1 ) {
my %match;
foreach my $head ( @heads ) {
# continue iterating unless we find a match
next unless $match{ $head }++;
$matchcount++;
print "\nMatch #$matchcount\n";\
print "Line: $i\n";
print "\nThe count is: $prepcount";
print "\nThe heads are: @heads\n";
# print phrase details in CoNLL format
for( my $j = 1; $j < @output; $j++ ) {
print "$output[$j]\n";
}
# break and clear @output for next utterance if match found
@output = ();
last;
}
}
else {
@output = ();
}
# reset prepcount and clear @heads for next utterance
$prepcount = 0;
@heads = ();
}
# if the current word is a preposition
if( $cols[3] =~ /prep/ ) {
$prepcount++;
push @heads, $cols[6];
}
}
print "\n";
}