-
Notifications
You must be signed in to change notification settings - Fork 1
/
nobadnonterm
executable file
·40 lines (34 loc) · 1.13 KB
/
nobadnonterm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/local/bin/perl -w
# Author: Jason Eisner, University of Pennsylvania
# Usage: nobadnonterm train [test]
#
# Produces a version of stdin (or test) that comments out any lines
# whose RHS mentions a nonterminal that never appears on the RHS of a line in train.
#
# This should work even with the output of prefixcounts, or with scored data.
#
# The primary use is preparing input that is palatable to the transformational smoother,
# since the other smoothers know enough to give such nonterminals a probability of 0.
# !!! of course I should just fix the transf. smoother.
require("stamp.inc"); &stamp; # modify $0 and @INC, and print timestamp
die "$0: bad command line flags" if @ARGV && $ARGV[0] =~ /^-./;

# The first argument names the training file.  Anything left in @ARGV
# afterwards is the test input consumed later via <>.
my $trainfile = shift(@ARGV);
die "$0: usage: $0 train [test]\n" unless defined $trainfile;

# Three-argument open: the name is taken literally as a file to read, and a
# failure reports which file could not be opened and why.
open(my $train_fh, '<', $trainfile)
    or die "$0: cannot open training file '$trainfile': $!\n";

# %nt is the set of symbols seen on the RHS of any non-comment training line.
# It is declared at file scope because the filtering loop below consults it.
my %nt;
while (my $line = <$train_fh>) {
    next if $line =~ /^\#/;     # skip commented-out lines
    chomp $line;                # chomp, not chop: a final line without a
                                # newline must not lose its last character
    $line =~ s/~//g;            # kill arg marks
    $line =~ s/.*\t//g;         # leave only RHS behind (text after last tab)
    $nt{$_} = 1 for split ' ', $line;
}
close($train_fh);
# Copy the test input (remaining file argument, or stdin) to stdout unchanged,
# except that a line is prefixed with "# " when its RHS contains any symbol
# that is not a key of %nt.  Lines already starting with "#" pass through.
while (my $line = <>) {
    if ($line !~ /^\#/) {
        my $rhs = $line;        # work on a copy; the original is printed as-is
        chomp $rhs;             # chomp, not chop: a final line without a
                                # newline must not lose its last character
        $rhs =~ s/~//g;         # kill arg marks (but not in the printed line)
        $rhs =~ s/.*\t//g;      # leave only RHS behind (text after last tab)
        print "# " if grep { !$nt{$_} } split ' ', $rhs;
    }
    print $line;
}