-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
242 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#!/usr/bin/python3 | ||
import setup_run_dir # Set the working directory and python sys.path to 2 levels above | ||
import os | ||
import penman | ||
from amrlib.graph_processing.amr_loading_raw import load_raw_amr | ||
from amrlib.alignments.penman_utils import test_for_decode_encode_issue, strip_surface_alignments | ||
|
||
|
||
# Get rid of un-needed metadata and rename "alignments" to "isi_alignments" | ||
def mod_graph_meta(graph):
    """Reduce a graph's metadata to the fields used by the alignment test corpus.

    Keeps the ``id`` and ``tok`` entries, stores the ``alignments`` entry under
    the new key ``isi_alignments`` and drops every other metadata entry.

    Args:
        graph: Object with a dict-like ``metadata`` attribute that contains the
            keys ``id``, ``tok`` and ``alignments``.

    Returns:
        The same ``graph`` object; its ``metadata`` attribute is replaced by the
        reduced dict (the input is modified, not copied).

    Raises:
        KeyError: If ``id``, ``tok`` or ``alignments`` is missing from the metadata.
    """
    meta = graph.metadata
    # Read all three values before rebinding so a missing key leaves the graph untouched.
    graph.metadata = {
        'id': meta['id'],
        'tok': meta['tok'],
        'isi_alignments': meta['alignments'],
    }
    return graph
|
||
|
||
# Build a corpus of test cases for alignments | ||
if __name__ == '__main__':
    corp_dir = 'amrlib/data/amr_annotation_3.0/data/alignments/split/test'
    graph_fn = 'amrlib/data/alignments/test_w_surface.txt'
    graph_ns_fn = 'amrlib/data/alignments/test_no_surface.txt'

    # Both output files live in the same directory, so creating it once is enough.
    os.makedirs(os.path.dirname(graph_fn), exist_ok=True)

    # Loop through the files and load all entries.
    # os.listdir() returns names in arbitrary order; sort them so the generated
    # corpus has the same entry order on every run and every machine.
    entries = []
    print('Loading data from', corp_dir)
    for fn in sorted(os.listdir(corp_dir)):
        entries += load_raw_amr(os.path.join(corp_dir, fn))
    print('Loaded {:,} entries'.format(len(entries)))

    # Check for the penman decode/re-encode issue and strip some metadata.
    # An entry is kept only if both its original form and its surface-stripped
    # form pass the check, so the two output lists stay index-aligned.
    good_graphs = []
    good_graphs_ns = []
    for entry in entries:
        # Create a version with No Surface alignments
        entry_ns = strip_surface_alignments(entry)
        graph, is_good = test_for_decode_encode_issue(entry)
        graph_ns, is_good_ns = test_for_decode_encode_issue(entry_ns)
        if is_good and is_good_ns:
            good_graphs.append(mod_graph_meta(graph))
            good_graphs_ns.append(mod_graph_meta(graph_ns))

    # Save the collated data
    print('Saving {:,} good graphs to {:} and {:}'.format(len(good_graphs), graph_fn, graph_ns_fn))
    penman.dump(good_graphs, graph_fn, indent=6)
    penman.dump(good_graphs_ns, graph_ns_fn, indent=6)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/usr/bin/python3 | ||
import os | ||
import sys | ||
sys.path.insert(0, '../..') # make '..' first in the lib search path | ||
import logging | ||
import unittest | ||
import spacy | ||
import amrlib | ||
from amrlib.defaults import data_dir | ||
|
||
# Base classes and relative imports are proving to be problematic so for now, simply copy the code. | ||
|
||
# UnitTest creates a separate instance of the class for each test in it so __init__ gets called | ||
# a bunch of times. However, they all seem to run in the same process so globals are shared. | ||
# To avoid loading Spacy multiple times cache it in a global variable. | ||
# For the stog_model, amrlib caches this and since there is only one process it will stay in-memory | ||
# across all unit tests (even ones in other files when run with RunAllUnitTests.py) until explicitly | ||
# reloaded with amrlib.load_stog_model(model_dir). | ||
# When the spacy extensions are called they check to see if a global stog_model is not None, and | ||
# only call the loader if it's not already loaded. | ||
SPRING_LOADED = None # one-shot to assure amrlib.stog_model is reloaded with this specific model | ||
SPACY_NLP = None | ||
class ModelParseSPRING(unittest.TestCase):
    """Smoke tests for the SPRING parse model, used directly and through the spaCy extensions.

    The model and spaCy pipeline are set up in ``setUpClass`` rather than in
    ``__init__``.  unittest builds one instance per test method, and those
    instances can all be created before any test runs; loading in ``__init__``
    would then load the models of every collected test file up front and leave
    ``amrlib.stog_model`` (which the spaCy extensions use) pointing at whichever
    model happened to be loaded last.  ``setUpClass`` runs once, immediately
    before this class's tests.
    """
    model_dir = os.path.join(data_dir, 'model_parse_spring-v0_1_0')

    @classmethod
    def setUpClass(cls):
        """Register the spaCy extensions and load spaCy plus this class's parse model."""
        global SPACY_NLP, SPRING_LOADED
        amrlib.setup_spacy_extension()
        # Load/cache spacy in the module global so it is only loaded once per process
        if SPACY_NLP is None:
            SPACY_NLP = spacy.load('en_core_web_sm')
        cls.nlp = SPACY_NLP
        # Always (re)load so amrlib's cached global model is this specific model
        print('Loading', cls.model_dir)
        amrlib.load_stog_model(model_dir=cls.model_dir)
        SPRING_LOADED = True    # module-level flag kept up to date for existing readers
        cls.stog = amrlib.stog_model

    def testStoG(self):
        """Parsing a single sentence directly with the model yields one graph."""
        graphs = self.stog.parse_sents(['This is a test of the system.'])
        self.assertEqual(len(graphs), 1)

    def testSpaCyDoc(self):
        """The Doc extension yields one graph per sentence of a two-sentence text."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        graphs = doc._.to_amr()
        self.assertEqual(len(graphs), 2)

    def testSpaCySpan(self):
        """The Span extension yields a single graph for a one-sentence span."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        span = list(doc.sents)[0]   # first sentence only
        graphs = span._.to_amr()
        self.assertEqual(len(graphs), 1)
|
||
|
||
if __name__ == '__main__':
    # Show warnings and above, tagging each record with its level, file and line number.
    # (Names chosen so the builtin ``format`` is not shadowed at module level.)
    log_level = logging.WARNING
    log_format = '[%(levelname)s %(filename)s ln=%(lineno)s] %(message)s'
    logging.basicConfig(level=log_level, format=log_format)

    # run all methods that start with 'test'
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/usr/bin/python3 | ||
import os | ||
import sys | ||
sys.path.insert(0, '../..') # make '..' first in the lib search path | ||
import logging | ||
import unittest | ||
import spacy | ||
import amrlib | ||
from amrlib.defaults import data_dir | ||
|
||
# Base classes and relative imports are proving to be problematic so for now, simply copy the code. | ||
|
||
# UnitTest creates a separate instance of the class for each test in it so __init__ gets called | ||
# a bunch of times. However, they all seem to run in the same process so globals are shared. | ||
# To avoid loading Spacy multiple times cache it in a global variable. | ||
# For the stog_model, amrlib caches this and since there is only one process it will stay in-memory | ||
# across all unit tests (even ones in other files when run with RunAllUnitTests.py) until explicitly | ||
# reloaded with amrlib.load_stog_model(model_dir). | ||
# When the spacy extensions are called they check to see if a global stog_model is not None, and | ||
# only call the loader if it's not already loaded. | ||
T5V1_LOADED = None # one-shot to assure amrlib.stog_model is reloaded with this specific model | ||
SPACY_NLP = None | ||
class ModelParseT5v1(unittest.TestCase):
    """Smoke tests for the T5 v0.1.0 parse model, used directly and through the spaCy extensions.

    The model and spaCy pipeline are set up in ``setUpClass`` rather than in
    ``__init__``.  unittest builds one instance per test method, and those
    instances can all be created before any test runs; loading in ``__init__``
    would then load the models of every collected test file up front and leave
    ``amrlib.stog_model`` (which the spaCy extensions use) pointing at whichever
    model happened to be loaded last.  ``setUpClass`` runs once, immediately
    before this class's tests.
    """
    model_dir = os.path.join(data_dir, 'model_parse_t5-v0_1_0')

    @classmethod
    def setUpClass(cls):
        """Register the spaCy extensions and load spaCy plus this class's parse model."""
        global SPACY_NLP, T5V1_LOADED
        amrlib.setup_spacy_extension()
        # Load/cache spacy in the module global so it is only loaded once per process
        if SPACY_NLP is None:
            SPACY_NLP = spacy.load('en_core_web_sm')
        cls.nlp = SPACY_NLP
        # Always (re)load so amrlib's cached global model is this specific model
        print('Loading', cls.model_dir)
        amrlib.load_stog_model(model_dir=cls.model_dir)
        T5V1_LOADED = True  # module-level flag kept up to date for existing readers
        cls.stog = amrlib.stog_model

    def testStoG(self):
        """Parsing a single sentence directly with the model yields one graph."""
        graphs = self.stog.parse_sents(['This is a test of the system.'])
        self.assertEqual(len(graphs), 1)

    def testSpaCyDoc(self):
        """The Doc extension yields one graph per sentence of a two-sentence text."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        graphs = doc._.to_amr()
        self.assertEqual(len(graphs), 2)

    def testSpaCySpan(self):
        """The Span extension yields a single graph for a one-sentence span."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        span = list(doc.sents)[0]   # first sentence only
        graphs = span._.to_amr()
        self.assertEqual(len(graphs), 1)
|
||
|
||
if __name__ == '__main__':
    # Show warnings and above, tagging each record with its level, file and line number.
    # (Names chosen so the builtin ``format`` is not shadowed at module level.)
    log_level = logging.WARNING
    log_format = '[%(levelname)s %(filename)s ln=%(lineno)s] %(message)s'
    logging.basicConfig(level=log_level, format=log_format)

    # run all methods that start with 'test'
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/usr/bin/python3 | ||
import os | ||
import sys | ||
sys.path.insert(0, '../..') # make '..' first in the lib search path | ||
import logging | ||
import unittest | ||
import spacy | ||
import amrlib | ||
from amrlib.defaults import data_dir | ||
|
||
# Base classes and relative imports are proving to be problematic so for now, simply copy the code. | ||
|
||
# UnitTest creates a separate instance of the class for each test in it so __init__ gets called | ||
# a bunch of times. However, they all seem to run in the same process so globals are shared. | ||
# To avoid loading Spacy multiple times cache it in a global variable. | ||
# For the stog_model, amrlib caches this and since there is only one process it will stay in-memory | ||
# across all unit tests (even ones in other files when run with RunAllUnitTests.py) until explicitly | ||
# reloaded with amrlib.load_stog_model(model_dir). | ||
# When the spacy extensions are called they check to see if a global stog_model is not None, and | ||
# only call the loader if it's not already loaded. | ||
T5V2_LOADED = None # one-shot to assure amrlib.stog_model is reloaded with this specific model | ||
SPACY_NLP = None | ||
class ModelParseT5v2(unittest.TestCase):
    """Smoke tests for the T5 v0.2.0 parse model, used directly and through the spaCy extensions.

    The model and spaCy pipeline are set up in ``setUpClass`` rather than in
    ``__init__``.  unittest builds one instance per test method, and those
    instances can all be created before any test runs; loading in ``__init__``
    would then load the models of every collected test file up front and leave
    ``amrlib.stog_model`` (which the spaCy extensions use) pointing at whichever
    model happened to be loaded last.  ``setUpClass`` runs once, immediately
    before this class's tests.
    """
    model_dir = os.path.join(data_dir, 'model_parse_t5-v0_2_0')

    @classmethod
    def setUpClass(cls):
        """Register the spaCy extensions and load spaCy plus this class's parse model."""
        global SPACY_NLP, T5V2_LOADED
        amrlib.setup_spacy_extension()
        # Load/cache spacy in the module global so it is only loaded once per process
        if SPACY_NLP is None:
            SPACY_NLP = spacy.load('en_core_web_sm')
        cls.nlp = SPACY_NLP
        # Always (re)load so amrlib's cached global model is this specific model
        print('Loading', cls.model_dir)
        amrlib.load_stog_model(model_dir=cls.model_dir)
        T5V2_LOADED = True  # module-level flag kept up to date for existing readers
        cls.stog = amrlib.stog_model

    def testStoG(self):
        """Parsing a single sentence directly with the model yields one graph."""
        graphs = self.stog.parse_sents(['This is a test of the system.'])
        self.assertEqual(len(graphs), 1)

    def testSpaCyDoc(self):
        """The Doc extension yields one graph per sentence of a two-sentence text."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        graphs = doc._.to_amr()
        self.assertEqual(len(graphs), 2)

    def testSpaCySpan(self):
        """The Span extension yields a single graph for a one-sentence span."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        span = list(doc.sents)[0]   # first sentence only
        graphs = span._.to_amr()
        self.assertEqual(len(graphs), 1)
|
||
|
||
if __name__ == '__main__':
    # Show warnings and above, tagging each record with its level, file and line number.
    # (Names chosen so the builtin ``format`` is not shadowed at module level.)
    log_level = logging.WARNING
    log_format = '[%(levelname)s %(filename)s ln=%(lineno)s] %(message)s'
    logging.basicConfig(level=log_level, format=log_format)

    # run all methods that start with 'test'
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters