Skip to content

Commit

Permalink
update tests for parse models
Browse files Browse the repository at this point in the history
  • Loading branch information
bjascob committed Nov 27, 2021
1 parent c51fc96 commit dc0b792
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 1 deletion.
49 changes: 49 additions & 0 deletions scripts/60_RBW_Aligner/02_Build_Aligment_Test_Corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/python3
import setup_run_dir # Set the working directory and python sys.path to 2 levels above
import os
import penman
from amrlib.graph_processing.amr_loading_raw import load_raw_amr
from amrlib.alignments.penman_utils import test_for_decode_encode_issue, strip_surface_alignments


# Get rid of un-needed metadata and rename "alignments", "isi_alignments"
def mod_graph_meta(graph):
    """Trim a graph's metadata to id/tok and rename "alignments" to "isi_alignments".

    Args:
        graph: object exposing a ``metadata`` dict that contains the keys
            'id', 'tok' and 'alignments'.

    Returns:
        The same ``graph`` object; its ``metadata`` attribute is replaced in place
        by a new dict holding only 'id', 'tok' and 'isi_alignments' (in that order).

    Raises:
        KeyError: if 'id', 'tok' or 'alignments' is missing from the metadata.
    """
    meta = graph.metadata
    # Build a fresh dict (rather than deleting keys) so every other metadata entry is dropped
    graph.metadata = {
        'id': meta['id'],
        'tok': meta['tok'],
        'isi_alignments': meta['alignments'],
    }
    return graph


# Build a corpus of test cases for alignments
if __name__ == '__main__':
    # Input directory and the two output files (with / without surface alignments)
    corp_dir = 'amrlib/data/amr_annotation_3.0/data/alignments/split/test'
    graph_fn = 'amrlib/data/alignments/test_w_surface.txt'
    graph_ns_fn = 'amrlib/data/alignments/test_no_surface.txt'

    # Both output files live in the same directory, so creating it once is enough
    os.makedirs(os.path.dirname(graph_fn), exist_ok=True)

    # Loop through the files and load all entries.
    # os.listdir() returns names in arbitrary order, so sort them to make the
    # generated corpus files reproducible from run to run.
    entries = []
    print('Loading data from', corp_dir)
    for fn in sorted(os.listdir(corp_dir)):
        entries += load_raw_amr(os.path.join(corp_dir, fn))
    print('Loaded {:,} entries'.format(len(entries)))

    # Check for the penman decode/re-encode issue and strip some metadata.
    # An entry is kept only if BOTH variants pass, so the two output files stay
    # aligned entry-for-entry.
    good_graphs = []
    good_graphs_ns = []
    for entry in entries:
        # Create a version with No Surface alignments
        entry_ns = strip_surface_alignments(entry)
        graph, is_good = test_for_decode_encode_issue(entry)
        graph_ns, is_good_ns = test_for_decode_encode_issue(entry_ns)
        if is_good and is_good_ns:
            good_graphs.append(mod_graph_meta(graph))
            good_graphs_ns.append(mod_graph_meta(graph_ns))

    # Save the collated data
    print('Saving {:,} good graphs to {:} and {:}'.format(len(good_graphs), graph_fn, graph_ns_fn))
    penman.dump(good_graphs, graph_fn, indent=6)
    penman.dump(good_graphs_ns, graph_ns_fn, indent=6)
63 changes: 63 additions & 0 deletions tests/auto/ModelParseSPRING.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/python3
import os
import sys
sys.path.insert(0, '../..') # make '..' first in the lib search path
import logging
import unittest
import spacy
import amrlib
from amrlib.defaults import data_dir

# Base classes and relative imports are proving to be problematic so for now, simply copy the code.

# UnitTest creates a separate instance of the class for each test in it so __init__ gets called
# a bunch of times. However, they all seem to run in the same process so globals are shared.
# To avoid loading Spacy multiple times cache it in a global variable.
# For the stog_model, amrlib caches this and since there is only one process it will stay in-memory
# across all unit tests (even ones in other files when run with RunAllUnitTests.py) until explicitly
# reloaded with amrlib.load_stog_model(model_dir).
# When the spacy extensions are called they check to see if a global stog_model is not None, and
# only call the loader if it's not already loaded.
SPRING_LOADED = None    # set to True once this module has loaded its model into amrlib.stog_model
SPACY_NLP = None        # module-wide cache of the spaCy pipeline so it is only loaded once


class ModelParseSPRING(unittest.TestCase):
    """Smoke tests for the SPRING parse model, used directly and via the spaCy extension.

    The fixtures are loaded in setUpClass() rather than __init__().  unittest builds
    one instance of this class per test method while collecting tests, so loading in
    __init__() happens before any test runs; when several model test modules are
    collected together, the global amrlib.stog_model would then hold whichever model
    was loaded last.  setUpClass() runs once, immediately before this class's tests,
    so the spaCy extension tests exercise this specific model.
    """
    model_dir = os.path.join(data_dir, 'model_parse_spring-v0_1_0')

    @classmethod
    def setUpClass(cls):
        """Register the spaCy extension and load spaCy plus this class's parse model."""
        global SPACY_NLP, SPRING_LOADED
        amrlib.setup_spacy_extension()
        # Load/cache spacy
        if SPACY_NLP is None:
            SPACY_NLP = spacy.load('en_core_web_sm')
        cls.nlp = SPACY_NLP
        # Always (re)load so amrlib's cached stog_model is this specific model,
        # even if another test module loaded a different one earlier in the process.
        print('Loading', cls.model_dir)
        amrlib.load_stog_model(model_dir=cls.model_dir)
        SPRING_LOADED = True
        cls.stog = amrlib.stog_model

    def testStoG(self):
        """Parsing one sentence directly with the model yields one graph."""
        graphs = self.stog.parse_sents(['This is a test of the system.'])
        self.assertEqual(len(graphs), 1)

    def testSpaCyDoc(self):
        """to_amr() on a two-sentence Doc yields two graphs."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        graphs = doc._.to_amr()
        self.assertEqual(len(graphs), 2)

    def testSpaCySpan(self):
        """to_amr() on a single-sentence Span yields one graph."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        span = list(doc.sents)[0]   # first sentence only
        graphs = span._.to_amr()
        self.assertEqual(len(graphs), 1)


if __name__ == '__main__':
    # Show warnings and above, tagged with level, source file and line number.
    # Arguments are passed directly so no module-level name shadows the builtin format().
    logging.basicConfig(
        level=logging.WARNING,
        format='[%(levelname)s %(filename)s ln=%(lineno)s] %(message)s',
    )

    # run all methods that start with 'test'
    unittest.main()
63 changes: 63 additions & 0 deletions tests/auto/ModelParseT5v1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/python3
import os
import sys
sys.path.insert(0, '../..') # make '..' first in the lib search path
import logging
import unittest
import spacy
import amrlib
from amrlib.defaults import data_dir

# Base classes and relative imports are proving to be problematic so for now, simply copy the code.

# UnitTest creates a separate instance of the class for each test in it so __init__ gets called
# a bunch of times. However, they all seem to run in the same process so globals are shared.
# To avoid loading Spacy multiple times cache it in a global variable.
# For the stog_model, amrlib caches this and since there is only one process it will stay in-memory
# across all unit tests (even ones in other files when run with RunAllUnitTests.py) until explicitly
# reloaded with amrlib.load_stog_model(model_dir).
# When the spacy extensions are called they check to see if a global stog_model is not None, and
# only call the loader if it's not already loaded.
T5V1_LOADED = None      # set to True once this module has loaded its model into amrlib.stog_model
SPACY_NLP = None        # module-wide cache of the spaCy pipeline so it is only loaded once


class ModelParseT5v1(unittest.TestCase):
    """Smoke tests for the T5 v0_1_0 parse model, used directly and via the spaCy extension.

    The fixtures are loaded in setUpClass() rather than __init__().  unittest builds
    one instance of this class per test method while collecting tests, so loading in
    __init__() happens before any test runs; when several model test modules are
    collected together, the global amrlib.stog_model would then hold whichever model
    was loaded last.  setUpClass() runs once, immediately before this class's tests,
    so the spaCy extension tests exercise this specific model.
    """
    model_dir = os.path.join(data_dir, 'model_parse_t5-v0_1_0')

    @classmethod
    def setUpClass(cls):
        """Register the spaCy extension and load spaCy plus this class's parse model."""
        global SPACY_NLP, T5V1_LOADED
        amrlib.setup_spacy_extension()
        # Load/cache spacy
        if SPACY_NLP is None:
            SPACY_NLP = spacy.load('en_core_web_sm')
        cls.nlp = SPACY_NLP
        # Always (re)load so amrlib's cached stog_model is this specific model,
        # even if another test module loaded a different one earlier in the process.
        print('Loading', cls.model_dir)     #rbf
        amrlib.load_stog_model(model_dir=cls.model_dir)
        T5V1_LOADED = True
        cls.stog = amrlib.stog_model

    def testStoG(self):
        """Parsing one sentence directly with the model yields one graph."""
        graphs = self.stog.parse_sents(['This is a test of the system.'])
        self.assertEqual(len(graphs), 1)

    def testSpaCyDoc(self):
        """to_amr() on a two-sentence Doc yields two graphs."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        graphs = doc._.to_amr()
        self.assertEqual(len(graphs), 2)

    def testSpaCySpan(self):
        """to_amr() on a single-sentence Span yields one graph."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        span = list(doc.sents)[0]   # first sentence only
        graphs = span._.to_amr()
        self.assertEqual(len(graphs), 1)


if __name__ == '__main__':
    # Show warnings and above, tagged with level, source file and line number.
    # Arguments are passed directly so no module-level name shadows the builtin format().
    logging.basicConfig(
        level=logging.WARNING,
        format='[%(levelname)s %(filename)s ln=%(lineno)s] %(message)s',
    )

    # run all methods that start with 'test'
    unittest.main()
63 changes: 63 additions & 0 deletions tests/auto/ModelParseT5v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/python3
import os
import sys
sys.path.insert(0, '../..') # make '..' first in the lib search path
import logging
import unittest
import spacy
import amrlib
from amrlib.defaults import data_dir

# Base classes and relative imports are proving to be problematic so for now, simply copy the code.

# UnitTest creates a separate instance of the class for each test in it so __init__ gets called
# a bunch of times. However, they all seem to run in the same process so globals are shared.
# To avoid loading Spacy multiple times cache it in a global variable.
# For the stog_model, amrlib caches this and since there is only one process it will stay in-memory
# across all unit tests (even ones in other files when run with RunAllUnitTests.py) until explicitly
# reloaded with amrlib.load_stog_model(model_dir).
# When the spacy extensions are called they check to see if a global stog_model is not None, and
# only call the loader if it's not already loaded.
T5V2_LOADED = None      # set to True once this module has loaded its model into amrlib.stog_model
SPACY_NLP = None        # module-wide cache of the spaCy pipeline so it is only loaded once


class ModelParseT5v2(unittest.TestCase):
    """Smoke tests for the T5 v0_2_0 parse model, used directly and via the spaCy extension.

    The fixtures are loaded in setUpClass() rather than __init__().  unittest builds
    one instance of this class per test method while collecting tests, so loading in
    __init__() happens before any test runs; when several model test modules are
    collected together, the global amrlib.stog_model would then hold whichever model
    was loaded last.  setUpClass() runs once, immediately before this class's tests,
    so the spaCy extension tests exercise this specific model.
    """
    model_dir = os.path.join(data_dir, 'model_parse_t5-v0_2_0')

    @classmethod
    def setUpClass(cls):
        """Register the spaCy extension and load spaCy plus this class's parse model."""
        global SPACY_NLP, T5V2_LOADED
        amrlib.setup_spacy_extension()
        # Load/cache spacy
        if SPACY_NLP is None:
            SPACY_NLP = spacy.load('en_core_web_sm')
        cls.nlp = SPACY_NLP
        # Always (re)load so amrlib's cached stog_model is this specific model,
        # even if another test module loaded a different one earlier in the process.
        print('Loading', cls.model_dir)
        amrlib.load_stog_model(model_dir=cls.model_dir)
        T5V2_LOADED = True
        cls.stog = amrlib.stog_model

    def testStoG(self):
        """Parsing one sentence directly with the model yields one graph."""
        graphs = self.stog.parse_sents(['This is a test of the system.'])
        self.assertEqual(len(graphs), 1)

    def testSpaCyDoc(self):
        """to_amr() on a two-sentence Doc yields two graphs."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        graphs = doc._.to_amr()
        self.assertEqual(len(graphs), 2)

    def testSpaCySpan(self):
        """to_amr() on a single-sentence Span yields one graph."""
        doc = self.nlp('This is a test of the SpaCy extension. The test has multiple sentence')
        span = list(doc.sents)[0]   # first sentence only
        graphs = span._.to_amr()
        self.assertEqual(len(graphs), 1)


if __name__ == '__main__':
    # Show warnings and above, tagged with level, source file and line number.
    # Arguments are passed directly so no module-level name shadows the builtin format().
    logging.basicConfig(
        level=logging.WARNING,
        format='[%(levelname)s %(filename)s ln=%(lineno)s] %(message)s',
    )

    # run all methods that start with 'test'
    unittest.main()
5 changes: 4 additions & 1 deletion tests/manual/Test_RBW_Alignment_String_Generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
from amrlib.alignments.penman_utils import test_for_decode_encode_issue


# 11/27/2021: This test is currently broken


# Manual test to see if amrlib can generate the alignment string from surface alignments correctly,
# using the LDC data as the baseline
# !! Note that you must first create the test corpus. See the scripts directory for this
# !! Note that you must first create the test corpus. See the scripts directory/Build_Aligment_Test_Corpus.py
if __name__ == '__main__':
fname = 'amrlib/data/alignments/test_w_surface.txt'
entries = load_amr_entries(fname)
Expand Down

0 comments on commit dc0b792

Please sign in to comment.