Skip to content

Commit

Permalink
- Added deepcopy() calls to all from_json() methods to enforce immutability…
Browse files Browse the repository at this point in the history
… of the input data and implemented first mutability tests (see #2).

- Added Dataset.to_json() test, closes #1
  • Loading branch information
devfoo-one committed Feb 10, 2020
1 parent aceb709 commit 5a5fb12
Show file tree
Hide file tree
Showing 9 changed files with 39 additions and 8 deletions.
2 changes: 2 additions & 0 deletions texoopy/model/Annotation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import json

from texoopy.model.Span import Span
Expand All @@ -13,6 +14,7 @@ def __init__(self, **kwargs):

@classmethod
def from_json(cls, json_data: dict):
json_data = copy.deepcopy(json_data)
from texoopy.model.MentionAnnotation import MentionAnnotation
from texoopy.model.NamedEntityAnnotation import NamedEntityAnnotation
if json_data['class'] == 'MentionAnnotation':
Expand Down
2 changes: 2 additions & 0 deletions texoopy/model/Dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ def __init__(self, **kwargs):
self.name: str = kwargs.get('name')
self.language: str = kwargs.get('language')
self.documents: list = []
self.queries: list = []

@classmethod
def from_json(cls, json_data: dict):
json_data = copy.deepcopy(json_data)
dataset = cls(**json_data)
for doc_json_data in json_data.get('documents', []):
dataset.documents.append(Document.from_json(doc_json_data))
Expand Down
10 changes: 6 additions & 4 deletions texoopy/model/Document.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import json

from texoopy.model.Annotation import Annotation
Expand All @@ -17,28 +18,29 @@ def __init__(self, **kwargs):

@classmethod
def from_json(cls, json_data: dict, do_sentence_splitting=False):
# Alternate constructor: builds an instance from an already-parsed JSON dict.
# Raises NotATeXooDocumentException when the dict's 'class' entry is not 'Document'.

# Work on a deep copy so that replacing 'annotations' below does not modify the caller's dict.
json_data = copy.deepcopy(json_data)
if json_data.get('class') != 'Document':
# NOTE(review): two raise statements appear back to back; this listing has no +/- diff
# markers, so presumably one is the removed line and the other the added line — confirm.
raise NotATeXooDocumentException('')
raise NotATeXooDocumentException('Supplied JSON is not a valid TeXoo document.')

# Convert each raw annotation entry into an object via Annotation.from_json().
# NOTE(review): .get('annotations') returns None when the key is missing, which cannot be
# iterated — confirm that 'annotations' is always present in the input.
annotations = []
for json_data_annotation in json_data.get('annotations'):
annotations.append(Annotation.from_json(json_data_annotation))
json_data['annotations'] = annotations

if do_sentence_splitting:
# NOTE(review): 'pass' and 'raise NotImplementedError' are both listed here; presumably
# one is the pre-commit line and the other its replacement — confirm.
pass # TODO add sentence splitting
raise NotImplementedError("Sentence splitting is not implemented yet.") # TODO add sentence splitting

# The remaining dict entries are handed to the constructor as keyword arguments.
return cls(**json_data)

def to_json(self):
# Serialise this object to a JSON string; the top-level dict is tagged with class 'Document'.
content = self.to_texoo_dict()
content['class'] = 'Document'
# Values that json cannot encode natively are converted through their own to_texoo_dict().
return json.dumps(content, default=lambda o: o.to_texoo_dict())

def to_texoo_dict(self) -> dict:
# Return the parent class's dict representation with a 'class' entry set to 'Document'.
content = super().to_texoo_dict()
content['class'] = 'Document'
return content


class NotATeXooDocumentException(Exception):
# Raised by from_json() in this module when the supplied dict's 'class' entry is not 'Document'.
pass
2 changes: 2 additions & 0 deletions texoopy/model/MentionAnnotation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import json

from texoopy.model.Annotation import Annotation
Expand All @@ -12,6 +13,7 @@ def __init__(self, **kwargs):

@classmethod
def from_json(cls, json_data: dict):
# Alternate constructor: passes the entries of a parsed JSON dict to cls() as keyword arguments.
# The dict is deep-copied first so the caller's data is left untouched.
json_data = copy.deepcopy(json_data)
return cls(**json_data)

def to_json(self):
Expand Down
2 changes: 2 additions & 0 deletions texoopy/model/NamedEntityAnnotation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import json

from texoopy.model.Annotation import Annotation
Expand All @@ -12,6 +13,7 @@ def __init__(self, **kwargs):

@classmethod
def from_json(cls, json_data: dict):
# Alternate constructor: passes the entries of a parsed JSON dict to cls() as keyword arguments.
# The dict is deep-copied first so the caller's data is left untouched.
json_data = copy.deepcopy(json_data)
return cls(**json_data)

def to_json(self):
Expand Down
2 changes: 2 additions & 0 deletions texoopy/model/Sentence.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import json

from texoopy.model.Span import Span
Expand All @@ -12,6 +13,7 @@ def __init__(self, **kwargs):

@classmethod
def from_json(cls, json_data: dict, do_tokenization=False):
json_data = copy.deepcopy(json_data)
if do_tokenization:
pass # TODO implement me (later)
token_json_data = json_data.pop('tokens', [])
Expand Down
1 change: 1 addition & 0 deletions texoopy/model/Span.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def to_texoo_dict(self) -> dict:

@classmethod
def from_json(cls, json_data: dict):
# Alternate constructor: passes the entries of a parsed JSON dict to cls() as keyword arguments.
# The dict is deep-copied first so the caller's data is left untouched.
# NOTE(review): this commit lists only one added line for Span.py and no `import copy`;
# confirm the module already imports copy, otherwise this raises NameError.
json_data = copy.deepcopy(json_data)
return cls(**json_data)

def to_json(self):
Expand Down
2 changes: 2 additions & 0 deletions texoopy/model/Token.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import json

from texoopy.model.Span import Span
Expand All @@ -10,6 +11,7 @@ def __init__(self, **kwargs):

@classmethod
def from_json(cls, json_data: dict):
# Alternate constructor: passes the entries of a parsed JSON dict to cls() as keyword arguments.
# The dict is deep-copied first so the caller's data is left untouched.
json_data = copy.deepcopy(json_data)
return cls(**json_data) # TODO IMPLEMENT ME

def to_json(self):
Expand Down
24 changes: 20 additions & 4 deletions texoopy/tests/model/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ class DatasetTest(unittest.TestCase):

def setUp(self) -> None:
# Load the dataset fixture and keep both the raw dict and the parsed Dataset for the tests.
with open('res/texoo_dataset.json', 'r') as test_file:
# NOTE(review): the next line and the two after it both load the file; this listing has no
# +/- diff markers, so presumably the first is the removed line and the other two replace it.
self.dataset = Dataset.from_json(json.load(test_file))
self.dataset_json_dict = json.load(test_file)
self.dataset = Dataset.from_json(self.dataset_json_dict)

def test_dataset_instance(self):
# The object built in setUp() must be a Dataset.
self.assertIsInstance(self.dataset, Dataset)
Expand All @@ -29,6 +30,9 @@ def test_dataset_document_count(self):
self.assertEqual(3, len(self.dataset.documents))

def test_dataset_to_json(self):
# Round trip: to_json() output, parsed back, must equal the dict loaded from the fixture file.
self.assertEqual(self.dataset_json_dict, json.loads(self.dataset.to_json()))

def test_dataset_toJson_mutability(self):
# Placeholder: currently asserts nothing and always passes.
pass # TODO IMPLEMENT ME


Expand Down Expand Up @@ -79,11 +83,17 @@ def test_illegal_teXoo_json_exception(self):
Document.from_json({'class': 'Span'})

def test_document_to_json(self):
# Round trip: to_json() output, parsed back, must equal self.document_json_dict.
# NOTE(review): orig and parsed are assigned but never read in this method.
orig = self.document_json_dict # TODO RMD
parsed = json.loads(self.document_1.to_json()) # TODO RMD

self.assertEqual(self.document_json_dict, json.loads(self.document_1.to_json()))

def test_document_toJson_mutability(self):
# Calling to_json() must not change the document: compare deep-copied snapshots of its
# __dict__ taken before and after the call.
# NOTE(review): no `import copy` is visible in the shown hunks of this test module — confirm it exists.
before = copy.deepcopy(self.document_1.__dict__)
self.document_1.to_json()
after = copy.deepcopy(self.document_1.__dict__)
# Annotations should get tested in a separate test so here we only compare the correct count.
before['annotations'] = len(before.get('annotations'))
after['annotations'] = len(after.get('annotations'))
self.assertEqual(before, after)


class MentionAnnotationTest(unittest.TestCase):

Expand Down Expand Up @@ -119,6 +129,9 @@ def test_mention_ann_type(self):
def test_mention_ann_to_json(self):
# Round trip: to_json() output, parsed back, must equal self.mention_ann_1_json_dict.
self.assertEqual(self.mention_ann_1_json_dict, json.loads(self.mention_ann_1.to_json()))

def test_mention_ann_toJson_mutability(self):
# Placeholder: currently asserts nothing and always passes.
pass # TODO IMPLEMENT ME


class NamedEntityAnnotationTest(unittest.TestCase):

Expand Down Expand Up @@ -151,6 +164,9 @@ def test_named_ann_refId(self):
def test_named_ann_to_json(self):
# Round trip: to_json() output, parsed back, must equal self.named_entity_ann_json_dict.
self.assertEqual(self.named_entity_ann_json_dict, json.loads(self.named_entity_ann.to_json()))

def test_named_ann_toJson_mutability(self):
# Placeholder: currently asserts nothing and always passes.
pass # TODO IMPLEMENT ME


# Run the unittest runner when this module is executed directly.
if __name__ == '__main__':
unittest.main()

0 comments on commit 5a5fb12

Please sign in to comment.