From 5a5fb12b75faba73ff473b4c2086a7901f1c8df5 Mon Sep 17 00:00:00 2001 From: devfoo Date: Wed, 8 Jan 2020 14:35:56 +0100 Subject: [PATCH] =?UTF-8?q?-=20Added=20deepcopy=C2=B4s=20to=20all=20from?= =?UTF-8?q?=5Fjson()=20methods=20to=20enforce=20immutability=20of=20the=20?= =?UTF-8?q?input=20data=20and=20implemented=20first=20mutability=20tests?= =?UTF-8?q?=20(see=20#2).=20-=20Added=20Dataset.to=5Fjson()=20test,=20clos?= =?UTF-8?q?es=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- texoopy/model/Annotation.py | 2 ++ texoopy/model/Dataset.py | 2 ++ texoopy/model/Document.py | 10 ++++++---- texoopy/model/MentionAnnotation.py | 2 ++ texoopy/model/NamedEntityAnnotation.py | 2 ++ texoopy/model/Sentence.py | 2 ++ texoopy/model/Span.py | 1 + texoopy/model/Token.py | 2 ++ texoopy/tests/model/tests.py | 24 ++++++++++++++++++++---- 9 files changed, 39 insertions(+), 8 deletions(-) diff --git a/texoopy/model/Annotation.py b/texoopy/model/Annotation.py index 938bf62..e79bfa2 100644 --- a/texoopy/model/Annotation.py +++ b/texoopy/model/Annotation.py @@ -1,3 +1,4 @@ +import copy import json from texoopy.model.Span import Span @@ -13,6 +14,7 @@ def __init__(self, **kwargs): @classmethod def from_json(cls, json_data: dict): + json_data = copy.deepcopy(json_data) from texoopy.model.MentionAnnotation import MentionAnnotation from texoopy.model.NamedEntityAnnotation import NamedEntityAnnotation if json_data['class'] == 'MentionAnnotation': diff --git a/texoopy/model/Dataset.py b/texoopy/model/Dataset.py index 07d6fab..5b8154a 100644 --- a/texoopy/model/Dataset.py +++ b/texoopy/model/Dataset.py @@ -9,9 +9,11 @@ def __init__(self, **kwargs): self.name: str = kwargs.get('name') self.language: str = kwargs.get('language') self.documents: list = [] + self.queries: list = [] @classmethod def from_json(cls, json_data: dict): + json_data = copy.deepcopy(json_data) dataset = cls(**json_data) for doc_json_data in json_data.get('documents', []): dataset.documents.append(Document.from_json(doc_json_data)) diff --git a/texoopy/model/Document.py b/texoopy/model/Document.py index 113e6fd..cb86d43 100644 --- a/texoopy/model/Document.py +++ b/texoopy/model/Document.py @@ -1,3 +1,4 @@ +import copy import json from texoopy.model.Annotation import Annotation @@ -17,9 +18,9 @@ def __init__(self, **kwargs): @classmethod def from_json(cls, json_data: dict, do_sentence_splitting=False): - + json_data = copy.deepcopy(json_data) if json_data.get('class') != 'Document': - raise NotATeXooDocumentException('') + raise NotATeXooDocumentException('Supplied JSON is not a valid TeXoo document.') annotations = [] for json_data_annotation in json_data.get('annotations'): @@ -27,18 +28,19 @@ def from_json(cls, json_data: dict, do_sentence_splitting=False): json_data['annotations'] = annotations if do_sentence_splitting: - pass # TODO add sentence splitting + raise NotImplementedError("Sentence splitting is not implemented yet.") # TODO add sentence splitting return cls(**json_data) def to_json(self): content = self.to_texoo_dict() - content['class'] = 'Document' return json.dumps(content, default=lambda o: o.to_texoo_dict()) def to_texoo_dict(self) -> dict: content = super().to_texoo_dict() + content['class'] = 'Document' return content + class NotATeXooDocumentException(Exception): pass diff --git a/texoopy/model/MentionAnnotation.py b/texoopy/model/MentionAnnotation.py index b556ab3..90c38b0 100644 --- a/texoopy/model/MentionAnnotation.py +++ b/texoopy/model/MentionAnnotation.py @@ -1,3 +1,4 @@ +import copy import json from texoopy.model.Annotation import Annotation @@ -12,6 +13,7 @@ def __init__(self, **kwargs): @classmethod def from_json(cls, json_data: dict): + json_data = copy.deepcopy(json_data) return cls(**json_data) def to_json(self): diff --git a/texoopy/model/NamedEntityAnnotation.py b/texoopy/model/NamedEntityAnnotation.py index 7dfa39e..6f123a5 100644 --- a/texoopy/model/NamedEntityAnnotation.py +++ b/texoopy/model/NamedEntityAnnotation.py @@ -1,3 +1,4 @@ +import copy import json from texoopy.model.Annotation import Annotation @@ -12,6 +13,7 @@ def __init__(self, **kwargs): @classmethod def from_json(cls, json_data: dict): + json_data = copy.deepcopy(json_data) return cls(**json_data) def to_json(self): diff --git a/texoopy/model/Sentence.py b/texoopy/model/Sentence.py index 0baaa4a..fedf93d 100644 --- a/texoopy/model/Sentence.py +++ b/texoopy/model/Sentence.py @@ -1,3 +1,4 @@ +import copy import json from texoopy.model.Span import Span @@ -12,6 +13,7 @@ def __init__(self, **kwargs): @classmethod def from_json(cls, json_data: dict, do_tokenization=False): + json_data = copy.deepcopy(json_data) if do_tokenization: pass # TODO implement me (later) token_json_data = json_data.pop('tokens', []) diff --git a/texoopy/model/Span.py b/texoopy/model/Span.py index 2ec3771..7d09ecf 100644 --- a/texoopy/model/Span.py +++ b/texoopy/model/Span.py @@ -16,6 +16,7 @@ def to_texoo_dict(self) -> dict: @classmethod def from_json(cls, json_data: dict): + json_data = copy.deepcopy(json_data) return cls(**json_data) def to_json(self): diff --git a/texoopy/model/Token.py b/texoopy/model/Token.py index 9db5a2c..6b6515d 100644 --- a/texoopy/model/Token.py +++ b/texoopy/model/Token.py @@ -1,3 +1,4 @@ +import copy import json from texoopy.model.Span import Span @@ -10,6 +11,7 @@ def __init__(self, **kwargs): @classmethod def from_json(cls, json_data: dict): + json_data = copy.deepcopy(json_data) return cls(**json_data) # TODO IMPLEMENT ME def to_json(self): diff --git a/texoopy/tests/model/tests.py b/texoopy/tests/model/tests.py index 6561d1e..b06a5a4 100644 --- a/texoopy/tests/model/tests.py +++ b/texoopy/tests/model/tests.py @@ -14,7 +14,8 @@ class DatasetTest(unittest.TestCase): def setUp(self) -> None: with open('res/texoo_dataset.json', 'r') as test_file: - self.dataset = Dataset.from_json(json.load(test_file)) + self.dataset_json_dict = json.load(test_file) + self.dataset = Dataset.from_json(self.dataset_json_dict) def test_dataset_instance(self): self.assertIsInstance(self.dataset, Dataset) @@ -29,6 +30,9 @@ def test_dataset_document_count(self): self.assertEqual(3, len(self.dataset.documents)) def test_dataset_to_json(self): + self.assertEqual(self.dataset_json_dict, json.loads(self.dataset.to_json())) + + def test_dataset_toJson_mutability(self): pass # TODO IMPLEMENT ME @@ -79,11 +83,17 @@ def test_illegal_teXoo_json_exception(self): Document.from_json({'class': 'Span'}) def test_document_to_json(self): - orig = self.document_json_dict # TODO RMD - parsed = json.loads(self.document_1.to_json()) # TODO RMD - self.assertEqual(self.document_json_dict, json.loads(self.document_1.to_json())) + def test_document_toJson_mutability(self): + before = copy.deepcopy(self.document_1.__dict__) + self.document_1.to_json() + after = copy.deepcopy(self.document_1.__dict__) + # Annotations should get tested in a separate test so here we only compare the correct count. + before['annotations'] = len(before.get('annotations')) + after['annotations'] = len(after.get('annotations')) + self.assertEqual(before, after) + class MentionAnnotationTest(unittest.TestCase): @@ -119,6 +129,9 @@ def test_mention_ann_type(self): def test_mention_ann_to_json(self): self.assertEqual(self.mention_ann_1_json_dict, json.loads(self.mention_ann_1.to_json())) + def test_mention_ann_toJson_mutability(self): + pass # TODO IMPLEMENT ME + class NamedEntityAnnotationTest(unittest.TestCase): @@ -151,6 +164,9 @@ def test_named_ann_refId(self): def test_named_ann_to_json(self): self.assertEqual(self.named_entity_ann_json_dict, json.loads(self.named_entity_ann.to_json())) + def test_named_ann_toJson_mutability(self): + pass # TODO IMPLEMENT ME + if __name__ == '__main__': unittest.main()