From 5a5fb12b75faba73ff473b4c2086a7901f1c8df5 Mon Sep 17 00:00:00 2001
From: devfoo <tom@devfoo.de>
Date: Wed, 8 Jan 2020 14:35:56 +0100
Subject: [PATCH] =?UTF-8?q?-=20Added=20deepcopy=C2=B4s=20to=20all=20from?=
 =?UTF-8?q?=5Fjson()=20methods=20to=20enforce=20immutability=20of=20the=20?=
 =?UTF-8?q?input=20data=20and=20implemented=20first=20mutability=20tests?=
 =?UTF-8?q?=20(see=20#2).=20-=20Added=20Dataset.to=5Fjson()=20test,=20clos?=
 =?UTF-8?q?es=20#1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 texoopy/model/Annotation.py            |  2 ++
 texoopy/model/Dataset.py               |  2 ++
 texoopy/model/Document.py              | 10 ++++++----
 texoopy/model/MentionAnnotation.py     |  2 ++
 texoopy/model/NamedEntityAnnotation.py |  2 ++
 texoopy/model/Sentence.py              |  2 ++
 texoopy/model/Span.py                  |  1 +
 texoopy/model/Token.py                 |  2 ++
 texoopy/tests/model/tests.py           | 24 ++++++++++++++++++++----
 9 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/texoopy/model/Annotation.py b/texoopy/model/Annotation.py
index 938bf62..e79bfa2 100644
--- a/texoopy/model/Annotation.py
+++ b/texoopy/model/Annotation.py
@@ -1,3 +1,4 @@
+import copy
 import json
 
 from texoopy.model.Span import Span
@@ -13,6 +14,7 @@ def __init__(self, **kwargs):
 
     @classmethod
     def from_json(cls, json_data: dict):
+        json_data = copy.deepcopy(json_data)
         from texoopy.model.MentionAnnotation import MentionAnnotation
         from texoopy.model.NamedEntityAnnotation import NamedEntityAnnotation
         if json_data['class'] == 'MentionAnnotation':
diff --git a/texoopy/model/Dataset.py b/texoopy/model/Dataset.py
index 07d6fab..5b8154a 100644
--- a/texoopy/model/Dataset.py
+++ b/texoopy/model/Dataset.py
@@ -9,9 +9,11 @@ def __init__(self, **kwargs):
         self.name: str = kwargs.get('name')
         self.language: str = kwargs.get('language')
         self.documents: list = []
+        self.queries: list = []
 
     @classmethod
     def from_json(cls, json_data: dict):
+        json_data = copy.deepcopy(json_data)
         dataset = cls(**json_data)
         for doc_json_data in json_data.get('documents', []):
             dataset.documents.append(Document.from_json(doc_json_data))
diff --git a/texoopy/model/Document.py b/texoopy/model/Document.py
index 113e6fd..cb86d43 100644
--- a/texoopy/model/Document.py
+++ b/texoopy/model/Document.py
@@ -1,3 +1,4 @@
+import copy
 import json
 
 from texoopy.model.Annotation import Annotation
@@ -17,9 +18,9 @@ def __init__(self, **kwargs):
 
     @classmethod
     def from_json(cls, json_data: dict, do_sentence_splitting=False):
-
+        json_data = copy.deepcopy(json_data)
         if json_data.get('class') != 'Document':
-            raise NotATeXooDocumentException('')
+            raise NotATeXooDocumentException('Supplied JSON is not a valid TeXoo document.')
 
         annotations = []
         for json_data_annotation in json_data.get('annotations'):
@@ -27,18 +28,19 @@ def from_json(cls, json_data: dict, do_sentence_splitting=False):
         json_data['annotations'] = annotations
 
         if do_sentence_splitting:
-            pass  # TODO add sentence splitting
+            raise NotImplementedError("Sentence splitting is not implemented yet.")  # TODO add sentence splitting
 
         return cls(**json_data)
 
     def to_json(self):
         content = self.to_texoo_dict()
-        content['class'] = 'Document'
         return json.dumps(content, default=lambda o: o.to_texoo_dict())
 
     def to_texoo_dict(self) -> dict:
         content = super().to_texoo_dict()
+        content['class'] = 'Document'
         return content
 
+
 class NotATeXooDocumentException(Exception):
     pass
diff --git a/texoopy/model/MentionAnnotation.py b/texoopy/model/MentionAnnotation.py
index b556ab3..90c38b0 100644
--- a/texoopy/model/MentionAnnotation.py
+++ b/texoopy/model/MentionAnnotation.py
@@ -1,3 +1,4 @@
+import copy
 import json
 
 from texoopy.model.Annotation import Annotation
@@ -12,6 +13,7 @@ def __init__(self, **kwargs):
 
     @classmethod
     def from_json(cls, json_data: dict):
+        json_data = copy.deepcopy(json_data)
         return cls(**json_data)
 
     def to_json(self):
diff --git a/texoopy/model/NamedEntityAnnotation.py b/texoopy/model/NamedEntityAnnotation.py
index 7dfa39e..6f123a5 100644
--- a/texoopy/model/NamedEntityAnnotation.py
+++ b/texoopy/model/NamedEntityAnnotation.py
@@ -1,3 +1,4 @@
+import copy
 import json
 
 from texoopy.model.Annotation import Annotation
@@ -12,6 +13,7 @@ def __init__(self, **kwargs):
 
     @classmethod
     def from_json(cls, json_data: dict):
+        json_data = copy.deepcopy(json_data)
         return cls(**json_data)
 
     def to_json(self):
diff --git a/texoopy/model/Sentence.py b/texoopy/model/Sentence.py
index 0baaa4a..fedf93d 100644
--- a/texoopy/model/Sentence.py
+++ b/texoopy/model/Sentence.py
@@ -1,3 +1,4 @@
+import copy
 import json
 
 from texoopy.model.Span import Span
@@ -12,6 +13,7 @@ def __init__(self, **kwargs):
 
     @classmethod
     def from_json(cls, json_data: dict, do_tokenization=False):
+        json_data = copy.deepcopy(json_data)
         if do_tokenization:
             pass  # TODO implement me (later)
         token_json_data = json_data.pop('tokens', [])
diff --git a/texoopy/model/Span.py b/texoopy/model/Span.py
index 2ec3771..7d09ecf 100644
--- a/texoopy/model/Span.py
+++ b/texoopy/model/Span.py
@@ -16,6 +16,7 @@ def to_texoo_dict(self) -> dict:
 
     @classmethod
     def from_json(cls, json_data: dict):
+        json_data = copy.deepcopy(json_data)
         return cls(**json_data)
     
     def to_json(self):
diff --git a/texoopy/model/Token.py b/texoopy/model/Token.py
index 9db5a2c..6b6515d 100644
--- a/texoopy/model/Token.py
+++ b/texoopy/model/Token.py
@@ -1,3 +1,4 @@
+import copy
 import json
 
 from texoopy.model.Span import Span
@@ -10,6 +11,7 @@ def __init__(self, **kwargs):
 
     @classmethod
     def from_json(cls, json_data: dict):
+        json_data = copy.deepcopy(json_data)
         return cls(**json_data)  # TODO IMPLEMENT ME
 
     def to_json(self):
diff --git a/texoopy/tests/model/tests.py b/texoopy/tests/model/tests.py
index 6561d1e..b06a5a4 100644
--- a/texoopy/tests/model/tests.py
+++ b/texoopy/tests/model/tests.py
@@ -14,7 +14,8 @@ class DatasetTest(unittest.TestCase):
 
     def setUp(self) -> None:
         with open('res/texoo_dataset.json', 'r') as test_file:
-            self.dataset = Dataset.from_json(json.load(test_file))
+            self.dataset_json_dict = json.load(test_file)
+            self.dataset = Dataset.from_json(self.dataset_json_dict)
 
     def test_dataset_instance(self):
         self.assertIsInstance(self.dataset, Dataset)
@@ -29,6 +30,9 @@ def test_dataset_document_count(self):
         self.assertEqual(3, len(self.dataset.documents))
 
     def test_dataset_to_json(self):
+        self.assertEqual(self.dataset_json_dict, json.loads(self.dataset.to_json()))
+
+    def test_dataset_toJson_mutability(self):
         pass  # TODO IMPLEMENT ME
 
 
@@ -79,11 +83,17 @@ def test_illegal_teXoo_json_exception(self):
             Document.from_json({'class': 'Span'})
 
     def test_document_to_json(self):
-        orig = self.document_json_dict  # TODO RMD
-        parsed = json.loads(self.document_1.to_json()) # TODO RMD
-
         self.assertEqual(self.document_json_dict, json.loads(self.document_1.to_json()))
 
+    def test_document_toJson_mutability(self):
+        before = copy.deepcopy(self.document_1.__dict__)
+        self.document_1.to_json()
+        after = copy.deepcopy(self.document_1.__dict__)
+        # Annotations are covered by their own tests, so here we only compare how many there are.
+        before['annotations'] = len(before.get('annotations'))
+        after['annotations'] = len(after.get('annotations'))
+        self.assertEqual(before, after)
+
 
 class MentionAnnotationTest(unittest.TestCase):
 
@@ -119,6 +129,9 @@ def test_mention_ann_type(self):
     def test_mention_ann_to_json(self):
         self.assertEqual(self.mention_ann_1_json_dict, json.loads(self.mention_ann_1.to_json()))
 
+    def test_mention_ann_toJson_mutability(self):
+        pass  # TODO: verify that to_json() leaves the MentionAnnotation unchanged
+
 
 class NamedEntityAnnotationTest(unittest.TestCase):
 
@@ -151,6 +164,9 @@ def test_named_ann_refId(self):
     def test_named_ann_to_json(self):
         self.assertEqual(self.named_entity_ann_json_dict, json.loads(self.named_entity_ann.to_json()))
 
+    def test_named_ann_toJson_mutability(self):
+        pass  # TODO: verify that to_json() leaves the NamedEntityAnnotation unchanged
+
 
 if __name__ == '__main__':
     unittest.main()