From c1e79c22fb83330eaadf81ad07a5cb3b5d2e8f27 Mon Sep 17 00:00:00 2001
From: Ben Webb
Date: Tue, 5 Dec 2023 18:08:57 -0800
Subject: [PATCH] Allow duplicate datasets in input

Even if two Datasets that are read in have exactly the same
information, do not treat them as equal. This preserves more of the
input file structure (rather than combining identical datasets into
a single ID).

Closes #127.
---
 ihm/dataset.py       | 12 ++++++++----
 ihm/reader.py        |  1 +
 test/test_dataset.py | 11 +++++++++++
 test/test_reader.py  |  3 +++
 4 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/ihm/dataset.py b/ihm/dataset.py
index c1504b4..3352af4 100644
--- a/ihm/dataset.py
+++ b/ihm/dataset.py
@@ -18,12 +18,16 @@ class Dataset(object):
     """
 
     _eq_keys = ['location']
+    _allow_duplicates = False
 
-    # Datasets compare equal iff they are the same class and have the
-    # same attributes
+    # Datasets compare equal iff they are the same class, have the
+    # same attributes, and allow_duplicates=False
     def _eq_vals(self):
-        return tuple([self.__class__]
-                     + [getattr(self, x) for x in self._eq_keys])
+        if self._allow_duplicates:
+            return id(self)
+        else:
+            return tuple([self.__class__]
+                         + [getattr(self, x) for x in self._eq_keys])
 
     def __eq__(self, other):
         return self._eq_vals() == other._eq_vals()
diff --git a/ihm/reader.py b/ihm/reader.py
index f2f2807..c0333cb 100644
--- a/ihm/reader.py
+++ b/ihm/reader.py
@@ -1384,6 +1384,7 @@ def __call__(self, data_type, id, details):
         f = self.sysr.datasets.get_by_id(
             id, self.type_map.get(typ, ihm.dataset.Dataset))
         f.details = details
+        f._allow_duplicates = True
 
 
 class _DatasetGroupHandler(Handler):
diff --git a/test/test_dataset.py b/test/test_dataset.py
index 43bd489..9b461c0 100644
--- a/test/test_dataset.py
+++ b/test/test_dataset.py
@@ -40,6 +40,17 @@ def test_dataset(self):
         d.parents.append(td)
         self.assertEqual(len(d.parents), 2)
 
+    def test_dataset_allow_duplicates(self):
+        """Test Dataset base class with allow_duplicates=True"""
+        loc = ihm.location.PDBLocation('1abc', version='foo', details='bar')
+        d = ihm.dataset.Dataset(loc)
+        d._allow_duplicates = True
+        self.assertEqual(d._eq_vals(), id(d))
+
+        d2 = ihm.dataset.Dataset(loc)
+        d2._allow_duplicates = True
+        self.assertNotEqual(d, d2)
+
     def test_add_primary_no_parents(self):
         """Test add_primary() method, no parents"""
         l1 = ihm.location.PDBLocation('1abc', version='foo', details='bar')
diff --git a/test/test_reader.py b/test/test_reader.py
index 7d9c14f..cf0c81a 100644
--- a/test/test_reader.py
+++ b/test/test_reader.py
@@ -971,6 +971,9 @@ def test_dataset_list_handler(self):
         self.assertEqual(d1.__class__, ihm.dataset.PDBDataset)
         self.assertEqual(d2.__class__, ihm.dataset.ComparativeModelDataset)
         self.assertEqual(d3.__class__, ihm.dataset.EMMicrographsDataset)
+        self.assertTrue(d1._allow_duplicates)
+        self.assertTrue(d2._allow_duplicates)
+        self.assertTrue(d3._allow_duplicates)
         # No specified data type - use base class
         self.assertEqual(d4.__class__, ihm.dataset.Dataset)
         self.assertIsNone(d1.details)
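
Not part of the patch: a minimal illustrative sketch of the comparison
behavior this change enables, using only the ihm.dataset and
ihm.location calls already exercised in the tests above. In normal use
the reader sets the private _allow_duplicates flag itself while parsing
a file; it is set by hand here purely for illustration.

    import ihm.location
    import ihm.dataset

    loc = ihm.location.PDBLocation('1abc')

    # Default behavior: two datasets of the same class with the same
    # attributes compare equal, so they would be combined into a single
    # ID on output.
    d1 = ihm.dataset.Dataset(loc)
    d2 = ihm.dataset.Dataset(loc)
    assert d1 == d2

    # With the flag set (as the reader now does for every dataset it
    # creates), _eq_vals() falls back to object identity, so
    # otherwise-identical datasets read from a file remain distinct.
    d1._allow_duplicates = True
    d2._allow_duplicates = True
    assert d1 != d2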