Skip to content

Commit

Permalink
Allow duplicate datasets in input
Browse files Browse the repository at this point in the history
Even if two Datasets that are read in have
exactly the same information, do not treat them
as equal. This preserves more of the input file
structure (rather than combining identical datasets
into a single ID). Closes #127.
  • Loading branch information
benmwebb committed Dec 6, 2023
1 parent 833f37a commit c1e79c2
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 4 deletions.
12 changes: 8 additions & 4 deletions ihm/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ class Dataset(object):
"""

_eq_keys = ['location']
_allow_duplicates = False

# Datasets compare equal iff they are the same class and have the
# same attributes
# Datasets compare equal iff they are the same class, have the
# same attributes, and allow_duplicates=False
# Build the key that __eq__ compares for this object.
# NOTE(review): this span looks like a rendered diff whose +/- markers and
# indentation were stripped. The unconditional return right after the def
# appears to be the pre-change body, and the if/else below it the
# post-change body -- confirm against the real ihm/dataset.py before
# treating these lines as runnable code.
def _eq_vals(self):
return tuple([self.__class__]
+ [getattr(self, x) for x in self._eq_keys])
# When _allow_duplicates is set the key is id(self), so two distinct
# objects never share a key even if their attributes match.
if self._allow_duplicates:
return id(self)
else:
# Otherwise the key is the class followed by every attribute named in
# _eq_keys.
return tuple([self.__class__]
+ [getattr(self, x) for x in self._eq_keys])

def __eq__(self, other):
    """Return True when both objects yield the same _eq_vals() key."""
    # Evaluate our own key first, then the other object's, then compare.
    own_key = self._eq_vals()
    other_key = other._eq_vals()
    return own_key == other_key
Expand Down
1 change: 1 addition & 0 deletions ihm/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1384,6 +1384,7 @@ def __call__(self, data_type, id, details):
f = self.sysr.datasets.get_by_id(
id, self.type_map.get(typ, ihm.dataset.Dataset))
f.details = details
f._allow_duplicates = True


class _DatasetGroupHandler(Handler):
Expand Down
11 changes: 11 additions & 0 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@ def test_dataset(self):
d.parents.append(td)
self.assertEqual(len(d.parents), 2)

def test_dataset_allow_duplicates(self):
    """Check that Datasets with _allow_duplicates=True compare by identity"""
    location = ihm.location.PDBLocation('1abc', version='foo',
                                        details='bar')

    # With the flag set, the comparison key is the object's own id()
    first = ihm.dataset.Dataset(location)
    first._allow_duplicates = True
    self.assertEqual(first._eq_vals(), id(first))

    # A second Dataset built from the very same location must not be
    # considered equal to the first
    second = ihm.dataset.Dataset(location)
    second._allow_duplicates = True
    self.assertNotEqual(first, second)

def test_add_primary_no_parents(self):
"""Test add_primary() method, no parents"""
l1 = ihm.location.PDBLocation('1abc', version='foo', details='bar')
Expand Down
3 changes: 3 additions & 0 deletions test/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,9 @@ def test_dataset_list_handler(self):
self.assertEqual(d1.__class__, ihm.dataset.PDBDataset)
self.assertEqual(d2.__class__, ihm.dataset.ComparativeModelDataset)
self.assertEqual(d3.__class__, ihm.dataset.EMMicrographsDataset)
self.assertTrue(d1._allow_duplicates)
self.assertTrue(d2._allow_duplicates)
self.assertTrue(d3._allow_duplicates)
# No specified data type - use base class
self.assertEqual(d4.__class__, ihm.dataset.Dataset)
self.assertIsNone(d1.details)
Expand Down

0 comments on commit c1e79c2

Please sign in to comment.