Skip to content

Commit

Permalink
Merge pull request #705 from alephdata/death-cutoff-helper
Browse files Browse the repository at this point in the history
Remove persons that have been long dead
  • Loading branch information
pudo authored May 30, 2022
2 parents 94106d3 + 3a3f49c commit 31541cb
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 1 deletion.
23 changes: 23 additions & 0 deletions followthemoney/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from normality import safe_filename
from mimetypes import guess_extension
from itertools import product
from datetime import datetime, timedelta

from followthemoney.types import registry
from followthemoney.proxy import E
Expand Down Expand Up @@ -83,6 +84,28 @@ def name_entity(entity: E) -> E:
return entity


def _is_before_cutoff(value: str, cutoff: str) -> bool:
    """Check if the date string ``value`` is strictly earlier than ``cutoff``,
    comparing only at the precision that ``value`` actually provides."""
    # Plain string comparison ranks every prefix of the cutoff below it, so
    # "2000" or "2000-01-01" would count as earlier than "2000-01-01T00:00:00".
    # Trimming the cutoff to the length of the value makes such ties compare
    # as equal (i.e. not earlier) instead.
    return value < cutoff[: len(value)]


def check_person_cutoff(
    entity: E,
    death_cutoff: datetime = datetime(2000, 1, 1),
    birth_cutoff: Optional[datetime] = None,
) -> bool:
    """Check if a person has been dead long enough to not be relevant for
    investigations any more.

    Entities that are not of the ``Person`` schema are never cut off. A person
    is cut off if all of their ``deathDate`` values are before
    ``death_cutoff``, or if any of their ``birthDate`` values is before
    ``birth_cutoff``. When no ``birth_cutoff`` is given, it is set to
    ``100 * 365`` days before ``death_cutoff``.

    Date values are compared as ISO-format strings and may be less precise
    than the cutoff (e.g. only a year). A value only counts as before the
    cutoff if it is earlier at its own precision, so a date that merely
    coincides with the start of the cutoff does not trigger the cutoff.

    Returns ``True`` if the person falls before the cutoff, else ``False``.
    """
    if not entity.schema.is_a("Person"):
        return False

    death_cutoff_ = death_cutoff.isoformat()
    death_dates = entity.get("deathDate", quiet=True)
    # Requiring every value to be early is the same as testing the latest one;
    # the emptiness guard is needed because all() of nothing is True.
    if death_dates and all(
        _is_before_cutoff(date, death_cutoff_) for date in death_dates
    ):
        return True

    if birth_cutoff is None:
        # Approximately a century: 100 * 365 days does not account for leap
        # days, so this lands slightly less than 100 calendar years earlier.
        birth_cutoff = death_cutoff - timedelta(days=100 * 365)
    birth_cutoff_ = birth_cutoff.isoformat()
    birth_dates = entity.get("birthDate", quiet=True)
    # A single sufficiently early birth date is enough (earliest value wins).
    if any(_is_before_cutoff(date, birth_cutoff_) for date in birth_dates):
        return True
    return False


def remove_prefix_dates(entity: E) -> E:
"""If an entity has multiple values for a date field, you may
want to remove all those that are prefixes of others. For example,
Expand Down
24 changes: 23 additions & 1 deletion tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from followthemoney.helpers import combine_names, remove_checksums
from followthemoney.helpers import simplify_provenance
from followthemoney.helpers import entity_filename
from followthemoney.helpers import name_entity
from followthemoney.helpers import name_entity, check_person_cutoff
from followthemoney.helpers import remove_prefix_dates


Expand Down Expand Up @@ -112,6 +112,28 @@ def test_name_entity(self):
name_entity(proxy)
assert ["Carl"] == proxy.get("name"), proxy.get("name")

def test_death_cutoff(self):
    """Exercise check_person_cutoff with the default cutoff dates."""
    # Anything that is not a Person is never subject to the cutoff.
    assert not check_person_cutoff(model.make_entity("Company"))

    # Each scenario: (birthDate, deathDate or None, expected cutoff result).
    scenarios = [
        ("1985", None, False),  # living person born recently
        ("1985", "2022", False),  # died well after the death cutoff
        ("1800", None, True),  # born too long ago to still matter
        ("1985", "2008", False),  # died after the death cutoff
    ]
    for born, died, expected in scenarios:
        person = model.make_entity("Person")
        person.add("birthDate", born)
        if died is not None:
            person.add("deathDate", died)
        outcome = bool(check_person_cutoff(person))
        assert outcome == expected, (born, died)

def test_remove_prefix_dates(self):
proxy = model.get_proxy(
{
Expand Down

0 comments on commit 31541cb

Please sign in to comment.