Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove specificity from default compare #963

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion followthemoney/types/address.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def clean_text(
return collapsed

# NOTE(review): this span is a flattened diff hunk (its file header reads
# "1 addition & 1 deletion"), so two candidate bodies appear back to back.
# Presumably the first return is the deleted line and the second one is its
# replacement -- confirm against the real address.py before relying on either.
def _specificity(self, value: str) -> float:
return dampen(10, 60, value)  # presumably the pre-change line (deleted)
return dampen(10, 30, value)  # presumably the post-change line (added)

def node_id(self, value: str) -> Optional[str]:
slug = slugify(value)
Expand Down
3 changes: 3 additions & 0 deletions followthemoney/types/country.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,8 @@ def clean_text(
def country_hint(self, value: str) -> str:
    """Return ``value`` unchanged as the country hint."""
    return value

def compare(self, left: str, right: str) -> float:
    """Score two values: 1.0 when they are equal, 0.0 otherwise."""
    return 1.0 if left == right else 0.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` from ``value`` prefixed with ``iso-3166-1:``."""
    return URIRef(f"iso-3166-1:{value}")
5 changes: 5 additions & 0 deletions followthemoney/types/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,10 @@ def clean_text(
# def country_hint(self, value)
# TODO: do we want to use TLDs as country evidence?

def compare(self, left: str, right: str) -> float:
    """Score two values: 1.0 when they match after lower-casing, else 0.0."""
    # Both sides are lower-cased so the comparison ignores letter case.
    return 1.0 if left.lower() == right.lower() else 0.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` of the form ``mailto:<lower-cased value>``."""
    return URIRef(f"mailto:{value.lower()}")
3 changes: 3 additions & 0 deletions followthemoney/types/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def clean_text(
return text
return None

def compare(self, left: str, right: str) -> float:
    """Score two values: 1.0 when they are equal, 0.0 otherwise."""
    return 1.0 if left == right else 0.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` from ``value`` prefixed with ``entity:``."""
    return URIRef(f"entity:{value}")

Expand Down
3 changes: 3 additions & 0 deletions followthemoney/types/gender.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,8 @@ def clean_text(
return None
return code

def compare(self, left: str, right: str) -> float:
    """Score two values: 1.0 when they are equal, 0.0 otherwise."""
    return 1.0 if left == right else 0.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` from ``value`` prefixed with ``gender:``."""
    return URIRef(f"gender:{value}")
3 changes: 3 additions & 0 deletions followthemoney/types/iban.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ def clean_text(
def country_hint(self, value: str) -> str:
    """Return the first two characters of ``value``, lower-cased."""
    return value[:2].lower()

def compare(self, left: str, right: str) -> float:
    """Score two values: 1.0 when they are equal, 0.0 otherwise."""
    return 1.0 if left == right else 0.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` from whatever ``self.node_id(value)`` returns."""
    return URIRef(self.node_id(value))

Expand Down
3 changes: 3 additions & 0 deletions followthemoney/types/ip.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,8 @@ def clean_text(
except ValueError:
return None

def _specificity(self, value: str) -> float:
return 1.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` from ``value`` prefixed with ``ip:``."""
    return URIRef(f"ip:{value}")
3 changes: 3 additions & 0 deletions followthemoney/types/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,8 @@ def clean_text(
return None
return code

def _specificity(self, value: str) -> float:
return 1.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` from ``value`` prefixed with ``iso-639:``."""
    return URIRef(f"iso-639:{value}")
3 changes: 3 additions & 0 deletions followthemoney/types/mimetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ def clean_text(
return text
return None

def _specificity(self, value: str) -> float:
return 1.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` from ``value`` prefixed with ``urn:mimetype:``."""
    return URIRef(f"urn:mimetype:{value}")

Expand Down
3 changes: 3 additions & 0 deletions followthemoney/types/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ class HTMLType(StringType):
label = _("HTML")
plural = _("HTMLs")
max_size = 30 * MEGABYTE

def compare(self, left: str, right: str) -> float:
    """Always score 0.0, whatever ``left`` and ``right`` contain."""
    return 0.0
3 changes: 3 additions & 0 deletions followthemoney/types/topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,5 +70,8 @@ class TopicType(EnumType):
def _locale_names(self, locale: Locale) -> EnumValues:
    """Map every key of ``self._TOPICS`` to ``gettext()`` of its value.

    The ``locale`` argument is not read by this body.
    """
    return {k: gettext(v) for (k, v) in self._TOPICS.items()}

def compare(self, left: str, right: str) -> float:
    """Score two values: 1.0 when they are equal, 0.0 otherwise."""
    return 1.0 if left == right else 0.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` from ``value`` prefixed with ``ftm:topic:``."""
    return URIRef(f"ftm:topic:{value}")
6 changes: 4 additions & 2 deletions followthemoney/types/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ def clean_text(
parsed = parsed._replace(path="/")
return parsed.geturl()

def _specificity(self, value: str) -> float:
    """Return ``dampen(10, 120, value)`` as the specificity of ``value``.

    NOTE(review): ``dampen`` is defined outside this view; presumably it
    scales a score by the length of ``value`` between the two bounds --
    confirm against its definition.
    """
    return dampen(10, 120, value)
def compare(self, left: str, right: str) -> float:
    """Score two values: 1.0 when they match after lower-casing, else 0.0."""
    # The whole string is lower-cased on both sides before comparing.
    return 1.0 if left.lower() == right.lower() else 0.0

def rdf(self, value: str) -> Identifier:
    """Build a ``URIRef`` directly from ``value``, with no prefix added."""
    return URIRef(value)
Expand Down