Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Implement OpenRefine data extension API #579

Merged
merged 4 commits into from
Nov 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ jobs:
context: .
platforms: linux/amd64,linux/arm64
push: true
pull: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ test:
typecheck:
mypy --strict yente

check: typecheck integration-test unit-test
check: typecheck test
38 changes: 36 additions & 2 deletions tests/test_reconcile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ def test_reconcile_metadata():
assert data["identifierSpace"].startswith(url), data
assert len(data["defaultTypes"]) > 3, data
assert "suggest" in data, data
assert "extend" in data, data


def test_reconcile_post():
def test_reconcile_post_query():
queries = {"mutti": {"query": "Yevgeny Popov"}}
resp = client.post("/reconcile/default", data={"queries": json.dumps(queries)})
assert resp.status_code == 200, resp.text
Expand All @@ -23,8 +24,22 @@ def test_reconcile_post():
assert res[0]["id"] == "Q18634850", res


def test_reconcile_post_extend():
    """POST an ``extend`` form field and check the returned property values."""
    entity_id = "Q7747"
    payload = {
        "ids": [entity_id],
        "properties": [{"id": "name"}, {"id": "birthDate"}],
    }
    form = {"extend": json.dumps(payload)}
    resp = client.post("/reconcile/default", data=form)
    assert resp.status_code == 200, resp.text
    data = resp.json()

    # Two properties were requested; the first meta entry must be `name`.
    meta = data["meta"]
    assert len(meta) == 2
    assert meta[0]["id"] == "name", meta

    # Rows are keyed by entity ID, then by property ID.
    rows = data["rows"]
    assert entity_id in rows, data
    assert "name" in rows[entity_id], data
    names = rows[entity_id]["name"]
    assert len(names) > 0, names

    # Each value is an object carrying its text under the "str" key.
    joined = "".join(value["str"] for value in names)
    assert "putin" in joined.lower(), names


def test_reconcile_invalid():
queries = {"mutti": {"query": 37473874}}
queries = {"mutti": {"type": "Banana"}}
resp = client.post("/reconcile/default", data={"queries": json.dumps(queries)})
assert resp.status_code == 400, resp.text

Expand Down Expand Up @@ -109,3 +124,22 @@ def test_reconcile_suggest_type_prefix_dummy():
assert "result" in data
res = data["result"]
assert len(res) == 0, data


def test_reconcile_extend_properties():
    """Request five proposed properties for ``LegalEntity`` and check them."""
    url = "/reconcile/default/extend/property?limit=5&type=LegalEntity"
    resp = client.get(url)
    assert resp.status_code == 200, resp.text
    data = resp.json()

    # The response echoes the requested type and limit.
    assert "type" in data
    assert data["type"] == "LegalEntity", data
    assert data["limit"] == 5, data

    props = data["properties"]
    assert len(props) == 5
    prop_ids = [prop["id"] for prop in props]
    for expected in ("name", "country"):
        assert expected in prop_ids


def test_reconcile_extend_properties_invalid_type():
    """An unknown ``type`` on the property proposal endpoint yields HTTP 400."""
    url = "/reconcile/default/extend/property?limit=5&type=Banana"
    resp = client.get(url)
    assert resp.status_code == 400, resp.text
4 changes: 2 additions & 2 deletions yente/data/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from typing import Dict, List, Union, Optional
from typing import Any, Dict, List, Union, Optional
from pydantic import BaseModel, Field
from nomenklatura.matching.types import MatchingResult, FeatureDocs

Expand Down Expand Up @@ -83,7 +83,7 @@ class SearchResponse(ResultsResponse):
class EntityExample(BaseModel):
id: Optional[str] = Field(None, examples=["my-entity-id"])
schema_: str = Field(..., examples=["Person"], alias="schema")
properties: Dict[str, Union[str, List[str]]] = Field(
properties: Dict[str, Union[str, List[Any]]] = Field(
..., examples=[{"name": ["John Doe"]}]
)

Expand Down
2 changes: 1 addition & 1 deletion yente/data/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def from_example(cls, example: "EntityExample") -> "Entity":
log.warning(
"Invalid example property",
prop=prop_name,
value=str(values),
value=repr(values),
)
continue
obj.add(prop_name, values, cleaned=False, fuzzy=True)
Expand Down
77 changes: 76 additions & 1 deletion yente/data/freebase.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import List, Optional
from enum import Enum
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from pydantic.networks import AnyHttpUrl
from followthemoney import model
Expand Down Expand Up @@ -88,6 +89,53 @@ class FreebasePropertySuggestResponse(FreebaseSuggestResponse):
result: List[FreebaseProperty]


# A single property offered for data extension: its identifier and a
# display name. Used as the item type of
# `FreebaseExtendPropertiesResponse.properties`.
class FreebaseExtendProperty(BaseModel):
    id: str
    name: str


# Body of a property proposal response: the `limit` and `type` values
# alongside the list of proposed properties. The tests read this shape
# from `/reconcile/default/extend/property`.
class FreebaseExtendPropertiesResponse(BaseModel):
    limit: int
    type: str
    properties: List[FreebaseExtendProperty]


# Rendering options for extended property values. Members are also `str`
# instances, so they compare equal to their plain string values.
# NOTE(review): presumably `raw` returns stored values as-is and `caption`
# returns a human-readable label - confirm against the extend handler.
class FreebaseRenderMethod(str, Enum):
    raw = "raw"
    caption = "caption"


# Optional per-property settings of an extend query. Both fields have
# defaults, so an empty settings object is valid.
class FreebaseExtendQueryPropertySettings(BaseModel):
    # NOTE(review): 0 presumably means "no limit" - confirm in the handler.
    limit: int = 0
    render: FreebaseRenderMethod = FreebaseRenderMethod.caption


# One requested property within an extend query: the property `id` plus
# optional settings. Clients may send just `{"id": ...}`; the default
# settings object is used in that case.
class FreebaseExtendQueryProperty(BaseModel):
    id: str
    settings: FreebaseExtendQueryPropertySettings = (
        FreebaseExtendQueryPropertySettings()
    )


# Parsed form of the JSON sent in the `extend` form field: the entity IDs
# to extend and the properties to fetch for each of them.
class FreebaseExtendQuery(BaseModel):
    ids: List[str]
    properties: List[FreebaseExtendQueryProperty]


# Describes one column of an extend response (property `id` and display
# `name`); entries of `FreebaseExtendResponse.meta`.
class FreebaseExtendResponseMeta(BaseModel):
    id: str
    name: str


# A single cell value in an extend response.
class FreebaseExtendResponseValue(BaseModel):
    # The field name `str` is the key clients read the value from, so it
    # must not be renamed. The annotation still resolves to the builtin
    # type because the name is never assigned in the class body.
    str: str


# Full extend response: column metadata plus the values, nested as
# entity ID -> property ID -> list of values.
class FreebaseExtendResponse(BaseModel):
    meta: List[FreebaseExtendResponseMeta]
    rows: Dict[str, Dict[str, List[FreebaseExtendResponseValue]]]


class FreebaseManifestView(BaseModel):
url: str

Expand All @@ -109,14 +157,41 @@ class FreebaseManifestSuggest(BaseModel):
property: FreebaseManifestSuggestType


# Location of the property proposal service, split into a base URL and a
# path. Embedded in `FreebaseManifestExtend.propose_properties`.
class FreebaseManifestExtendProposeProperties(BaseModel):
    service_url: AnyHttpUrl
    service_path: str


# One selectable option (value `id` and display `name`) of a property
# setting; items of `FreebaseManifestExtendPropertySetting.choices`.
class FreebaseManifestExtendPropertySettingChoice(BaseModel):
    id: str
    name: str


# Declares one configurable setting that the manifest lists under
# `propose_settings`.
class FreebaseManifestExtendPropertySetting(BaseModel):
    name: str
    label: str
    type: str
    # Left untyped: the default's type depends on the setting's `type`.
    default: Any
    help_text: str
    # Only some settings offer a fixed list of options; empty otherwise.
    choices: List[FreebaseManifestExtendPropertySettingChoice] = []


# The `extend` section of the service manifest: where to fetch proposed
# properties and which per-property settings exist.
class FreebaseManifestExtend(BaseModel):
    propose_properties: FreebaseManifestExtendProposeProperties
    propose_settings: List[FreebaseManifestExtendPropertySetting]


# Service manifest model. Field names are camelCase where the serialized
# keys require it (e.g. `identifierSpace`, `defaultTypes`), so they must
# not be renamed.
class FreebaseManifest(BaseModel):
    versions: List[str] = Field(..., examples=[["0.2"]])
    name: str = Field(..., examples=[settings.TITLE])
    identifierSpace: AnyHttpUrl
    schemaSpace: AnyHttpUrl
    documentation: AnyHttpUrl
    batchSize: int
    view: FreebaseManifestView
    preview: FreebaseManifestPreview
    suggest: FreebaseManifestSuggest
    # Data extension section of the manifest; the metadata test asserts
    # that an "extend" key is present.
    extend: FreebaseManifestExtend
    defaultTypes: List[FreebaseType]


Expand Down
Loading