Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#29 Remap RDM to Lingo ETL #33

Merged
merged 4 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions arches_lingo/const.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,34 @@
### Concepts Model Nodes & Nodegroups ###
CONCEPTS_GRAPH_ID = "bf73e576-4888-11ee-8a8d-11afefc4bff7"
SCHEMES_GRAPH_ID = "56788995-423b-11ee-8a8d-11afefc4bff7"

TOP_CONCEPT_OF_NODE_AND_NODEGROUP = "bf73e5b9-4888-11ee-8a8d-11afefc4bff7"

BROADER_NODE_AND_NODEGROUP = "bf73e5f5-4888-11ee-8a8d-11afefc4bff7"
BROADER_NODE_AND_NODEGROUP = "f3f7bbea-0eb9-11ef-93db-0a58a9feac02" # classification_status_ascribed_classification

CONCEPT_NAME_NODEGROUP = "bf73e616-4888-11ee-8a8d-11afefc4bff7"
# appellative_status
CONCEPT_NAME_NODEGROUP = "ab9fee9c-0eb6-11ef-93db-0a58a9feac02"
# appellative_status_ascribed_name_content
CONCEPT_NAME_CONTENT_NODE = "bf73e695-4888-11ee-8a8d-11afefc4bff7"
CONCEPT_NAME_TYPE_NODE = "b08eebb4-d44c-11ee-a986-0242ac130005"
CONCEPT_NAME_LANGUAGE_NODE = "444b7de6-d44c-11ee-8fe3-0242ac130005"
# appellative_status_ascribed_name_language
CONCEPT_NAME_LANGUAGE_NODE = "a8ecaf54-0eb7-11ef-93db-0a58a9feac02"
# appellative_status_ascribed_relation
CONCEPT_NAME_TYPE_NODE = "1ddffab4-0eb8-11ef-93db-0a58a9feac02"

CONCEPTS_PART_OF_SCHEME_NODEGROUP_ID = "bf73e60a-4888-11ee-8a8d-11afefc4bff7"


SCHEME_NAME_NODEGROUP = "749a27cf-423c-11ee-8a8d-11afefc4bff7"
SCHEME_NAME_CONTENT_NODE = "749a27d5-423c-11ee-8a8d-11afefc4bff7"
SCHEME_NAME_TYPE_NODE = "1330cc4c-d44d-11ee-9261-0242ac130005"
### Scheme Model Nodes & Nodegroups ###
SCHEMES_GRAPH_ID = "56788995-423b-11ee-8a8d-11afefc4bff7"

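# appellative_status
# NOTE(review): this ID is identical to CONCEPT_NAME_NODEGROUP above, whereas the
# Scheme.json "name" descriptor in this change uses nodegroup
# ef87ac28-11de-11ef-9493-0a58a9feac02 - confirm which ID the Scheme graph uses.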
SCHEME_NAME_NODEGROUP = "ab9fee9c-0eb6-11ef-93db-0a58a9feac02"
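# appellative_status_ascribed_name_content
# NOTE(review): this ID is identical to CONCEPT_NAME_LANGUAGE_NODE above (annotated
# there as appellative_status_ascribed_name_language) - confirm it is really the
# Scheme graph's name-content node.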
SCHEME_NAME_CONTENT_NODE = "a8ecaf54-0eb7-11ef-93db-0a58a9feac02"
# appellative_status_ascribed_name_language
SCHEME_NAME_LANGUAGE_NODE = "2deaf45e-d44d-11ee-b78d-0242ac130005"
# appellative_status_ascribed_relation
SCHEME_NAME_TYPE_NODE = "1ddffab4-0eb8-11ef-93db-0a58a9feac02"


PREF_LABEL_VALUE_ID = "3b8a03f1-9047-48e4-9ca0-b3fe887f6f9d"
ALT_LABEL_VALUE_ID = "c02f97c5-da16-4ff0-864a-92c34da84e38"
Expand Down
57 changes: 37 additions & 20 deletions arches_lingo/etl_modules/migrate_to_lingo.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@
from arches.app.models.models import LoadStaging, NodeGroup, LoadEvent
from arches.app.utils.betterJSONSerializer import JSONSerializer
import arches_lingo.tasks as tasks
from arches_lingo.const import (
SCHEMES_GRAPH_ID,
CONCEPTS_GRAPH_ID,
TOP_CONCEPT_OF_NODE_AND_NODEGROUP,
BROADER_NODE_AND_NODEGROUP,
CONCEPTS_PART_OF_SCHEME_NODEGROUP_ID,
)

logger = logging.getLogger(__name__)

#### Constants ####
SCHEMES_GRAPH_ID = uuid.UUID("56788995-423b-11ee-8a8d-11afefc4bff7")
CONCEPTS_GRAPH_ID = uuid.UUID("bf73e576-4888-11ee-8a8d-11afefc4bff7")
CONCEPTS_TOP_CONCEPT_OF_NODEGROUP_ID = uuid.UUID("bf73e5b9-4888-11ee-8a8d-11afefc4bff7")
CONCEPTS_BROADER_NODEGROUP_ID = uuid.UUID("bf73e5f5-4888-11ee-8a8d-11afefc4bff7")
CONCEPTS_PART_OF_SCHEME_NODEGROUP_ID = uuid.UUID("bf73e60a-4888-11ee-8a8d-11afefc4bff7")

details = {
"etlmoduleid": "11cad3ca-e155-44b1-9910-c50b3def47f6",
"name": "Migrate to Lingo",
Expand Down Expand Up @@ -62,16 +62,25 @@ def etl_schemes(self, cursor, nodegroup_lookup, node_lookup):
concept.pk
) # use old conceptid as new resourceinstanceid

name = {}
appellative_status = {}
identifier = {}
if (
value.valuetype_id == "prefLabel"
or value.valuetype_id == "altLabel"
):
name["name_content"] = value.value
name["name_language"] = value.language_id
name["name_type"] = value.valuetype_id
scheme_to_load["tile_data"].append({"name": name})
appellative_status["appellative_status_ascribed_name_content"] = (
value.value
)

appellative_status["appellative_status_ascribed_name_language"] = (
value.language_id
)
appellative_status["appellative_status_ascribed_relation"] = (
value.valuetype_id
)
scheme_to_load["tile_data"].append(
{"appellative_status": appellative_status}
)
elif value.valuetype_id == "identifier":
identifier["identifier_content"] = value.value
identifier["identifier_type"] = value.valuetype_id
Expand All @@ -90,16 +99,24 @@ def etl_concepts(self, cursor, nodegroup_lookup, node_lookup):
concept.pk
) # use old conceptid as new resourceinstanceid

name = {}
appellative_status = {}
identifier = {}
if (
value.valuetype_id == "prefLabel"
or value.valuetype_id == "altLabel"
):
name["name_content"] = value.value
name["name_language"] = value.language_id
name["name_type"] = value.valuetype_id
concept_to_load["tile_data"].append({"name": name})
appellative_status["appellative_status_ascribed_name_content"] = (
value.value
)
appellative_status["appellative_status_ascribed_name_language"] = (
value.language_id
)
appellative_status["appellative_status_ascribed_relation"] = (
value.valuetype_id
)
concept_to_load["tile_data"].append(
{"appellative_status": appellative_status}
)
elif value.valuetype_id == "identifier":
identifier["identifier_content"] = value.value
identifier["identifier_type"] = value.valuetype_id
Expand Down Expand Up @@ -242,9 +259,9 @@ def init_relationships(self, cursor, loadid):
where relationtype = 'hasTopConcept';
""",
(
CONCEPTS_TOP_CONCEPT_OF_NODEGROUP_ID,
TOP_CONCEPT_OF_NODE_AND_NODEGROUP,
loadid,
CONCEPTS_TOP_CONCEPT_OF_NODEGROUP_ID,
TOP_CONCEPT_OF_NODE_AND_NODEGROUP,
),
)

Expand Down Expand Up @@ -283,7 +300,7 @@ def init_relationships(self, cursor, loadid):
from relations
where relationtype = 'narrower';
""",
(CONCEPTS_BROADER_NODEGROUP_ID, loadid, CONCEPTS_BROADER_NODEGROUP_ID),
(BROADER_NODE_AND_NODEGROUP, loadid, BROADER_NODE_AND_NODEGROUP),
)

# Create Part of Scheme relationships - derived by recursively generating concept hierarchy & associating
Expand Down
16 changes: 8 additions & 8 deletions arches_lingo/pkg/graphs/resource_models/Concept.json
Original file line number Diff line number Diff line change
Expand Up @@ -3470,16 +3470,16 @@
"config": {
"descriptor_types": {
"description": {
"nodegroup_id": "802bc768-19a6-11ee-8f04-cd21e680a247",
"string_template": "<statement_content>"
"nodegroup_id": "bf73e5d7-4888-11ee-8a8d-11afefc4bff7",
"string_template": "<statement>"
},
"map_popup": {
"nodegroup_id": "",
"string_template": ""
},
"name": {
"nodegroup_id": "bf73e616-4888-11ee-8a8d-11afefc4bff7",
"string_template": "<name_content>"
"nodegroup_id": "ab9fee9c-0eb6-11ef-93db-0a58a9feac02",
"string_template": "<appellative_status_ascribed_name_content>"
}
},
"triggering_nodegroups": []
Expand Down Expand Up @@ -6337,9 +6337,9 @@
}
],
"metadata": {
"db": "PostgreSQL 14.10 on aarch64-unknown-linux-gnu, compiled by gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-6), 64-bit",
"git hash": "/bin/sh: 1: git: not found",
"os": "Linux",
"os version": "5.10.215-203.850.amzn2.x86_64"
"db": "PostgreSQL 14.5 on x86_64-apple-darwin20.6.0, compiled by Apple clang version 12.0.0 (clang-1200.0.32.29), 64-bit",
"git hash": "728621149 2024-06-27 09:32:13 -0700",
"os": "Darwin",
"os version": "23.5.0"
}
}
8 changes: 4 additions & 4 deletions arches_lingo/pkg/graphs/resource_models/Scheme.json
Original file line number Diff line number Diff line change
Expand Up @@ -2069,16 +2069,16 @@
"config": {
"descriptor_types": {
"description": {
"nodegroup_id": "",
"string_template": ""
"nodegroup_id": "7131bc72-11e0-11ef-9493-0a58a9feac02",
"string_template": "<statement>"
},
"map_popup": {
"nodegroup_id": "",
"string_template": ""
},
"name": {
"nodegroup_id": "749a27cf-423c-11ee-8a8d-11afefc4bff7",
"string_template": "<name_content>"
"nodegroup_id": "ef87ac28-11de-11ef-9493-0a58a9feac02",
"string_template": "<appellative_status_ascribed_name_content>"
}
},
"triggering_nodegroups": []
Expand Down
Loading
Loading