Skip to content

Commit

Permalink
Merge pull request #33 from archesproject/29_remap_RDM_to_Lingo_ETL
Browse files: browse the repository at this point in the history
#29 Remap RDM to Lingo ETL
  • Loading branch information
robgaston authored Jul 10, 2024
2 parents c696479 + 7d9aec1 commit ff0abbe
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 143 deletions.
31 changes: 23 additions & 8 deletions arches_lingo/const.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,34 @@
### Concepts Model Nodes & Nodegroups ###
CONCEPTS_GRAPH_ID = "bf73e576-4888-11ee-8a8d-11afefc4bff7"
SCHEMES_GRAPH_ID = "56788995-423b-11ee-8a8d-11afefc4bff7"

TOP_CONCEPT_OF_NODE_AND_NODEGROUP = "bf73e5b9-4888-11ee-8a8d-11afefc4bff7"

BROADER_NODE_AND_NODEGROUP = "bf73e5f5-4888-11ee-8a8d-11afefc4bff7"
BROADER_NODE_AND_NODEGROUP = "f3f7bbea-0eb9-11ef-93db-0a58a9feac02" # classification_status_ascribed_classification

CONCEPT_NAME_NODEGROUP = "bf73e616-4888-11ee-8a8d-11afefc4bff7"
# appellative_status
CONCEPT_NAME_NODEGROUP = "ab9fee9c-0eb6-11ef-93db-0a58a9feac02"
# appellative_status_ascribed_name_content
CONCEPT_NAME_CONTENT_NODE = "bf73e695-4888-11ee-8a8d-11afefc4bff7"
CONCEPT_NAME_TYPE_NODE = "b08eebb4-d44c-11ee-a986-0242ac130005"
CONCEPT_NAME_LANGUAGE_NODE = "444b7de6-d44c-11ee-8fe3-0242ac130005"
# appellative_status_ascribed_name_language
CONCEPT_NAME_LANGUAGE_NODE = "a8ecaf54-0eb7-11ef-93db-0a58a9feac02"
# appellative_status_ascribed_relation
CONCEPT_NAME_TYPE_NODE = "1ddffab4-0eb8-11ef-93db-0a58a9feac02"

CONCEPTS_PART_OF_SCHEME_NODEGROUP_ID = "bf73e60a-4888-11ee-8a8d-11afefc4bff7"


SCHEME_NAME_NODEGROUP = "749a27cf-423c-11ee-8a8d-11afefc4bff7"
SCHEME_NAME_CONTENT_NODE = "749a27d5-423c-11ee-8a8d-11afefc4bff7"
SCHEME_NAME_TYPE_NODE = "1330cc4c-d44d-11ee-9261-0242ac130005"
### Scheme Model Nodes & Nodegroups ###
SCHEMES_GRAPH_ID = "56788995-423b-11ee-8a8d-11afefc4bff7"

# appellative_status
SCHEME_NAME_NODEGROUP = "ab9fee9c-0eb6-11ef-93db-0a58a9feac02"
# appellative_status_ascribed_name_content
SCHEME_NAME_CONTENT_NODE = "a8ecaf54-0eb7-11ef-93db-0a58a9feac02"
# appellative_status_ascribed_name_language
SCHEME_NAME_LANGUAGE_NODE = "2deaf45e-d44d-11ee-b78d-0242ac130005"
# appellative_status_ascribed_relation
SCHEME_NAME_TYPE_NODE = "1ddffab4-0eb8-11ef-93db-0a58a9feac02"


PREF_LABEL_VALUE_ID = "3b8a03f1-9047-48e4-9ca0-b3fe887f6f9d"
ALT_LABEL_VALUE_ID = "c02f97c5-da16-4ff0-864a-92c34da84e38"
Expand Down
57 changes: 37 additions & 20 deletions arches_lingo/etl_modules/migrate_to_lingo.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@
from arches.app.models.models import LoadStaging, NodeGroup, LoadEvent
from arches.app.utils.betterJSONSerializer import JSONSerializer
import arches_lingo.tasks as tasks
from arches_lingo.const import (
SCHEMES_GRAPH_ID,
CONCEPTS_GRAPH_ID,
TOP_CONCEPT_OF_NODE_AND_NODEGROUP,
BROADER_NODE_AND_NODEGROUP,
CONCEPTS_PART_OF_SCHEME_NODEGROUP_ID,
)

logger = logging.getLogger(__name__)

#### Constants ####
SCHEMES_GRAPH_ID = uuid.UUID("56788995-423b-11ee-8a8d-11afefc4bff7")
CONCEPTS_GRAPH_ID = uuid.UUID("bf73e576-4888-11ee-8a8d-11afefc4bff7")
CONCEPTS_TOP_CONCEPT_OF_NODEGROUP_ID = uuid.UUID("bf73e5b9-4888-11ee-8a8d-11afefc4bff7")
CONCEPTS_BROADER_NODEGROUP_ID = uuid.UUID("bf73e5f5-4888-11ee-8a8d-11afefc4bff7")
CONCEPTS_PART_OF_SCHEME_NODEGROUP_ID = uuid.UUID("bf73e60a-4888-11ee-8a8d-11afefc4bff7")

details = {
"etlmoduleid": "11cad3ca-e155-44b1-9910-c50b3def47f6",
"name": "Migrate to Lingo",
Expand Down Expand Up @@ -62,16 +62,25 @@ def etl_schemes(self, cursor, nodegroup_lookup, node_lookup):
concept.pk
) # use old conceptid as new resourceinstanceid

name = {}
appellative_status = {}
identifier = {}
if (
value.valuetype_id == "prefLabel"
or value.valuetype_id == "altLabel"
):
name["name_content"] = value.value
name["name_language"] = value.language_id
name["name_type"] = value.valuetype_id
scheme_to_load["tile_data"].append({"name": name})
appellative_status["appellative_status_ascribed_name_content"] = (
value.value
)

appellative_status["appellative_status_ascribed_name_language"] = (
value.language_id
)
appellative_status["appellative_status_ascribed_relation"] = (
value.valuetype_id
)
scheme_to_load["tile_data"].append(
{"appellative_status": appellative_status}
)
elif value.valuetype_id == "identifier":
identifier["identifier_content"] = value.value
identifier["identifier_type"] = value.valuetype_id
Expand All @@ -90,16 +99,24 @@ def etl_concepts(self, cursor, nodegroup_lookup, node_lookup):
concept.pk
) # use old conceptid as new resourceinstanceid

name = {}
appellative_status = {}
identifier = {}
if (
value.valuetype_id == "prefLabel"
or value.valuetype_id == "altLabel"
):
name["name_content"] = value.value
name["name_language"] = value.language_id
name["name_type"] = value.valuetype_id
concept_to_load["tile_data"].append({"name": name})
appellative_status["appellative_status_ascribed_name_content"] = (
value.value
)
appellative_status["appellative_status_ascribed_name_language"] = (
value.language_id
)
appellative_status["appellative_status_ascribed_relation"] = (
value.valuetype_id
)
concept_to_load["tile_data"].append(
{"appellative_status": appellative_status}
)
elif value.valuetype_id == "identifier":
identifier["identifier_content"] = value.value
identifier["identifier_type"] = value.valuetype_id
Expand Down Expand Up @@ -242,9 +259,9 @@ def init_relationships(self, cursor, loadid):
where relationtype = 'hasTopConcept';
""",
(
CONCEPTS_TOP_CONCEPT_OF_NODEGROUP_ID,
TOP_CONCEPT_OF_NODE_AND_NODEGROUP,
loadid,
CONCEPTS_TOP_CONCEPT_OF_NODEGROUP_ID,
TOP_CONCEPT_OF_NODE_AND_NODEGROUP,
),
)

Expand Down Expand Up @@ -283,7 +300,7 @@ def init_relationships(self, cursor, loadid):
from relations
where relationtype = 'narrower';
""",
(CONCEPTS_BROADER_NODEGROUP_ID, loadid, CONCEPTS_BROADER_NODEGROUP_ID),
(BROADER_NODE_AND_NODEGROUP, loadid, BROADER_NODE_AND_NODEGROUP),
)

# Create Part of Scheme relationships - derived by recursively generating concept hierarchy & associating
Expand Down
16 changes: 8 additions & 8 deletions arches_lingo/pkg/graphs/resource_models/Concept.json
Original file line number Diff line number Diff line change
Expand Up @@ -3470,16 +3470,16 @@
"config": {
"descriptor_types": {
"description": {
"nodegroup_id": "802bc768-19a6-11ee-8f04-cd21e680a247",
"string_template": "<statement_content>"
"nodegroup_id": "bf73e5d7-4888-11ee-8a8d-11afefc4bff7",
"string_template": "<statement>"
},
"map_popup": {
"nodegroup_id": "",
"string_template": ""
},
"name": {
"nodegroup_id": "bf73e616-4888-11ee-8a8d-11afefc4bff7",
"string_template": "<name_content>"
"nodegroup_id": "ab9fee9c-0eb6-11ef-93db-0a58a9feac02",
"string_template": "<appellative_status_ascribed_name_content>"
}
},
"triggering_nodegroups": []
Expand Down Expand Up @@ -6337,9 +6337,9 @@
}
],
"metadata": {
"db": "PostgreSQL 14.10 on aarch64-unknown-linux-gnu, compiled by gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-6), 64-bit",
"git hash": "/bin/sh: 1: git: not found",
"os": "Linux",
"os version": "5.10.215-203.850.amzn2.x86_64"
"db": "PostgreSQL 14.5 on x86_64-apple-darwin20.6.0, compiled by Apple clang version 12.0.0 (clang-1200.0.32.29), 64-bit",
"git hash": "728621149 2024-06-27 09:32:13 -0700",
"os": "Darwin",
"os version": "23.5.0"
}
}
8 changes: 4 additions & 4 deletions arches_lingo/pkg/graphs/resource_models/Scheme.json
Original file line number Diff line number Diff line change
Expand Up @@ -2069,16 +2069,16 @@
"config": {
"descriptor_types": {
"description": {
"nodegroup_id": "",
"string_template": ""
"nodegroup_id": "7131bc72-11e0-11ef-9493-0a58a9feac02",
"string_template": "<statement>"
},
"map_popup": {
"nodegroup_id": "",
"string_template": ""
},
"name": {
"nodegroup_id": "749a27cf-423c-11ee-8a8d-11afefc4bff7",
"string_template": "<name_content>"
"nodegroup_id": "ef87ac28-11de-11ef-9493-0a58a9feac02",
"string_template": "<appellative_status_ascribed_name_content>"
}
},
"triggering_nodegroups": []
Expand Down
Loading

0 comments on commit ff0abbe

Please sign in to comment.