From 2fa3197656f2eb9e76b0c74abbcbd5d760c27c52 Mon Sep 17 00:00:00 2001 From: Jennifer Tran Date: Thu, 16 May 2024 11:15:41 -0700 Subject: [PATCH 1/9] fix: formatting --- ...collection-and-item-workflows-ingest.ipynb | 398 ++++++++++++++++++ 1 file changed, 398 insertions(+) create mode 100644 transformation-scripts/collection-and-item-workflows-ingest.ipynb diff --git a/transformation-scripts/collection-and-item-workflows-ingest.ipynb b/transformation-scripts/collection-and-item-workflows-ingest.ipynb new file mode 100644 index 00000000..9db638b5 --- /dev/null +++ b/transformation-scripts/collection-and-item-workflows-ingest.ipynb @@ -0,0 +1,398 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook to Publish Collections and Start Discovery Workflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook publishes the collections in `/ingestion-data/collections` excluding:\n", + "- 'hls-l30-002-ej-reprocessed'\n", + "- 'hls-s30-002-ej-reprocessed'\n", + "- 'ls8-covid-19-example-data'\n", + "- 'landsat-c2l2-sr-antarctic-glaciers-pine-island'\n", + "- 'landsat-c2l2-sr-lakes-aral-sea'\n", + "- 'landsat-c2l2-sr-lakes-tonle-sap'\n", + "- 'landsat-c2l2-sr-lakes-lake-balaton'\n", + "- 'landsat-c2l2-sr-lakes-vanern'\n", + "- 'landsat-c2l2-sr-antarctic-glaciers-thwaites'\n", + "- 'landsat-c2l2-sr-lakes-lake-biwa'\n", + "- 'combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "import os\n", + "import json\n", + "import requests" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell retrieves collection JSON files from `/ingestion-data/collections/` and save collectionIds to a list." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "excluded_collections = [\n", + " \"hls-l30-002-ej-reprocessed\",\n", + " \"hls-s30-002-ej-reprocessed\",\n", + " \"ls8-covid-19-example-data\",\n", + " \"landsat-c2l2-sr-antarctic-glaciers-pine-island\",\n", + " \"landsat-c2l2-sr-lakes-aral-sea\",\n", + " \"landsat-c2l2-sr-lakes-tonle-sap\",\n", + " \"landsat-c2l2-sr-lakes-lake-balaton\",\n", + " \"landsat-c2l2-sr-lakes-vanern\",\n", + " \"landsat-c2l2-sr-antarctic-glaciers-thwaites\",\n", + " \"landsat-c2l2-sr-lakes-lake-biwa\",\n", + " \"combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO\",\n", + "]\n", + "\n", + "json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n", + "filtered_list = [\n", + " item\n", + " for item in json_file_paths\n", + " if all(\n", + " excluded_collections not in item\n", + " for excluded_collections in excluded_collections\n", + " )\n", + "]\n", + "\n", + "file_paths_and_collection_ids = [\n", + " {\"filePath\": file_path, \"collectionId\": data[\"id\"]}\n", + " for file_path in filtered_list\n", + " if \"id\" in (data := json.load(open(file_path, \"r\")))\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the testing mode to `True` when testing and `False` otherwise. When the testing mode is `True`, the notebook will be set to run against `dev` endpoints." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "testing_mode = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell compares files in '/ingestion/collections' with those in 'ingestion/staging/discovery-items' or 'ingestion/production/discovery-items' and returns a list of all the discovery-items that have a corresponding collection." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def load_json_files_from_directory(directory):\n", + " json_files = []\n", + " for filename in os.listdir(directory):\n", + " if filename.endswith(\".json\"):\n", + " json_files.append(filename)\n", + " return json_files\n", + "\n", + "\n", + "def find_matching_file_names(collections_list, discovery_items_list):\n", + " matching_file_names = []\n", + " for collection_filename in collections_list:\n", + " collection_json = load_json_file(\n", + " os.path.join(collections_files, collection_filename)\n", + " )\n", + " id1 = collection_json.get(\"id\")\n", + " if id1 is not None:\n", + " for discovery_items_filename in discovery_items_list:\n", + " item_json = load_json_file(\n", + " os.path.join(discovery_items_files, discovery_items_filename)\n", + " )\n", + " if isinstance(item_json, list):\n", + " if len(item_json) > 0:\n", + " collection2 = item_json[0].get(\"collection\")\n", + " else:\n", + " collection2 = item_json.get(\"collection\")\n", + "\n", + " if collection2 is not None:\n", + " if collection2 == id1:\n", + " # Found a match\n", + " matching_file_names.append(discovery_items_filename)\n", + " # Further processing or comparison can be done here\n", + " break\n", + " return matching_file_names\n", + "\n", + "\n", + "def load_json_file(file_path):\n", + " with open(file_path, \"r\") as file:\n", + " return json.load(file)\n", + "\n", + "\n", + "collections_files = \"../ingestion-data/collections/\"\n", + "discovery_items_files = (\n", + " \"../ingestion-data/staging/discovery-items/\"\n", + " if testing_mode\n", + " else \"../ingestion-data/production/discovery-items/\"\n", + ")\n", + "\n", + "# Load JSON files from directories\n", + "json_files_dir1 = load_json_files_from_directory(collections_files)\n", + "json_files_dir2 = load_json_files_from_directory(discovery_items_files)\n", + "\n", + "# Find matching file names\n", + 
"matching_file_names = find_matching_file_names(json_files_dir1, json_files_dir2)\n", + "\n", + "# for file_pair in matching_file_names:\n", + "# print(\"Match found:\")\n", + "# print(\"File 1:\", file_pair[0])\n", + "# print(\"File 2:\", file_pair[1])\n", + "discovery_items_to_process = matching_file_names\n", + "print(discovery_items_to_process)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Have your Cognito `username` and `password` ready to set up Cognito Client to retrieve a token that will be used to access the STAC Ingestor API." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_endpoint = \"https://test.openveda.cloud\"\n", + "test_client_id = \"CHANGE ME\"\n", + "test_user_pool_id = \"CHANGE ME\"\n", + "test_identity_pool_id = \"CHANGE ME\"\n", + "\n", + "mcp_prod_endpoint = \"https://openveda.cloud\"\n", + "mcp_prod_client_id = \"CHANGE ME\"\n", + "mcp_prod_user_pool_id = \"CHANGE ME\"\n", + "mcp_prod_identity_pool_id = \"CHANGE ME\"\n", + "\n", + "if testing_mode:\n", + " STAC_INGESTOR_API = f\"{test_endpoint}/api/ingest/\"\n", + " VEDA_STAC_API = f\"{test_endpoint}/api/stac/\"\n", + " WORKFLOWS_API = \"https://4hrks0hk0b.execute-api.us-west-2.amazonaws.com/docs/\"\n", + "else:\n", + " STAC_INGESTOR_API = f\"{mcp_prod_endpoint}/api/ingest/\"\n", + " VEDA_STAC_API = f\"{mcp_prod_endpoint}/api/stac/\"\n", + " WORKFLOWS_API = \"https://bct2n8in53.execute-api.us-west-2.amazonaws.com/docs/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell sets up headers for requests." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TOKEN = \"REPLACE ME\"\n", + "authorization_header = f\"Bearer {TOKEN}\"\n", + "headers = {\n", + " \"Authorization\": authorization_header,\n", + " \"content-type\": \"application/json\",\n", + " \"accept\": \"application/json\",\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell defines the function that will post the collection." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def post_collection(collection, collection_id):\n", + " collection_url = f\"{VEDA_STAC_API}collections/{collection_id}\"\n", + " ingest_url = f\"{STAC_INGESTOR_API}collections\"\n", + "\n", + " try:\n", + " response = requests.post(ingest_url, json=collection, headers=headers)\n", + " response.raise_for_status()\n", + " if response.status_code == 201:\n", + " print(\n", + " f\"Request was successful. Find the updated collection at {collection_url}\"\n", + " )\n", + " else:\n", + " print(\n", + " f\"Updating {collection_id} failed. Request failed with status code: {response.status_code}\"\n", + " )\n", + " except requests.RequestException as e:\n", + " print(\n", + " f\"Updating {collection_id} failed. An error occurred during the request: {e}\"\n", + " )\n", + " except Exception as e:\n", + " print(\n", + " f\"An unexpected error occurred while trying to update {collection_id}: {e}\"\n", + " )\n", + "\n", + "\n", + "def ingest_item(item):\n", + " discovery_url = f\"{WORKFLOWS_API}/discovery\"\n", + " try:\n", + " response = requests.post(discovery_url, json=item, headers=headers)\n", + " response.raise_for_status()\n", + " if response.status_code == 201:\n", + " print(f\"Request was successful. \")\n", + " else:\n", + " print(\n", + " f\"Kicking off discovery for {item} failed. 
Request failed with status code: {response.status_code}\"\n", + " )\n", + " except requests.RequestException as e:\n", + " print(\n", + " f\"Kicking off discovery for {item} failed. An error occurred during the request: {e}\"\n", + " )\n", + " except Exception as e:\n", + " print(\n", + " f\"An unexpected error occurred while trying to kick off discovery for {item} failed: {e}\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If testing_mode is enabled, use a test list:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_file_paths_and_collection_ids = [file_paths_and_collection_ids[0]]\n", + "test_discovery_item = [f\"{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n", + "\n", + "print(test_discovery_item)\n", + "print(test_file_paths_and_collection_ids)\n", + "print(VEDA_STAC_API)\n", + "\n", + "file_paths_and_collection_ids = (\n", + " test_file_paths_and_collection_ids\n", + " if testing_mode\n", + " else file_paths_and_collection_ids\n", + ")\n", + "discovery_items_to_process = (\n", + " test_discovery_item\n", + " if testing_mode\n", + " else discovery_items_to_process\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell publishes the collection to the target ingestion `api/collections` endpoint." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for collection in file_paths_and_collection_ids:\n", + " collection_id = collection[\"collectionId\"]\n", + " file_path = collection[\"filePath\"]\n", + "\n", + " try:\n", + " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n", + " collection = json.load(file)\n", + "\n", + " # Publish the updated collection to the target ingestion `api/collections` endpoint\n", + " post_collection(collection, collection_id)\n", + "\n", + " except requests.RequestException as e:\n", + " print(f\"An error occurred for collectionId {collection_id}: {e}\")\n", + " except Exception as e:\n", + " print(f\"An unexpected error occurred for collectionId {collection_id}: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for item in discovery_items_to_process:\n", + " if testing_mode:\n", + " file_path = f\"../ingestion-data/staging/discovery_items/{item}\"\n", + " else:\n", + " file_path = f\"../ingestion-data/production/discovery_items/{item}\"\n", + "\n", + " try:\n", + " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n", + " item = json.load(file)\n", + "\n", + " # Publish the updated collection to the target ingestion `api/collections` endpoint\n", + " ingest_item(item)\n", + "\n", + " except requests.RequestException as e:\n", + " print(f\"An error occurred for collectionId {collection_id}: {e}\")\n", + " except Exception as e:\n", + " print(f\"An unexpected error occurred for collectionId {collection_id}: {e}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.1" + 
} + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c5a0031752e87e22ac2774541427b44b1f940990 Mon Sep 17 00:00:00 2001 From: Jennifer Tran Date: Thu, 16 May 2024 11:28:23 -0700 Subject: [PATCH 2/9] fix: account for lists of items --- .../collection-and-item-workflows-ingest.ipynb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/transformation-scripts/collection-and-item-workflows-ingest.ipynb b/transformation-scripts/collection-and-item-workflows-ingest.ipynb index 9db638b5..7a90af3e 100644 --- a/transformation-scripts/collection-and-item-workflows-ingest.ipynb +++ b/transformation-scripts/collection-and-item-workflows-ingest.ipynb @@ -365,7 +365,11 @@ " item = json.load(file)\n", "\n", " # Publish the updated collection to the target ingestion `api/collections` endpoint\n", - " ingest_item(item)\n", + " if isinstance(item_json, list):\n", + " for single_item in item_json:\n", + " ingest_item(single_item)\n", + " else:\n", + " ingest_item(item)\n", "\n", " except requests.RequestException as e:\n", " print(f\"An error occurred for collectionId {collection_id}: {e}\")\n", From b3d25977e5cc0555f65c7dce14cad2069448f095 Mon Sep 17 00:00:00 2001 From: Jennifer Tran Date: Fri, 17 May 2024 09:35:29 -0700 Subject: [PATCH 3/9] fix: update naming and simplify logic --- ...collection-and-item-workflows-ingest.ipynb | 74 +++++++++---------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/transformation-scripts/collection-and-item-workflows-ingest.ipynb b/transformation-scripts/collection-and-item-workflows-ingest.ipynb index 7a90af3e..fb69b923 100644 --- a/transformation-scripts/collection-and-item-workflows-ingest.ipynb +++ b/transformation-scripts/collection-and-item-workflows-ingest.ipynb @@ -64,7 +64,7 @@ " \"combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO\",\n", "]\n", "\n", - "json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n", + "collection_json_file_paths = 
glob.glob(\"../ingestion-data/collections/*.json\")\n", "filtered_list = [\n", " item\n", " for item in json_file_paths\n", @@ -110,26 +110,14 @@ "metadata": {}, "outputs": [], "source": [ - "def load_json_files_from_directory(directory):\n", - " json_files = []\n", - " for filename in os.listdir(directory):\n", - " if filename.endswith(\".json\"):\n", - " json_files.append(filename)\n", - " return json_files\n", - "\n", - "\n", "def find_matching_file_names(collections_list, discovery_items_list):\n", " matching_file_names = []\n", " for collection_filename in collections_list:\n", - " collection_json = load_json_file(\n", - " os.path.join(collections_files, collection_filename)\n", - " )\n", + " collection_json = load_json_file(collection_filename)\n", " id1 = collection_json.get(\"id\")\n", " if id1 is not None:\n", " for discovery_items_filename in discovery_items_list:\n", - " item_json = load_json_file(\n", - " os.path.join(discovery_items_files, discovery_items_filename)\n", - " )\n", + " item_json = load_json_file(discovery_items_filename)\n", " if isinstance(item_json, list):\n", " if len(item_json) > 0:\n", " collection2 = item_json[0].get(\"collection\")\n", @@ -140,7 +128,6 @@ " if collection2 == id1:\n", " # Found a match\n", " matching_file_names.append(discovery_items_filename)\n", - " # Further processing or comparison can be done here\n", " break\n", " return matching_file_names\n", "\n", @@ -157,12 +144,15 @@ " else \"../ingestion-data/production/discovery-items/\"\n", ")\n", "\n", - "# Load JSON files from directories\n", - "json_files_dir1 = load_json_files_from_directory(collections_files)\n", - "json_files_dir2 = load_json_files_from_directory(discovery_items_files)\n", - "\n", + "discovery_items_json_file_paths = (\n", + " glob.glob(\"../ingestion-data/staging/discovery-items//*.json\")\n", + " if testing_mode\n", + " else glob.glob(\"../ingestion-data/production/discovery-items//*.json\")\n", + ")\n", "# Find matching file names\n", - 
"matching_file_names = find_matching_file_names(json_files_dir1, json_files_dir2)\n", + "matching_file_names = find_matching_file_names(\n", + " collections_json_file_paths, discovery_items_json_file_paths\n", + ")\n", "\n", "# for file_pair in matching_file_names:\n", "# print(\"Match found:\")\n", @@ -195,6 +185,7 @@ "mcp_prod_user_pool_id = \"CHANGE ME\"\n", "mcp_prod_identity_pool_id = \"CHANGE ME\"\n", "\n", + "print(f\"TESTING MODE? {testing_mode}\")\n", "if testing_mode:\n", " STAC_INGESTOR_API = f\"{test_endpoint}/api/ingest/\"\n", " VEDA_STAC_API = f\"{test_endpoint}/api/stac/\"\n", @@ -219,6 +210,7 @@ "outputs": [], "source": [ "TOKEN = \"REPLACE ME\"\n", + "\n", "authorization_header = f\"Bearer {TOKEN}\"\n", "headers = {\n", " \"Authorization\": authorization_header,\n", @@ -265,20 +257,22 @@ " )\n", "\n", "\n", - "def ingest_item(item):\n", + "def ingest_discovery_item(discovery_item):\n", " discovery_url = f\"{WORKFLOWS_API}/discovery\"\n", " try:\n", - " response = requests.post(discovery_url, json=item, headers=headers)\n", + " response = requests.post(\n", + " discovery_url, json=ingest_discovery_item, headers=headers\n", + " )\n", " response.raise_for_status()\n", " if response.status_code == 201:\n", " print(f\"Request was successful. \")\n", " else:\n", " print(\n", - " f\"Kicking off discovery for {item} failed. Request failed with status code: {response.status_code}\"\n", + " f\"Kicking off discovery for {ingest_discovery_item} failed. Request failed with status code: {response.status_code}\"\n", " )\n", " except requests.RequestException as e:\n", " print(\n", - " f\"Kicking off discovery for {item} failed. An error occurred during the request: {e}\"\n", + " f\"Kicking off discovery for {ingest_discovery_item} failed. 
An error occurred during the request: {e}\"\n", " )\n", " except Exception as e:\n", " print(\n", @@ -300,7 +294,7 @@ "outputs": [], "source": [ "test_file_paths_and_collection_ids = [file_paths_and_collection_ids[0]]\n", - "test_discovery_item = [f\"{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n", + "test_discovery_item = [f\"../ingestion-data/staging/discovery-items/{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n", "\n", "print(test_discovery_item)\n", "print(test_file_paths_and_collection_ids)\n", @@ -315,7 +309,10 @@ " test_discovery_item\n", " if testing_mode\n", " else discovery_items_to_process\n", - ")" + ")\n", + "\n", + "print(file_paths_and_collection_ids)\n", + "print(discovery_items_to_process)" ] }, { @@ -354,27 +351,22 @@ "metadata": {}, "outputs": [], "source": [ - "for item in discovery_items_to_process:\n", - " if testing_mode:\n", - " file_path = f\"../ingestion-data/staging/discovery_items/{item}\"\n", - " else:\n", - " file_path = f\"../ingestion-data/production/discovery_items/{item}\"\n", - "\n", + "for discovery_item in discovery_items_to_process:\n", " try:\n", - " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n", - " item = json.load(file)\n", + " with open(discovery_item, \"r\", encoding=\"utf-8\") as file:\n", + " discovery_item_json = json.load(file)\n", "\n", " # Publish the updated collection to the target ingestion `api/collections` endpoint\n", - " if isinstance(item_json, list):\n", - " for single_item in item_json:\n", - " ingest_item(single_item)\n", + " if isinstance(discovery_item_json, list):\n", + " for single_discovery_item in discovery_item_json:\n", + " ingest_discovery_item(single_discovery_item)\n", " else:\n", - " ingest_item(item)\n", + " ingest_discovery_item(discovery_item_json)\n", "\n", " except requests.RequestException as e:\n", - " print(f\"An error occurred for collectionId {collection_id}: {e}\")\n", + " print(f\"An error occurred for discovery item 
{discovery_item}: {e}\")\n", " except Exception as e:\n", - " print(f\"An unexpected error occurred for collectionId {collection_id}: {e}\")" + " print(f\"An unexpected error occurred for discovery item {discovery_item}: {e}\")" ] } ], From 2dc9b47c0df7d59f837f6a220ef414be56232daf Mon Sep 17 00:00:00 2001 From: Jennifer Tran Date: Fri, 17 May 2024 09:50:58 -0700 Subject: [PATCH 4/9] fix: more fixes to log statments, etc --- ...collection-and-item-workflows-ingest.ipynb | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/transformation-scripts/collection-and-item-workflows-ingest.ipynb b/transformation-scripts/collection-and-item-workflows-ingest.ipynb index fb69b923..8713a8dc 100644 --- a/transformation-scripts/collection-and-item-workflows-ingest.ipynb +++ b/transformation-scripts/collection-and-item-workflows-ingest.ipynb @@ -189,11 +189,11 @@ "if testing_mode:\n", " STAC_INGESTOR_API = f\"{test_endpoint}/api/ingest/\"\n", " VEDA_STAC_API = f\"{test_endpoint}/api/stac/\"\n", - " WORKFLOWS_API = \"https://4hrks0hk0b.execute-api.us-west-2.amazonaws.com/docs/\"\n", + " WORKFLOWS_API = \"https://4hrks0hk0b.execute-api.us-west-2.amazonaws.com/\"\n", "else:\n", " STAC_INGESTOR_API = f\"{mcp_prod_endpoint}/api/ingest/\"\n", " VEDA_STAC_API = f\"{mcp_prod_endpoint}/api/stac/\"\n", - " WORKFLOWS_API = \"https://bct2n8in53.execute-api.us-west-2.amazonaws.com/docs/\"" + " WORKFLOWS_API = \"https://bct2n8in53.execute-api.us-west-2.amazonaws.com/\"" ] }, { @@ -245,38 +245,37 @@ " )\n", " else:\n", " print(\n", - " f\"Updating {collection_id} failed. Request failed with status code: {response.status_code}\"\n", + " f\"ERROR: Updating {collection_id} failed. Request failed with status code: {response.status_code}\"\n", " )\n", " except requests.RequestException as e:\n", " print(\n", - " f\"Updating {collection_id} failed. An error occurred during the request: {e}\"\n", + " f\"ERROR: Updating {collection_id} failed. 
An error occurred during the request: {e}\"\n", " )\n", " except Exception as e:\n", " print(\n", - " f\"An unexpected error occurred while trying to update {collection_id}: {e}\"\n", + " f\"ERROR: An unexpected error occurred while trying to update {collection_id}: {e}\"\n", " )\n", "\n", "\n", "def ingest_discovery_item(discovery_item):\n", - " discovery_url = f\"{WORKFLOWS_API}/discovery\"\n", + " discovery_url = f\"{WORKFLOWS_API}discovery\"\n", + " print(discovery_url)\n", " try:\n", - " response = requests.post(\n", - " discovery_url, json=ingest_discovery_item, headers=headers\n", - " )\n", + " response = requests.post(discovery_url, json=discovery_item, headers=headers)\n", " response.raise_for_status()\n", " if response.status_code == 201:\n", - " print(f\"Request was successful. \")\n", + " print(f\"Request was successful. {response}\")\n", " else:\n", " print(\n", - " f\"Kicking off discovery for {ingest_discovery_item} failed. Request failed with status code: {response.status_code}\"\n", + " f\"ERROR: Kicking off discovery for {discovery_item} failed. Request failed with status code: {response.status_code}\"\n", " )\n", " except requests.RequestException as e:\n", " print(\n", - " f\"Kicking off discovery for {ingest_discovery_item} failed. An error occurred during the request: {e}\"\n", + " f\"ERROR: Kicking off discovery for {discovery_item} failed. 
An error occurred during the request: {e}\"\n", " )\n", " except Exception as e:\n", " print(\n", - " f\"An unexpected error occurred while trying to kick off discovery for {item} failed: {e}\"\n", + " f\"ERROR: An unexpected error occurred while trying to kick off discovery for {discovery_item} failed: {e}\"\n", " )" ] }, @@ -345,6 +344,13 @@ " print(f\"An unexpected error occurred for collectionId {collection_id}: {e}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell kicks off a `/discovery` workflow for all the discovery items " + ] + }, { "cell_type": "code", "execution_count": null, @@ -352,6 +358,7 @@ "outputs": [], "source": [ "for discovery_item in discovery_items_to_process:\n", + " print(discovery_item)\n", " try:\n", " with open(discovery_item, \"r\", encoding=\"utf-8\") as file:\n", " discovery_item_json = json.load(file)\n", From 09004f0a6cd9dd18e4b3013d468c234dc174c754 Mon Sep 17 00:00:00 2001 From: Jennifer Tran Date: Fri, 17 May 2024 10:50:58 -0700 Subject: [PATCH 5/9] fix: update item and collection logic to match against filtered list --- ...collection-and-item-workflows-ingest.ipynb | 77 ++++++++++++------- 1 file changed, 50 insertions(+), 27 deletions(-) diff --git a/transformation-scripts/collection-and-item-workflows-ingest.ipynb b/transformation-scripts/collection-and-item-workflows-ingest.ipynb index 8713a8dc..b5807ea2 100644 --- a/transformation-scripts/collection-and-item-workflows-ingest.ipynb +++ b/transformation-scripts/collection-and-item-workflows-ingest.ipynb @@ -65,7 +65,7 @@ "]\n", "\n", "collection_json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n", - "filtered_list = [\n", + "filtered_collection_file_paths_list = [\n", " item\n", " for item in json_file_paths\n", " if all(\n", @@ -73,10 +73,11 @@ " for excluded_collections in excluded_collections\n", " )\n", "]\n", + "print(filtered_collection_file_paths_list)\n", "\n", "file_paths_and_collection_ids = [\n", " 
{\"filePath\": file_path, \"collectionId\": data[\"id\"]}\n", - " for file_path in filtered_list\n", + " for file_path in filtered_collection_file_paths_list\n", " if \"id\" in (data := json.load(open(file_path, \"r\")))\n", "]" ] @@ -137,21 +138,13 @@ " return json.load(file)\n", "\n", "\n", - "collections_files = \"../ingestion-data/collections/\"\n", - "discovery_items_files = (\n", - " \"../ingestion-data/staging/discovery-items/\"\n", - " if testing_mode\n", - " else \"../ingestion-data/production/discovery-items/\"\n", + "discovery_items_json_file_paths = glob.glob(\n", + " \"../ingestion-data/production/discovery-items//*.json\"\n", ")\n", "\n", - "discovery_items_json_file_paths = (\n", - " glob.glob(\"../ingestion-data/staging/discovery-items//*.json\")\n", - " if testing_mode\n", - " else glob.glob(\"../ingestion-data/production/discovery-items//*.json\")\n", - ")\n", "# Find matching file names\n", "matching_file_names = find_matching_file_names(\n", - " collections_json_file_paths, discovery_items_json_file_paths\n", + " filtered_collection_file_paths_list, discovery_items_json_file_paths\n", ")\n", "\n", "# for file_pair in matching_file_names:\n", @@ -219,6 +212,15 @@ "}" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "failed_discovery_items = []" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -257,7 +259,7 @@ " )\n", "\n", "\n", - "def ingest_discovery_item(discovery_item):\n", + "def ingest_discovery_item(discovery_item, discovery_item_path):\n", " discovery_url = f\"{WORKFLOWS_API}discovery\"\n", " print(discovery_url)\n", " try:\n", @@ -269,21 +271,33 @@ " print(\n", " f\"ERROR: Kicking off discovery for {discovery_item} failed. Request failed with status code: {response.status_code}\"\n", " )\n", + " failed_discovery_items.append(discovery_item_path)\n", " except requests.RequestException as e:\n", " print(\n", " f\"ERROR: Kicking off discovery for {discovery_item} failed. 
An error occurred during the request: {e}\"\n", " )\n", + " failed_discovery_items.append(discovery_item_path)\n", " except Exception as e:\n", " print(\n", " f\"ERROR: An unexpected error occurred while trying to kick off discovery for {discovery_item} failed: {e}\"\n", - " )" + " )\n", + " failed_discovery_items.append(discovery_item_path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "If testing_mode is enabled, use a test list:" + "If super_testing_mode is enabled, use a test list against a single collection:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "super_testing_mode = False" ] }, { @@ -293,7 +307,7 @@ "outputs": [], "source": [ "test_file_paths_and_collection_ids = [file_paths_and_collection_ids[0]]\n", - "test_discovery_item = [f\"../ingestion-data/staging/discovery-items/{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n", + "test_discovery_item = [f\"../ingestion-data/production/discovery-items/{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n", "\n", "print(test_discovery_item)\n", "print(test_file_paths_and_collection_ids)\n", @@ -301,12 +315,12 @@ "\n", "file_paths_and_collection_ids = (\n", " test_file_paths_and_collection_ids\n", - " if testing_mode\n", + " if super_testing_mode\n", " else file_paths_and_collection_ids\n", ")\n", "discovery_items_to_process = (\n", " test_discovery_item\n", - " if testing_mode\n", + " if super_testing_mode\n", " else discovery_items_to_process\n", ")\n", "\n", @@ -357,23 +371,32 @@ "metadata": {}, "outputs": [], "source": [ - "for discovery_item in discovery_items_to_process:\n", - " print(discovery_item)\n", + "for discovery_item_path in discovery_items_to_process:\n", " try:\n", " with open(discovery_item, \"r\", encoding=\"utf-8\") as file:\n", " discovery_item_json = json.load(file)\n", "\n", - " # Publish the updated collection to the target ingestion `api/collections` endpoint\n", " if 
isinstance(discovery_item_json, list):\n", - " for single_discovery_item in discovery_item_json:\n", - " ingest_discovery_item(single_discovery_item)\n", + " for single_discovery_item_json in discovery_item_json:\n", + " ingest_discovery_item(single_discovery_item_json, discovery_item_path)\n", " else:\n", - " ingest_discovery_item(discovery_item_json)\n", + " ingest_discovery_item(discovery_item_json, discovery_item_path)\n", "\n", " except requests.RequestException as e:\n", - " print(f\"An error occurred for discovery item {discovery_item}: {e}\")\n", + " print(f\"An error occurred for discovery item {discovery_item_path}: {e}\")\n", " except Exception as e:\n", - " print(f\"An unexpected error occurred for discovery item {discovery_item}: {e}\")" + " print(\n", + " f\"An unexpected error occurred for discovery item {discovery_item_path}: {e}\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(failed_discovery_items)" ] } ], From b5357e66c9f557f4d081648bc131756929bd0cdd Mon Sep 17 00:00:00 2001 From: Jennifer Tran Date: Fri, 17 May 2024 13:32:32 -0700 Subject: [PATCH 6/9] fix: update notebook and fix modis-lst-night-diff-2015-2022 bucket name --- .../discovery-items/modis-lst-night-diff-2015-2022.json | 2 +- .../collection-and-item-workflows-ingest.ipynb | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ingestion-data/production/discovery-items/modis-lst-night-diff-2015-2022.json b/ingestion-data/production/discovery-items/modis-lst-night-diff-2015-2022.json index a5ed010e..7829560e 100644 --- a/ingestion-data/production/discovery-items/modis-lst-night-diff-2015-2022.json +++ b/ingestion-data/production/discovery-items/modis-lst-night-diff-2015-2022.json @@ -1,6 +1,6 @@ { "collection": "modis-lst-night-diff-2015-2022", - "bucket": "veda-data-store/", + "bucket": "veda-data-store", "prefix": "modis-lst-night-diff-2015-2022/", "filename_regex": 
"^(.*)campfire_lst_night_difference", "discovery": "s3", diff --git a/transformation-scripts/collection-and-item-workflows-ingest.ipynb b/transformation-scripts/collection-and-item-workflows-ingest.ipynb index b5807ea2..797d73f6 100644 --- a/transformation-scripts/collection-and-item-workflows-ingest.ipynb +++ b/transformation-scripts/collection-and-item-workflows-ingest.ipynb @@ -373,8 +373,9 @@ "source": [ "for discovery_item_path in discovery_items_to_process:\n", " try:\n", - " with open(discovery_item, \"r\", encoding=\"utf-8\") as file:\n", + " with open(discovery_item_path, \"r\", encoding=\"utf-8\") as file:\n", " discovery_item_json = json.load(file)\n", + " print(discovery_item_json)\n", "\n", " if isinstance(discovery_item_json, list):\n", " for single_discovery_item_json in discovery_item_json:\n", From 6b6af93c75e9e226a4fa0c50f3c20f90fa32dd06 Mon Sep 17 00:00:00 2001 From: Jennifer Tran Date: Fri, 17 May 2024 13:33:31 -0700 Subject: [PATCH 7/9] fix: fix staging bucket name for modis-lst-night-diff-2015-2022 --- .../staging/discovery-items/modis-lst-night-diff-2015-2022.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingestion-data/staging/discovery-items/modis-lst-night-diff-2015-2022.json b/ingestion-data/staging/discovery-items/modis-lst-night-diff-2015-2022.json index e8d44914..7a44d9ad 100644 --- a/ingestion-data/staging/discovery-items/modis-lst-night-diff-2015-2022.json +++ b/ingestion-data/staging/discovery-items/modis-lst-night-diff-2015-2022.json @@ -1,6 +1,6 @@ { "collection": "modis-lst-night-diff-2015-2022", - "bucket": "veda-data-store-staging/", + "bucket": "veda-data-store-staging", "prefix": "modis-lst-night-diff-2015-2022", "filename_regex":"^(.*)campfire_lst_night_difference", "discovery": "s3", From 22f374a2c4f34a655e4310de9266db677a393559 Mon Sep 17 00:00:00 2001 From: Jennifer Tran Date: Mon, 20 May 2024 14:59:17 -0700 Subject: [PATCH 8/9] fix: update nceo_africa_2017.json and houston-lst-diff.json --- 
ingestion-data/production/discovery-items/houston-lst-diff.json | 2 +- ingestion-data/production/discovery-items/nceo_africa_2017.json | 2 +- ingestion-data/staging/discovery-items/houston-lst-diff.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ingestion-data/production/discovery-items/houston-lst-diff.json b/ingestion-data/production/discovery-items/houston-lst-diff.json index 5100f410..e56d120f 100644 --- a/ingestion-data/production/discovery-items/houston-lst-diff.json +++ b/ingestion-data/production/discovery-items/houston-lst-diff.json @@ -1,6 +1,6 @@ { "collection": "houston-lst-diff", - "bucket": "climatedashboard-data", + "bucket": "veda-data-store", "prefix": "houston-lst-diff/", "filename_regex": "^(.*)houston-lst-diff_2000_2019", "discovery": "s3", diff --git a/ingestion-data/production/discovery-items/nceo_africa_2017.json b/ingestion-data/production/discovery-items/nceo_africa_2017.json index c59a7022..7efc91fd 100644 --- a/ingestion-data/production/discovery-items/nceo_africa_2017.json +++ b/ingestion-data/production/discovery-items/nceo_africa_2017.json @@ -1,6 +1,6 @@ { "collection": "nceo_africa_2017", - "prefix": "nceo_africa_2017/", + "prefix": "file-staging/nasa-map/nceo-africa-2017/", "bucket": "nasa-maap-data-store", "filename_regex": "^(.*)AGB_map_2017v0m_COG.tif$", "discovery": "s3", diff --git a/ingestion-data/staging/discovery-items/houston-lst-diff.json b/ingestion-data/staging/discovery-items/houston-lst-diff.json index a06a94f8..61490a1e 100644 --- a/ingestion-data/staging/discovery-items/houston-lst-diff.json +++ b/ingestion-data/staging/discovery-items/houston-lst-diff.json @@ -1,6 +1,6 @@ { "collection": "houston-lst-diff", - "bucket": "climatedashboard-data", + "bucket": "veda-data-store-staging", "prefix":"houston/", "filename_regex":"^(.*)houston-lst-diff_2000_2019", "discovery": "s3", From dfaff7ffbb0201f4e77b2a05a7b7e5882a4a3b8f Mon Sep 17 00:00:00 2001 From: Jennifer Tran Date: Mon, 20 May 2024 
17:14:40 -0700 Subject: [PATCH 9/9] fix: work in progress, add new notebook --- ...collection-and-item-workflows-ingest.ipynb | 11 +- ...collection-and-item-workflows-ingest.ipynb | 502 ++++++++++++++++++ 2 files changed, 505 insertions(+), 8 deletions(-) create mode 100644 transformation-scripts/special-collection-and-item-workflows-ingest.ipynb diff --git a/transformation-scripts/collection-and-item-workflows-ingest.ipynb b/transformation-scripts/collection-and-item-workflows-ingest.ipynb index 797d73f6..6beca9e8 100644 --- a/transformation-scripts/collection-and-item-workflows-ingest.ipynb +++ b/transformation-scripts/collection-and-item-workflows-ingest.ipynb @@ -67,7 +67,7 @@ "collection_json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n", "filtered_collection_file_paths_list = [\n", " item\n", - " for item in json_file_paths\n", + " for item in collection_json_file_paths\n", " if all(\n", " excluded_collections not in item\n", " for excluded_collections in excluded_collections\n", @@ -169,14 +169,8 @@ "outputs": [], "source": [ "test_endpoint = \"https://test.openveda.cloud\"\n", - "test_client_id = \"CHANGE ME\"\n", - "test_user_pool_id = \"CHANGE ME\"\n", - "test_identity_pool_id = \"CHANGE ME\"\n", "\n", "mcp_prod_endpoint = \"https://openveda.cloud\"\n", - "mcp_prod_client_id = \"CHANGE ME\"\n", - "mcp_prod_user_pool_id = \"CHANGE ME\"\n", - "mcp_prod_identity_pool_id = \"CHANGE ME\"\n", "\n", "print(f\"TESTING MODE? 
{testing_mode}\")\n", "if testing_mode:\n", @@ -307,7 +301,8 @@ "outputs": [], "source": [ "test_file_paths_and_collection_ids = [file_paths_and_collection_ids[0]]\n", - "test_discovery_item = [f\"../ingestion-data/production/discovery-items/{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n", + "test_discovery_json_path = f\"../ingestion-data/production/discovery-items/{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"\n", + "test_discovery_item = [test_discovery_json_path]\n", "\n", "print(test_discovery_item)\n", "print(test_file_paths_and_collection_ids)\n", diff --git a/transformation-scripts/special-collection-and-item-workflows-ingest.ipynb b/transformation-scripts/special-collection-and-item-workflows-ingest.ipynb new file mode 100644 index 00000000..bf535460 --- /dev/null +++ b/transformation-scripts/special-collection-and-item-workflows-ingest.ipynb @@ -0,0 +1,502 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook to Publish Special Collections and Start Discovery Workflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook publishes the following collections in `/ingestion-data/collections`:\n", + "- 'hls-l30-002-ej-reprocessed'\n", + "- 'hls-s30-002-ej-reprocessed'\n", + "- 'ls8-covid-19-example-data'\n", + "- 'landsat-c2l2-sr-antarctic-glaciers-pine-island'\n", + "- 'landsat-c2l2-sr-lakes-aral-sea'\n", + "- 'landsat-c2l2-sr-lakes-tonle-sap'\n", + "- 'landsat-c2l2-sr-lakes-lake-balaton'\n", + "- 'landsat-c2l2-sr-lakes-vanern'\n", + "- 'landsat-c2l2-sr-antarctic-glaciers-thwaites'\n", + "- 'landsat-c2l2-sr-lakes-lake-biwa'\n", + "- 'combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO'" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "import os\n", + "import json\n", + "import requests\n", + "\n", + "from cognito_client import CognitoClient" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell retrieves collection JSON files from `/ingestion-data/collections/` and saves the collectionIds to a list." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['../ingestion-data/collections/hls-l30-002-ej-reprocessed.json', '../ingestion-data/collections/hls-s30-002-ej-reprocessed.json', '../ingestion-data/collections/ls8-covid-19-example-data.json', '../ingestion-data/collections/landsat-c2l2-sr-antarctic-glaciers-pine-island.json', '../ingestion-data/collections/landsat-c2l2-sr-lakes-aral-sea.json', '../ingestion-data/collections/landsat-c2l2-sr-lakes-tonle-sap.json', '../ingestion-data/collections/landsat-c2l2-sr-lakes-lake-balaton.json', '../ingestion-data/collections/landsat-c2l2-sr-lakes-vanern.json', '../ingestion-data/collections/landsat-c2l2-sr-antarctic-glaciers-thwaites.json', '../ingestion-data/collections/landsat-c2l2-sr-lakes-lake-biwa.json', '../ingestion-data/collections/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO.json', '../ingestion-data/collections/nceo_africa_2017.json']\n" + ] + } + ], + "source": [ + "special_collections = [\n", + " \"hls-l30-002-ej-reprocessed\",\n", + " \"hls-s30-002-ej-reprocessed\",\n", + " \"ls8-covid-19-example-data\",\n", + " \"landsat-c2l2-sr-antarctic-glaciers-pine-island\",\n", + " \"landsat-c2l2-sr-lakes-aral-sea\",\n", + " \"landsat-c2l2-sr-lakes-tonle-sap\",\n", + " \"landsat-c2l2-sr-lakes-lake-balaton\",\n", + " \"landsat-c2l2-sr-lakes-vanern\",\n", + " \"landsat-c2l2-sr-antarctic-glaciers-thwaites\",\n", + " \"landsat-c2l2-sr-lakes-lake-biwa\",\n", + " \"combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO\",\n", + " \"nceo_africa_2017\",\n", + "]\n", + "\n", + "collection_file_paths = [\n", + " f\"../ingestion-data/collections/{collection}.json\"\n", + " for collection in special_collections\n", + "]\n", +
"print(collection_file_paths)\n", + "\n", + "file_paths_and_collection_ids = [\n", + " {\"filePath\": file_path, \"collectionId\": data[\"id\"]}\n", + " for file_path in collection_file_paths\n", + " if \"id\" in (data := json.load(open(file_path, \"r\")))\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the testing mode to `True` when testing and `False` otherwise. When the testing mode is `True`, the notebook will be set to run against `dev` endpoints." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "testing_mode = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell compares files in '/ingestion-data/collections' with those in '/ingestion-data/staging/discovery-items' or '/ingestion-data/production/discovery-items' and returns a list of all the discovery-items that have a corresponding collection." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['../ingestion-data/production/discovery-items/hls-l30-002-ej-reprocessed.json', '../ingestion-data/production/discovery-items/hls-s30-002-ej-reprocessed.json', '../ingestion-data/production/discovery-items/ls8-covid-19-example-data.json', '../ingestion-data/production/discovery-items/landsat-c2l2-sr-antarctic-glaciers-pine-island.json', '../ingestion-data/production/discovery-items/landsat-c2l2-sr-lakes-aral-sea.json', '../ingestion-data/production/discovery-items/landsat-c2l2-sr-lakes-tonle-sap.json', '../ingestion-data/production/discovery-items/landsat-c2l2-sr-lakes-lake-balaton.json', '../ingestion-data/production/discovery-items/landsat-c2l2-sr-lakes-vanern.json', '../ingestion-data/production/discovery-items/landsat-c2l2-sr-antarctic-glaciers-thwaites.json', '../ingestion-data/production/discovery-items/landsat-c2l2-sr-lakes-lake-biwa.json',
'../ingestion-data/production/discovery-items/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO.json', '../ingestion-data/production/discovery-items/nceo_africa_2017.json']\n", + "['../ingestion-data/production/discovery-items/nceo_africa_2017.json', '../ingestion-data/production/discovery-items/nightlights-hd-3bands.json', '../ingestion-data/production/discovery-items/nceo_africa_2017.json', '../ingestion-data/production/discovery-items/nceo_africa_2017.json', '../ingestion-data/production/discovery-items/nightlights-hd-3bands.json', '../ingestion-data/production/discovery-items/nceo_africa_2017.json', '../ingestion-data/production/discovery-items/nightlights-hd-3bands.json', '../ingestion-data/production/discovery-items/nceo_africa_2017.json', '../ingestion-data/production/discovery-items/nightlights-hd-3bands.json', '../ingestion-data/production/discovery-items/nceo_africa_2017.json', '../ingestion-data/production/discovery-items/nightlights-hd-3bands.json', '../ingestion-data/production/discovery-items/nceo_africa_2017.json', '../ingestion-data/production/discovery-items/nightlights-hd-3bands.json', '../ingestion-data/production/discovery-items/nceo_africa_2017.json']\n" + ] + } + ], + "source": [ + "items_in_external_buckets = []\n", + "\n", + "\n", + "def find_matching_file_names(collections_list, discovery_items_list):\n", + " matching_file_names = []\n", + " for collection_filename in collections_list:\n", + " collection_json = load_json_file(collection_filename)\n", + " id1 = collection_json.get(\"id\")\n", + " if id1 is not None:\n", + " for discovery_items_filename in discovery_items_list:\n", + " item_json = load_json_file(discovery_items_filename)\n", + " if isinstance(item_json, list):\n", + " if len(item_json) > 0:\n", + " collection2 = item_json[0].get(\"collection\")\n", + " if (\n", + " \"bucket\" in item_json\n", + " and item_json[0].get(\"bucket\") != \"veda-data-store\"\n", + " ):\n", + " 
items_in_external_buckets.append(discovery_items_filename)\n", + " else:\n", + " collection2 = item_json.get(\"collection\")\n", + " if collection2 is not None:\n", + " if (\n", + " \"bucket\" in item_json\n", + " and item_json.get(\"bucket\") != \"veda-data-store\"\n", + " ):\n", + " items_in_external_buckets.append(discovery_items_filename)\n", + " if collection2 == id1:\n", + " # Found a match\n", + " matching_file_names.append(discovery_items_filename)\n", + " break\n", + " return matching_file_names\n", + "\n", + "\n", + "def load_json_file(file_path):\n", + " with open(file_path, \"r\") as file:\n", + " return json.load(file)\n", + "\n", + "\n", + "discovery_items_json_file_paths = glob.glob(\n", + " \"../ingestion-data/production/discovery-items//*.json\"\n", + ")\n", + "\n", + "# Find matching file names\n", + "matching_file_names = find_matching_file_names(\n", + " collection_file_paths, discovery_items_json_file_paths\n", + ")\n", + "\n", + "special_items_to_process = matching_file_names\n", + "print(special_items_to_process)\n", + "print(items_in_external_buckets)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "testing_mode = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Have your Cognito `username` and `password` ready to set up Cognito Client to retrieve a token that will be used to access the STAC Ingestor API." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "test_endpoint = \"https://test.openveda.cloud\"\n", + "test_client_id = \"CHANGE ME\"\n", + "test_user_pool_id = \"CHANGE ME\"\n", + "test_identity_pool_id = \"CHANGE ME\"\n", + "\n", + "mcp_prod_endpoint = \"https://openveda.cloud\"\n", + "mcp_prod_client_id = \"CHANGE ME\"\n", + "mcp_prod_user_pool_id = \"CHANGE ME\"\n", + "mcp_prod_identity_pool_id = \"CHANGE ME\"\n", + "\n", + "staging_endpoint = \"https://staging-stac.delta-backend.com/\"\n", + "staging_client_id = \"CHANGE ME\"\n", + "staging_user_pool_id = \"CHANGE ME\"\n", + "staging_identity_pool_id = \"CHANGE ME\"\n", + "\n", + "if testing_mode:\n", + " STAC_INGESTOR_API = f\"{test_endpoint}/api/ingest/\"\n", + " VEDA_STAC_API = f\"{test_endpoint}/api/stac/\"\n", + " WORKFLOWS_API = \"https://4hrks0hk0b.execute-api.us-west-2.amazonaws.com/\"\n", + "else:\n", + " STAC_INGESTOR_API = f\"{mcp_prod_endpoint}/api/ingest/\"\n", + " VEDA_STAC_API = f\"{mcp_prod_endpoint}/api/stac/\"\n", + " WORKFLOWS_API = \"https://bct2n8in53.execute-api.us-west-2.amazonaws.com/\"\n", + "\n", + "client = CognitoClient(\n", + " client_id=staging_client_id,\n", + " user_pool_id=staging_user_pool_id,\n", + " identity_pool_id=staging_identity_pool_id,\n", + ")\n", + "_ = client.login()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell sets up headers for requests." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "TOKEN = client.access_token\n", + "authorization_header = f\"Bearer {TOKEN}\"\n", + "headers = {\n", + " \"Authorization\": authorization_header,\n", + " \"content-type\": \"application/json\",\n", + " \"accept\": \"application/json\",\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell defines the function that will post the collection." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "JSON content: {'id': 'AGB_map_2017v0m_COG', 'bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'type': 'Feature', 'links': [{'rel': 'collection', 'type': 'application/json', 'href': 'https://staging-stac.delta-backend.com/collections/nceo_africa_2017'}, {'rel': 'parent', 'type': 'application/json', 'href': 'https://staging-stac.delta-backend.com/collections/nceo_africa_2017'}, {'rel': 'root', 'type': 'application/json', 'href': 'https://staging-stac.delta-backend.com/'}, {'rel': 'self', 'type': 'application/geo+json', 'href': 'https://staging-stac.delta-backend.com/collections/nceo_africa_2017/items/AGB_map_2017v0m_COG'}, {'title': 'Map of Item', 'href': 'https://3hwvk17uek.execute-api.us-west-2.amazonaws.com/stac/map?collection=nceo_africa_2017&item=AGB_map_2017v0m_COG&assets=cog_default&rescale=0%2C400&colormap_name=gist_earth_r', 'rel': 'preview', 'type': 'text/html'}], 'assets': {'cog_default': {'href': 's3://nasa-maap-data-store/file-staging/nasa-map/nceo-africa-2017/AGB_map_2017v0m_COG.tif', 'type': 'image/tiff; application=geotiff; profile=cloud-optimized', 'roles': ['data', 'layer'], 'title': 'Default COG Layer', 'description': 'Cloud optimized default layer to display on map', 'raster:bands': [{'scale': 1.0, 'nodata': 'inf', 'offset': 0.0, 'sampling': 'area', 'data_type': 'uint16', 'histogram': {'max': 429.0, 'min': 0.0, 'count': 11.0, 'buckets': [405348.0, 44948.0, 18365.0, 6377.0, 3675.0, 3388.0, 3785.0, 9453.0, 13108.0, 1186.0]}, 'statistics': {'mean': 37.58407913145342, 'stddev': 81.36678677343947, 'maximum': 429.0, 'minimum': 0.0, 'valid_percent': 50.42436439336373}}]}, 'rendered_preview': {'title': 'Rendered preview', 'href': 
'https://3hwvk17uek.execute-api.us-west-2.amazonaws.com/stac/preview.png?collection=nceo_africa_2017&item=AGB_map_2017v0m_COG&assets=cog_default&rescale=0%2C400&colormap_name=gist_earth_r', 'rel': 'preview', 'roles': ['overview'], 'type': 'image/png'}}, 'geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'collection': 'nceo_africa_2017', 'properties': {'proj:bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'proj:epsg': 4326.0, 'proj:shape': [81024.0, 78077.0], 'end_datetime': '2017-12-31T23:59:59+00:00', 'proj:geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'proj:transform': [0.0008983152841195214, 0.0, -18.273529509559307, 0.0, -0.0008983152841195214, 37.73103856358817, 0.0, 0.0, 1.0], 'start_datetime': '2017-01-01T00:00:00+00:00'}, 'stac_version': '1.0.0', 'stac_extensions': ['https://stac-extensions.github.io/projection/v1.0.0/schema.json', 'https://stac-extensions.github.io/raster/v1.1.0/schema.json']}\n", + "ITEM {'id': 'AGB_map_2017v0m_COG', 'bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'type': 'Feature', 'links': [], 'assets': {'cog_default': {'href': 's3://nasa-maap-data-store/file-staging/nasa-map/nceo-africa-2017/AGB_map_2017v0m_COG.tif', 'type': 'image/tiff; application=geotiff; profile=cloud-optimized', 'roles': ['data', 'layer'], 'title': 'Default COG Layer', 'description': 'Cloud optimized default layer to display on map', 'raster:bands': [{'scale': 1.0, 'nodata': 'inf', 'offset': 0.0, 'sampling': 'area', 'data_type': 'uint16', 'histogram': {'max': 
429.0, 'min': 0.0, 'count': 11.0, 'buckets': [405348.0, 44948.0, 18365.0, 6377.0, 3675.0, 3388.0, 3785.0, 9453.0, 13108.0, 1186.0]}, 'statistics': {'mean': 37.58407913145342, 'stddev': 81.36678677343947, 'maximum': 429.0, 'minimum': 0.0, 'valid_percent': 50.42436439336373}}]}, 'rendered_preview': {'title': 'Rendered preview', 'href': 'https://3hwvk17uek.execute-api.us-west-2.amazonaws.com/stac/preview.png?collection=nceo_africa_2017&item=AGB_map_2017v0m_COG&assets=cog_default&rescale=0%2C400&colormap_name=gist_earth_r', 'rel': 'preview', 'roles': ['overview'], 'type': 'image/png'}}, 'geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'collection': 'nceo_africa_2017', 'properties': {'proj:bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'proj:epsg': 4326.0, 'proj:shape': [81024.0, 78077.0], 'end_datetime': '2017-12-31T23:59:59+00:00', 'proj:geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'proj:transform': [0.0008983152841195214, 0.0, -18.273529509559307, 0.0, -0.0008983152841195214, 37.73103856358817, 0.0, 0.0, 1.0], 'start_datetime': '2017-01-01T00:00:00+00:00'}, 'stac_version': '1.0.0', 'stac_extensions': ['https://stac-extensions.github.io/projection/v1.0.0/schema.json', 'https://stac-extensions.github.io/raster/v1.1.0/schema.json']}\n", + "FINAL {'id': 'AGB_map_2017v0m_COG', 'bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'type': 'Feature', 'links': [], 'assets': {'cog_default': {'href': 
's3://nasa-maap-data-store/file-staging/nasa-map/nceo-africa-2017/AGB_map_2017v0m_COG.tif', 'type': 'image/tiff; application=geotiff; profile=cloud-optimized', 'roles': ['data', 'layer'], 'title': 'Default COG Layer', 'description': 'Cloud optimized default layer to display on map', 'raster:bands': [{'scale': 1.0, 'nodata': 'inf', 'offset': 0.0, 'sampling': 'area', 'data_type': 'uint16', 'histogram': {'max': 429.0, 'min': 0.0, 'count': 11.0, 'buckets': [405348.0, 44948.0, 18365.0, 6377.0, 3675.0, 3388.0, 3785.0, 9453.0, 13108.0, 1186.0]}, 'statistics': {'mean': 37.58407913145342, 'stddev': 81.36678677343947, 'maximum': 429.0, 'minimum': 0.0, 'valid_percent': 50.42436439336373}}]}}, 'geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'collection': 'nceo_africa_2017', 'properties': {'proj:bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'proj:epsg': 4326.0, 'proj:shape': [81024.0, 78077.0], 'end_datetime': '2017-12-31T23:59:59+00:00', 'proj:geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'proj:transform': [0.0008983152841195214, 0.0, -18.273529509559307, 0.0, -0.0008983152841195214, 37.73103856358817, 0.0, 0.0, 1.0], 'start_datetime': '2017-01-01T00:00:00+00:00'}, 'stac_version': '1.0.0', 'stac_extensions': ['https://stac-extensions.github.io/projection/v1.0.0/schema.json', 'https://stac-extensions.github.io/raster/v1.1.0/schema.json']}\n", + "{'id': 'AGB_map_2017v0m_COG', 'bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'type': 'Feature', 'links': [], 'assets': 
{'cog_default': {'href': 's3://nasa-maap-data-store/file-staging/nasa-map/nceo-africa-2017/AGB_map_2017v0m_COG.tif', 'type': 'image/tiff; application=geotiff; profile=cloud-optimized', 'roles': ['data', 'layer'], 'title': 'Default COG Layer', 'description': 'Cloud optimized default layer to display on map', 'raster:bands': [{'scale': 1.0, 'nodata': 'inf', 'offset': 0.0, 'sampling': 'area', 'data_type': 'uint16', 'histogram': {'max': 429.0, 'min': 0.0, 'count': 11.0, 'buckets': [405348.0, 44948.0, 18365.0, 6377.0, 3675.0, 3388.0, 3785.0, 9453.0, 13108.0, 1186.0]}, 'statistics': {'mean': 37.58407913145342, 'stddev': 81.36678677343947, 'maximum': 429.0, 'minimum': 0.0, 'valid_percent': 50.42436439336373}}]}}, 'geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'collection': 'nceo_africa_2017', 'properties': {'proj:bbox': [-18.273529509559307, -35.054059016911935, 51.86423292864056, 37.73103856358817], 'proj:epsg': 4326.0, 'proj:shape': [81024.0, 78077.0], 'end_datetime': '2017-12-31T23:59:59+00:00', 'proj:geometry': {'type': 'Polygon', 'coordinates': [[[-18.273529509559307, -35.054059016911935], [51.86423292864056, -35.054059016911935], [51.86423292864056, 37.73103856358817], [-18.273529509559307, 37.73103856358817], [-18.273529509559307, -35.054059016911935]]]}, 'proj:transform': [0.0008983152841195214, 0.0, -18.273529509559307, 0.0, -0.0008983152841195214, 37.73103856358817, 0.0, 0.0, 1.0], 'start_datetime': '2017-01-01T00:00:00+00:00'}, 'stac_version': '1.0.0', 'stac_extensions': ['https://stac-extensions.github.io/projection/v1.0.0/schema.json', 'https://stac-extensions.github.io/raster/v1.1.0/schema.json']}\n" + ] + } + ], + "source": [ + "def remove_links(item):\n", + " item[\"links\"] = []\n", + " print(f\"ITEM {item}\")\n", + " return item\n", + "\n", 
+ "\n", + "def remove_rendered_preview(item):\n", + " if item[\"assets\"][\"rendered_preview\"]:\n", + " del item[\"assets\"][\"rendered_preview\"]\n", + " return item\n", + "\n", + "\n", + "# def add_null_datetime(item):\n", + "# item[\"properties\"][\"datetime\"] = None\n", + "\n", + "\n", + "def get_item_to_ingest(collection_id):\n", + " url = f\"{staging_endpoint}/collections/{collection_id}/items\"\n", + " response = requests.get(url, headers=headers)\n", + " response.raise_for_status()\n", + " json_response = response.json()\n", + " features = json_response.get(\"features\")\n", + " for feature in features:\n", + " # Iterate through links\n", + " for link in feature[\"links\"]:\n", + " # Check if rel is \"self\"\n", + " if link[\"rel\"] == \"self\":\n", + " # If rel is \"self\", extract href\n", + " href = link[\"href\"]\n", + " break # Exit loop once href is found\n", + " if href: # If href is found, break outer loop\n", + " break\n", + " return href\n", + "\n", + "\n", + "def modify_item_before_ingest(item_href):\n", + " try:\n", + " response = requests.get(item_href)\n", + " response.raise_for_status() # Raise an exception for HTTP errors\n", + " json_content = response.json() # Parse JSON response\n", + " print(\"JSON content:\", json_content)\n", + " json_content = remove_links(json_content)\n", + " json_content = remove_rendered_preview(json_content)\n", + " # json_content = add_null_datetime(json_content)\n", + " print(f\"FINAL {json_content}\")\n", + " return json_content\n", + " except requests.exceptions.RequestException as e:\n", + " print(\"Error fetching JSON content:\", e)\n", + "\n", + "\n", + "# TESTING THINGS\n", + "item_to_ingest = get_item_to_ingest(\"nceo_africa_2017\")\n", + "finalized_item = modify_item_before_ingest(item_to_ingest)\n", + "print(finalized_item)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'data' is not defined", + 
"output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 50\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError fetching JSON content:\u001b[39m\u001b[38;5;124m\"\u001b[39m, e)\n\u001b[0;32m---> 50\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mget_item_to_ingest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnceo_africa_2017\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28mprint\u001b[39m(result)\n\u001b[1;32m 53\u001b[0m failed_ingest_items \u001b[38;5;241m=\u001b[39m []\n", + "Cell \u001b[0;32mIn[6], line 31\u001b[0m, in \u001b[0;36mget_item_to_ingest\u001b[0;34m(collection_id)\u001b[0m\n\u001b[1;32m 29\u001b[0m json_response \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n\u001b[1;32m 30\u001b[0m features \u001b[38;5;241m=\u001b[39m json_response\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfeatures\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 31\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m feature \u001b[38;5;129;01min\u001b[39;00m \u001b[43mdata\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfeatures\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 32\u001b[0m \u001b[38;5;66;03m# Iterate through links\u001b[39;00m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m link \u001b[38;5;129;01min\u001b[39;00m feature[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlinks\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 34\u001b[0m 
\u001b[38;5;66;03m# Check if rel is \"self\"\u001b[39;00m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m link[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrel\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mself\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 36\u001b[0m \u001b[38;5;66;03m# If rel is \"self\", extract href\u001b[39;00m\n", + "\u001b[0;31mNameError\u001b[0m: name 'data' is not defined" + ] + } + ], + "source": [ + "def post_collection(collection, collection_id):\n", + " collection_url = f\"{VEDA_STAC_API}collections/{collection_id}\"\n", + " ingest_url = f\"{STAC_INGESTOR_API}collections\"\n", + "\n", + " try:\n", + " response = requests.post(ingest_url, json=collection, headers=headers)\n", + " response.raise_for_status()\n", + " if response.status_code == 201:\n", + " print(\n", + " f\"Request was successful. Find the updated collection at {collection_url}\"\n", + " )\n", + " else:\n", + " print(\n", + " f\"ERROR: Updating {collection_id} failed. Request failed with status code: {response.status_code}\"\n", + " )\n", + " except requests.RequestException as e:\n", + " print(\n", + " f\"ERROR: Updating {collection_id} failed. An error occurred during the request: {e}\"\n", + " )\n", + " except Exception as e:\n", + " print(\n", + " f\"ERROR: An unexpected error occurred while trying to update {collection_id}: {e}\"\n", + " )\n", + "\n", + "\n", + "failed_ingest_items = []\n", + "\n", + "\n", + "def ingest_external_item(external_item, external_item_path):\n", + " ingest_url = f\"{STAC_INGESTOR_API}ingestion\"\n", + " print(ingest_url)\n", + " try:\n", + " response = requests.post(ingest_url, json=external_item, headers=headers)\n", + " response.raise_for_status()\n", + " if response.status_code == 201:\n", + " print(f\"Request was successful. {response}\")\n", + " else:\n", + " print(\n", + " f\"ERROR: Ingesting item for {external_item} failed. 
Request failed with status code: {response.status_code}\"\n", + " )\n", + " failed_ingest_items.append(external_item_path)\n", + " except requests.RequestException as e:\n", + " print(\n", + " f\"ERROR: Ingesting item for {external_item} failed. An error occurred during the request: {e}\"\n", + " )\n", + " failed_ingest_items.append(external_item_path)\n", + " except Exception as e:\n", + " print(\n", + " f\"ERROR: An unexpected error occurred while trying to ingest item for {external_item} failed: {e}\"\n", + " )\n", + " failed_ingest_items.append(external_item_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell publishes the collection to the target ingestion `api/collections` endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for collection in file_paths_and_collection_ids:\n", + " collection_id = collection[\"collectionId\"]\n", + " file_path = collection[\"filePath\"]\n", + "\n", + " try:\n", + " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n", + " collection = json.load(file)\n", + "\n", + " # Publish the updated collection to the target ingestion `api/collections` endpoint\n", + " post_collection(collection, collection_id)\n", + "\n", + " except requests.RequestException as e:\n", + " print(f\"An error occurred for collectionId {collection_id}: {e}\")\n", + " except Exception as e:\n", + " print(f\"An unexpected error occurred for collectionId {collection_id}: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell ingests the collection items:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for special_item_path in special_items_to_process:\n", + " try:\n", + " with open(special_item_path, \"r\", encoding=\"utf-8\") as file:\n", + " discovery_item_json = json.load(file)\n", + " print(discovery_item_json)\n", + 
"\n", + " if isinstance(discovery_item_json, list):\n", + " for single_discovery_item_json in discovery_item_json:\n", + " ingest_external_item(single_discovery_item_json, special_item_path)\n", + " else:\n", + " ingest_external_item(discovery_item_json, special_item_path)\n", + "\n", + " except requests.RequestException as e:\n", + " print(f\"An error occurred for item {special_item_path}: {e}\")\n", + " except Exception as e:\n", + " print(f\"An unexpected error occurred for item {special_item_path}: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(failed_ingest_items)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}