diff --git a/scripts/migrate-oonirun.py b/scripts/migrate-oonirun.py
new file mode 100644
index 00000000..861d1981
--- /dev/null
+++ b/scripts/migrate-oonirun.py
@@ -0,0 +1,146 @@
+"""
+Dump OONI Run links from clickhouse into postgres by performing appropriate
+transformations.
+
+To set up, run:
+
+pip install psycopg2 clickhouse-driver
+
+Then:
+
+OONI_PG_PASSWORD=XXXX python migrate-oonirun.py
+"""
+import os
+import json
+from pprint import pprint
+from collections import defaultdict
+from datetime import timedelta
+
+from clickhouse_driver import Client
+import psycopg2
+
+def dump_oonirun_links_clickhouse():
+    """
+    Read every row of the clickhouse `oonirun` table and reshape it into a
+    dict keyed by the column names used for the postgres insert.
+
+    Descriptor text fields (name, short_description, description) that are
+    missing or shorter than 2 characters are replaced with a placeholder, and
+    so is the author. Rows sharing a link ID are numbered as revisions,
+    starting at 1, in order of creation time, and every revision carries the
+    creation date of the first one.
+
+    Returns a list of dicts, one per link revision.
+    """
+    client = Client("localhost")
+
+    rows, cols = client.execute("SELECT * FROM oonirun", with_column_types=True)
+    col_names = [col[0] for col in cols]
+
+    links_by_id = defaultdict(list)
+
+    for row in rows:
+        d = dict(zip(col_names, row))
+        desc = json.loads(d["descriptor"])
+        min_len_keys = [
+            "short_description",
+            "description",
+            "name",
+        ]
+        for k in min_len_keys:
+            if not desc.get(k) or len(desc.get(k)) < 2:
+                desc[k] = "this has been autopopulated"
+
+        # .get() so that a descriptor without an "author" key receives the
+        # placeholder instead of aborting the whole dump with a KeyError
+        if not desc.get("author") or len(desc["author"]) < 2:
+            desc["author"] = "unknown-author"
+
+        link = {
+            "oonirun_link_id": d["ooni_run_link_id"],
+            "date_created": d["descriptor_creation_time"],
+            "date_updated": d["translation_creation_time"],
+            "creator_account_id": d["creator_account_id"],
+            "revision": None,
+            "expiration_date": d["descriptor_creation_time"] + timedelta(days=6 * 30),
+            "name": desc["name"][:50],
+            "name_intl": None,
+            "short_description": desc["short_description"][:200],
+            "short_description_intl": None,
+            "description": desc["description"],
+            "description_intl": None,
+            "icon": desc["icon"],
+            "author": desc["author"],
+            "color": desc.get("color"),
+            "nettests": json.dumps(desc["nettests"])
+        }
+        links_by_id[link["oonirun_link_id"]].append(link)
+
+    oonirun_links_with_revision = []
+    for revisions in links_by_id.values():
+        first_created = None
+        ordered = sorted(revisions, key=lambda r: r["date_created"])
+        for revision, oonirun_link in enumerate(ordered, start=1):
+            oonirun_link["revision"] = revision
+            if first_created is None:
+                first_created = oonirun_link["date_created"]
+            oonirun_link["date_created"] = first_created
+            oonirun_links_with_revision.append(oonirun_link)
+    return oonirun_links_with_revision
+
+def insert_run_links_postgresql(data_to_insert):
+    """
+    Insert the given rows into the postgres `oonirun` table as one
+    transaction.
+
+    data_to_insert is a list of dicts; the keys of the first dict are used as
+    the column names for every row. The database password is read from the
+    OONI_PG_PASSWORD environment variable.
+
+    If any insert fails the transaction is rolled back, the failing row is
+    printed and the original exception is re-raised.
+    """
+    if not data_to_insert:
+        # data_to_insert[0] below would raise IndexError after the connection
+        # was already opened (and never closed), so bail out early
+        print("No rows to insert")
+        return
+
+    db_params = {
+        'dbname': 'oonipg',
+        'user': 'oonipg',
+        'password': os.environ["OONI_PG_PASSWORD"],
+        'host': 'postgres.tier0.prod.ooni.nu'
+    }
+
+    conn = psycopg2.connect(**db_params)
+    cur = conn.cursor()
+
+    col_names = list(data_to_insert[0].keys())
+    col_values = ["%s"]*len(col_names)
+    insert_query = f'INSERT INTO oonirun ({",".join(col_names)}) VALUES ({",".join(col_values)})'
+
+    insert_count = 0
+    try:
+        for row in data_to_insert:
+            values = [row[cn] for cn in col_names]
+            cur.execute(insert_query, values)
+            insert_count += 1
+        conn.commit()
+        print("Data inserted successfully")
+    except Exception as e:
+        conn.rollback()
+        print(f"Failed after {insert_count} rows at row:")
+        print(row)
+        print(f"An error occurred: {e}")
+        # bare raise re-raises the active exception unchanged
+        raise
+    finally:
+        # Close the cursor and connection
+        cur.close()
+        conn.close()
+
+valid_links = dump_oonirun_links_clickhouse()
+print(len(valid_links))
+insert_run_links_postgresql(valid_links)
+#pprint(valid_links)
diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf
index 73fb6b8b..69439852 100644
--- a/tf/environments/prod/main.tf
+++ b/tf/environments/prod/main.tf
@@ -347,7 +347,8 @@ locals {
 resource "aws_ecs_task_definition" "dataapi" {
   family = "ooni-dataapi-production-td"
   container_definitions = templatefile("${path.module}/templates/task_definition.json", {
-    image_url = "ooni/dataapi:${var.ooni_service_config.dataapi_version}",
+    # Image URL is updated via code build and code pipeline
+    image_url = "ooni/dataapi:latest",
     container_name = local.container_name,
     container_port = 80,
     log_group_region = var.aws_region,
@@ -381,11 +382,6 @@ resource "aws_ecs_service" "dataapi" {
 
   force_new_deployment = true
 
-  triggers = {
-    # see: https://github.com/hashicorp/terraform-provider-aws/issues/28070#issuecomment-1824780763
-    redeployment = plantimestamp()
-  }
-
   tags = local.tags
 }
 
diff --git a/tf/environments/prod/terraform.tfvars.json b/tf/environments/prod/terraform.tfvars.json
deleted file mode 100644
index 328562d9..00000000
--- a/tf/environments/prod/terraform.tfvars.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-  "ooni_service_config": {
-    "dataapi_version": "20240226-f3c84e02"
-  }
-}