Skip to content

Commit

Permalink
Migrate oonirun (#10)
Browse files Browse the repository at this point in the history
This branch includes code needed to migrate OONI Run v2 over to the new
host
  • Loading branch information
hellais authored Mar 5, 2024
1 parent 1878f1b commit 79d5746
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 11 deletions.
115 changes: 115 additions & 0 deletions scripts/migrate-oonirun.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
Dump OONI Run links from ClickHouse into PostgreSQL, performing the
appropriate transformations.
To set up, run:
pip install psycopg2 clickhouse-driver
Then:
OONI_PG_PASSWORD=XXXX python migrate-oonirun.py
"""
import os
import json
from pprint import pprint
from collections import defaultdict
from datetime import timedelta

from clickhouse_driver import Client
import psycopg2

def _clickhouse_row_to_oonirun_link(d):
    """Convert one clickhouse `oonirun` row (as a dict) into a postgres row dict."""
    desc = json.loads(d["descriptor"])

    # Text fields shorter than 2 characters (or missing) get a placeholder.
    for key in ("short_description", "description", "name"):
        if not desc.get(key) or len(desc.get(key)) < 2:
            desc[key] = "this has been autopopulated"

    # Use .get() so a descriptor with no "author" key falls back to the
    # placeholder instead of raising KeyError, like the text fields above.
    author = desc.get("author")
    if not author or len(author) < 2:
        author = "unknown-author"

    return {
        "oonirun_link_id": d["ooni_run_link_id"],
        "date_created": d["descriptor_creation_time"],
        "date_updated": d["translation_creation_time"],
        "creator_account_id": d["creator_account_id"],
        # Filled in later, once all rows for the same link are known.
        "revision": None,
        "expiration_date": d["descriptor_creation_time"] + timedelta(days=6 * 30),
        "name": desc["name"][:50],
        "name_intl": None,
        "short_description": desc["short_description"][:200],
        "short_description_intl": None,
        "description": desc["description"],
        "description_intl": None,
        # "icon" and "nettests" are required: a KeyError is raised if missing.
        "icon": desc["icon"],
        "author": author,
        "color": desc.get("color"),
        "nettests": json.dumps(desc["nettests"]),
    }


def dump_oonirun_links_clickhouse(client=None):
    """
    Read every row of the clickhouse `oonirun` table and return a list of
    dicts shaped for insertion into postgres.

    Rows sharing the same `ooni_run_link_id` are ordered by their creation
    time and numbered with an increasing `revision` starting at 1. Every
    revision of a link gets the `date_created` of its first revision.

    Args:
        client: optional object exposing a clickhouse-driver compatible
            `execute(query, with_column_types=True)` method. When omitted a
            `Client("localhost")` is created.

    Returns:
        A list of row dicts, grouped by link and ordered by revision.
    """
    if client is None:
        client = Client("localhost")

    records, cols = client.execute("SELECT * FROM oonirun", with_column_types=True)
    # With with_column_types=True each column is described as (name, type).
    col_names = [col[0] for col in cols]

    links_by_id = defaultdict(list)
    for record in records:
        link = _clickhouse_row_to_oonirun_link(dict(zip(col_names, record)))
        links_by_id[link["oonirun_link_id"]].append(link)

    oonirun_links_with_revision = []
    for revisions in links_by_id.values():
        first_created = None
        ordered = sorted(revisions, key=lambda r: r["date_created"])
        for revision, oonirun_link in enumerate(ordered, start=1):
            oonirun_link["revision"] = revision
            if first_created is None:
                first_created = oonirun_link["date_created"]
            oonirun_link["date_created"] = first_created
            oonirun_links_with_revision.append(oonirun_link)
    return oonirun_links_with_revision

def insert_run_links_postgresql(data_to_insert):
    """
    Insert the given row dicts into the postgres `oonirun` table.

    The column list is taken from the keys of the first row and every row is
    expected to provide those same keys. All rows are inserted inside one
    transaction: it is committed only after every row succeeded and rolled
    back on the first failure.

    The database password is read from the OONI_PG_PASSWORD environment
    variable.

    Args:
        data_to_insert: list of dicts mapping column name to value.

    Raises:
        Exception: whatever error made an insert fail, re-raised after the
            rollback and after printing the offending row.
    """
    # Nothing to do: return before opening a connection, and avoid the
    # IndexError that data_to_insert[0] would raise on an empty list.
    if not data_to_insert:
        print("No rows to insert")
        return

    db_params = {
        'dbname': 'oonipg',
        'user': 'oonipg',
        'password': os.environ["OONI_PG_PASSWORD"],
        'host': 'postgres.tier0.prod.ooni.nu'
    }

    conn = psycopg2.connect(**db_params)
    cur = conn.cursor()

    col_names = list(data_to_insert[0].keys())
    col_values = ["%s"] * len(col_names)
    # Only the column names are interpolated into the SQL text; the values
    # are passed separately as query parameters.
    insert_query = f'INSERT INTO oonirun ({",".join(col_names)}) VALUES ({",".join(col_values)})'

    insert_count = 0
    row = None
    try:
        for row in data_to_insert:
            values = [row[cn] for cn in col_names]
            cur.execute(insert_query, values)
            insert_count += 1
        conn.commit()
        print("Data inserted successfully")
    except Exception as e:
        conn.rollback()
        print(f"Failed after {insert_count} rows at row:")
        print(row)
        print(f"An error occurred: {e}")
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Close the cursor and connection
        cur.close()
        conn.close()

# Only run the migration when executed as a script, so that importing this
# module never triggers writes to the production database.
if __name__ == "__main__":
    valid_links = dump_oonirun_links_clickhouse()
    # Number of rows about to be inserted.
    print(len(valid_links))
    insert_run_links_postgresql(valid_links)
    # For debugging, the transformed rows can be inspected with:
    # pprint(valid_links)
8 changes: 2 additions & 6 deletions tf/environments/prod/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,8 @@ locals {
resource "aws_ecs_task_definition" "dataapi" {
family = "ooni-dataapi-production-td"
container_definitions = templatefile("${path.module}/templates/task_definition.json", {
image_url = "ooni/dataapi:${var.ooni_service_config.dataapi_version}",
# Image URL is updated via code build and code pipeline
image_url = "ooni/dataapi:latest",
container_name = local.container_name,
container_port = 80,
log_group_region = var.aws_region,
Expand Down Expand Up @@ -381,11 +382,6 @@ resource "aws_ecs_service" "dataapi" {

force_new_deployment = true

triggers = {
# see: https://github.com/hashicorp/terraform-provider-aws/issues/28070#issuecomment-1824780763
redeployment = plantimestamp()
}

tags = local.tags
}

Expand Down
5 changes: 0 additions & 5 deletions tf/environments/prod/terraform.tfvars.json

This file was deleted.

0 comments on commit 79d5746

Please sign in to comment.