-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This branch includes code needed to migrate OONI Run v2 over to the new host
- Loading branch information
Showing
3 changed files
with
117 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
""" | ||
Dump OONI Run links from ClickHouse into PostgreSQL, performing the appropriate | ||
transformations along the way. | ||
To set up, run: | ||
pip install psycopg2 clickhouse-driver | ||
Then: | ||
OONI_PG_PASSWORD=XXXX python migrate-oonirun.py | ||
""" | ||
import os | ||
import json | ||
from pprint import pprint | ||
from collections import defaultdict | ||
from datetime import timedelta | ||
|
||
from clickhouse_driver import Client | ||
import psycopg2 | ||
|
||
def _normalize_descriptor(desc):
    """Fill in placeholders for descriptor fields that are missing or too short.

    The text fields ``short_description``, ``description`` and ``name`` are
    replaced with a fixed placeholder when they are absent, empty, or shorter
    than two characters. ``author`` gets its own placeholder under the same
    conditions. The dict is modified in place and also returned.
    """
    for key in ("short_description", "description", "name"):
        value = desc.get(key)
        if not value or len(value) < 2:
            desc[key] = "this has been autopopulated"

    # Use .get() so a descriptor that lacks the "author" key entirely receives
    # the placeholder instead of aborting the whole dump with a KeyError.
    author = desc.get("author")
    if not author or len(author) < 2:
        desc["author"] = "unknown-author"

    return desc


def dump_oonirun_links_clickhouse():
    """Read every row of the ClickHouse ``oonirun`` table and reshape it for insertion.

    Each source row has its JSON ``descriptor`` column decoded and normalized,
    and is then converted into a dict keyed by the target column names. Rows
    sharing the same ``ooni_run_link_id`` are ordered by creation time and
    numbered with a 1-based ``revision``; every revision of a link carries the
    ``date_created`` of the earliest one.

    Returns:
        A list of dicts, one per source row, all with the same keys in the
        same order.
    """
    client = Client("localhost")

    # with_column_types=True makes execute() return (rows, [(name, type), ...]).
    rows, cols = client.execute("SELECT * FROM oonirun", with_column_types=True)
    col_names = [col[0] for col in cols]

    links_by_id = defaultdict(list)
    for raw_row in rows:
        d = dict(zip(col_names, raw_row))
        desc = _normalize_descriptor(json.loads(d["descriptor"]))

        link = {
            "oonirun_link_id": d["ooni_run_link_id"],
            "date_created": d["descriptor_creation_time"],
            "date_updated": d["translation_creation_time"],
            "creator_account_id": d["creator_account_id"],
            # Filled in below, once all revisions of a link are known.
            "revision": None,
            # Expiration is set 180 days (6 * 30) after the descriptor was created.
            "expiration_date": d["descriptor_creation_time"] + timedelta(days=6 * 30),
            "name": desc["name"][:50],
            "name_intl": None,
            "short_description": desc["short_description"][:200],
            "short_description_intl": None,
            "description": desc["description"],
            "description_intl": None,
            "icon": desc["icon"],
            "author": desc["author"],
            "color": desc.get("color"),
            "nettests": json.dumps(desc["nettests"]),
        }
        links_by_id[link["oonirun_link_id"]].append(link)

    oonirun_links_with_revision = []
    for revisions in links_by_id.values():
        ordered = sorted(revisions, key=lambda r: r["date_created"])
        # Every group holds at least one row, so ordered[0] always exists.
        first_created = ordered[0]["date_created"]
        for revision, oonirun_link in enumerate(ordered, start=1):
            oonirun_link["revision"] = revision
            oonirun_link["date_created"] = first_created
            oonirun_links_with_revision.append(oonirun_link)
    return oonirun_links_with_revision
|
||
def insert_run_links_postgresql(data_to_insert):
    """Insert the given rows into the PostgreSQL ``oonirun`` table in one transaction.

    Column names are taken from the keys of the first row; every row is
    expected to provide a value for each of those keys. All rows are committed
    together; if any insert fails the transaction is rolled back, the failing
    row is printed, and the exception is re-raised.

    Args:
        data_to_insert: Sequence of dicts sharing the same keys. An empty
            sequence is a no-op and no database connection is opened.

    Raises:
        KeyError: If the ``OONI_PG_PASSWORD`` environment variable is not set.
        Exception: Whatever the database driver raised while inserting or
            committing, after the rollback.
    """
    # Bail out before connecting: indexing [0] on an empty sequence would
    # otherwise raise IndexError with the connection already open.
    if not data_to_insert:
        print("No rows to insert")
        return

    col_names = list(data_to_insert[0].keys())
    placeholders = ["%s"] * len(col_names)
    insert_query = f'INSERT INTO oonirun ({",".join(col_names)}) VALUES ({",".join(placeholders)})'

    db_params = {
        'dbname': 'oonipg',
        'user': 'oonipg',
        'password': os.environ["OONI_PG_PASSWORD"],
        'host': 'postgres.tier0.prod.ooni.nu',
    }

    conn = psycopg2.connect(**db_params)
    cur = conn.cursor()

    insert_count = 0
    row = None
    try:
        for row in data_to_insert:
            values = [row[cn] for cn in col_names]
            cur.execute(insert_query, values)
            insert_count += 1
        conn.commit()
        print("Data inserted successfully")
    except Exception as e:
        conn.rollback()
        print(f"Failed after {insert_count} rows at row:")
        print(row)
        print(f"An error occurred: {e}")
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Close the cursor and connection
        cur.close()
        conn.close()
|
||
# Run the migration only when executed as a script, so that importing this
# module does not touch either database.
if __name__ == "__main__":
    valid_links = dump_oonirun_links_clickhouse()
    print(len(valid_links))
    insert_run_links_postgresql(valid_links)
    # pprint(valid_links)  # uncomment to inspect the transformed rows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.