From 0160977d747ff78fe0801086100ea46e5ff77e14 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 17:37:25 +0800 Subject: [PATCH 01/30] chore: code cleanup --- database.py | 6 ------ server.py | 8 -------- 2 files changed, 14 deletions(-) diff --git a/database.py b/database.py index a3d737a..cee11b0 100644 --- a/database.py +++ b/database.py @@ -446,12 +446,6 @@ def delete_annotation(self, record_id: str, annotator: str): self.mercury_db.execute(sql_cmd, (int(record_id), annotator)) self.mercury_db.commit() - @database_lock() - def add_user(self, user_id: str, user_name: str): # TODO: remove this method since now only admin can add user - sql_cmd = "INSERT INTO users (user_id, user_name) VALUES (?, ?)" - self.mercury_db.execute(sql_cmd, (user_id, user_name)) - self.mercury_db.commit() - @database_lock() def change_user_name(self, user_id: str, user_name: str): self.user_db.execute("UPDATE users SET user_name = ? WHERE user_id = ?", (user_name, user_id)) diff --git a/server.py b/server.py index 18f3ebb..b42c369 100644 --- a/server.py +++ b/server.py @@ -135,14 +135,6 @@ async def get_labels() -> list: # get all candidate labels for human annotators return labels -@app.get("/user/new") # please update the route name to be more meaningful, e.g., /user/new_user -async def create_new_user(): - user_id = uuid.uuid4().hex - user_name = "New User" - database.add_user(user_id, user_name) - return {"key": user_id, "name": user_name} - - @app.get("/user/me") async def get_user(token: Annotated[str, Depends(oauth2_scheme)], config: Config = Depends(get_config)) -> User: credentials_exception = HTTPException( From 870201740245dbd2d5cf1ed00781359726177d64 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 18:12:28 +0800 Subject: [PATCH 02/30] feat(user_utils)!: manage user purely from a CSV file --- user_utils.py | 101 
+++++++++++++++++++------------------------------- 1 file changed, 39 insertions(+), 62 deletions(-) diff --git a/user_utils.py b/user_utils.py index 3f71c6a..409170c 100644 --- a/user_utils.py +++ b/user_utils.py @@ -1,3 +1,4 @@ +import csv import sqlite3 import uuid import argon2 @@ -11,8 +12,9 @@ def generate_random_string(length=16): return ''.join(random.choice(characters) for _ in range(length)) -class DatabaseUtils: - def __init__(self, db_path): +class UserUtils: + def __init__(self, db_path, csv_path): + self.csv_path = csv_path self.conn = sqlite3.connect(db_path) self.cursor = self.conn.cursor() self.ph = argon2.PasswordHasher(time_cost=2, memory_cost=19456, parallelism=1) @@ -57,79 +59,54 @@ def delete_user(self, user_id): self.cursor.execute("DELETE FROM users WHERE user_id = ?", (user_id,)) self.conn.commit() + + def export(self): + csv_fp = open(self.csv_path, "w", newline="") + writer = csv.writer(csv_fp) + writer.writerow(["user_id", "user_name", "email", "password"]) + rows = self.cursor.execute("SELECT * FROM users") + for user_id, user_name, email, _ in rows: + writer.writerow([user_id, user_name, email, ""]) # Passwords are hashed, so we don't export them + csv_fp.close() + + def apply(self): + csv_fp = open(self.csv_path, newline="") + reader = csv.DictReader(csv_fp) + for row in reader: + user_id = row["user_id"] + password = row["password"] + user_name = row["user_name"] + email = row["email"] + if password is not None and password != "": + self.reset_user_password(user_id, password) + if user_name is not None and user_name != "": + self.change_user_name(user_id, user_name) + if email is not None and email != "": + self.change_user_email(user_id, email) + csv_fp.close() + def close(self): self.conn.close() def main(): main_parser = argparse.ArgumentParser(description="Manage users") - main_parser.add_argument("--sqlite_path", type=str, required=True, help="Path to the user SQLite database") + main_parser.add_argument("--sqlite_path", 
type=str, help="Path to the user SQLite database", default="./users.sqlite") + main_parser.add_argument("--csv", type=str, help="Path to the CSV file", default="./users.csv") user_commands_parser = main_parser.add_subparsers(dest="command", required=True) - new_user_parser = user_commands_parser.add_parser("new", help="Create a new user") - new_user_parser.add_argument("--user_name", type=str, required=True, help="Username of the new user") - new_user_parser.add_argument("--email", type=str, required=True, help="Email of the new user") - new_user_parser.add_argument("--password", type=str, help="Password of the new user") - - delete_user_parser = user_commands_parser.add_parser("delete", help="Delete a user") - delete_user_parser.add_argument("--user_id", type=str, required=True, help="User ID to delete") - - reset_password_parser = user_commands_parser.add_parser("reset_password", help="Reset a user's password") - reset_password_parser.add_argument("--user_id", type=str, required=True, help="User ID to reset the password") - reset_password_parser.add_argument("--new_password", type=str, help="New password for the user") - - change_email_parser = user_commands_parser.add_parser("change_email", help="Change a user's email") - change_email_parser.add_argument("--user_id", type=str, required=True, help="User ID to change the email") - change_email_parser.add_argument("--new_email", type=str, required=True, help="New email for the user") - - change_username_parser = user_commands_parser.add_parser("change_username", help="Change a user's username") - change_username_parser.add_argument("--user_id", type=str, required=True, help="User ID to change the username") - change_username_parser.add_argument("--new_username", type=str, required=True, help="New username for the user") - - get_user_parser = user_commands_parser.add_parser("get", help="Get a user") - get_user_parser.add_argument("--user_id", type=str, help="User ID to get") - get_user_parser.add_argument("--email", 
type=str, help="Email to get") + user_commands_parser.add_parser("export", help="Export users to a CSV file") + user_commands_parser.add_parser("apply", help="Import users from a CSV file") args = main_parser.parse_args() - db_utils = DatabaseUtils(args.sqlite_path) + db_utils = UserUtils(args.sqlite_path, args.csv) match args.command: - case "new": - if args.password: - password = db_utils.new_user(args.user_name, args.email, args.password) - else: - password = db_utils.new_user(args.user_name, args.email) - print(f"New user created with password: {password}") - case "delete": - db_utils.delete_user(args.user_id) - print("User deleted") - case "reset_password": - if args.new_password: - new_password = db_utils.reset_user_password(args.user_id, args.new_password) - else: - new_password = db_utils.reset_user_password(args.user_id) - print(f"Password reset to: {new_password}") - case "change_email": - db_utils.change_user_email(args.user_id, args.new_email) - print("Email changed") - case "change_username": - db_utils.change_user_name(args.user_id, args.new_username) - print("Username changed") - case "get": - if args.user_id: - user = db_utils.get_user_by_id(args.user_id) - elif args.email: - user = db_utils.get_user_by_email(args.email) - else: - user = None - - if user: - print(f"User ID: {user[0]}") - print(f"Username: {user[1]}") - print(f"Email: {user[2]}") - else: - print("User not found") + case "export": + db_utils.export() + case "apply": + db_utils.apply() case _: print("Invalid command") From 6046a8077c6f800e39742a12f84f764179245d58 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 19:15:15 +0800 Subject: [PATCH 03/30] feat(user_utils): delete user through csv --- user_utils.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/user_utils.py b/user_utils.py index 409170c..885081d 100644 --- a/user_utils.py +++ b/user_utils.py @@ -47,7 +47,9 @@ def 
get_user_by_id(self, user_id): self.cursor.execute("SELECT * FROM users WHERE user_id = ?", (user_id,)) return self.cursor.fetchone() - def new_user(self, user_name, email, password=generate_random_string()): + def new_user(self, user_name, email, password): + if password is None or password == "": + password = generate_random_string() hashed_password = self.ph.hash(password) user_id = uuid.uuid4().hex self.cursor.execute("INSERT INTO users (user_id, user_name, email, hashed_password) VALUES (?, ?, ?, ?)", @@ -63,13 +65,13 @@ def delete_user(self, user_id): def export(self): csv_fp = open(self.csv_path, "w", newline="") writer = csv.writer(csv_fp) - writer.writerow(["user_id", "user_name", "email", "password"]) + writer.writerow(["user_id", "user_name", "email", "password", "delete"]) rows = self.cursor.execute("SELECT * FROM users") for user_id, user_name, email, _ in rows: - writer.writerow([user_id, user_name, email, ""]) # Passwords are hashed, so we don't export them + writer.writerow([user_id, user_name, email, "", 0]) # Passwords are hashed, so we don't export them csv_fp.close() - def apply(self): + def apply(self, destructive: bool): csv_fp = open(self.csv_path, newline="") reader = csv.DictReader(csv_fp) for row in reader: @@ -77,6 +79,18 @@ def apply(self): password = row["password"] user_name = row["user_name"] email = row["email"] + delete = row["delete"] + if (user_id is None or self.get_user_by_id(user_id) is None) and delete != "1": + new_password = self.new_user(user_name, email, password) + print(f"Created new user {user_name} with email {email} and password {new_password}") + break + if delete == "1": + if destructive: + self.delete_user(user_id) + print(f"Deleted user {user_id}") + else: + print(f"To delete user {user_id}, use the --destructive or -d flag") + continue if password is not None and password != "": self.reset_user_password(user_id, password) if user_name is not None and user_name != "": @@ -96,7 +110,8 @@ def main(): 
user_commands_parser = main_parser.add_subparsers(dest="command", required=True) user_commands_parser.add_parser("export", help="Export users to a CSV file") - user_commands_parser.add_parser("apply", help="Import users from a CSV file") + apply_parser = user_commands_parser.add_parser("apply", help="Import users from a CSV file") + apply_parser.add_argument("-d", "--destructive", action="store_true", help="") args = main_parser.parse_args() @@ -106,7 +121,7 @@ def main(): case "export": db_utils.export() case "apply": - db_utils.apply() + db_utils.apply(args.destructive) case _: print("Invalid command") From fb9641fb12d4cc659ec88ab16029afb60ffa3616 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 19:17:38 +0800 Subject: [PATCH 04/30] chore: vocabulary --- user_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/user_utils.py b/user_utils.py index 885081d..67af63d 100644 --- a/user_utils.py +++ b/user_utils.py @@ -110,8 +110,8 @@ def main(): user_commands_parser = main_parser.add_subparsers(dest="command", required=True) user_commands_parser.add_parser("export", help="Export users to a CSV file") - apply_parser = user_commands_parser.add_parser("apply", help="Import users from a CSV file") - apply_parser.add_argument("-d", "--destructive", action="store_true", help="") + apply_parser = user_commands_parser.add_parser("apply", help="Apply changes from a CSV file") + apply_parser.add_argument("-d", "--destructive", action="store_true", help="Delete users") args = main_parser.parse_args() From 88f03ee288357672ccb4bb07e62727dfed40cb60 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 21:05:03 +0800 Subject: [PATCH 05/30] docs: user administration --- README.md | 25 +++++++++++++++++++++++-- user_utils.py | 7 +++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 
554e715..4fcaabf 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,29 @@ Mercury uses [`sqlite-vec`](https://github.com/asg017/sqlite-vec) to store and s 2. `pnpm install && pnpm build` (You need to recompile the frontend each time the UI code changes.) 3. Manually set the labels for annotators to choose from in the `labels.yaml` file. Mercury supports hierarchical labels. 4. Generate and set a JWT secret key: `export SECRET_KEY=$(openssl rand -base64 32)`. You can rerun the command above to generate a new secret key when needed, especially when the old one is compromised. Note that changing the JWT token will log out all users. Optionally, you can also set `EXPIRE_MINUTES` to change the expiration time of the JWT token. The default is 7 days (10080 minutes). -5. Administer the users: `python3 user_utils.py -h`. You need to create users before they can work on the annotation task. You can register new users, reset passwords, and delete users. User credentials are stored in a separate SQLite database, denoted as `USER_DB` in the following steps. -6. Start the Mercury annotation server: `python3 server.py --mercury_db {MERCURY_DB} --user_db {USER_DB}`. Be sure to set the candidate labels to choose from in the `labels.yaml` file. +5. Start the Mercury annotation server: `python3 server.py --mercury_db {MERCURY_DB} --user_db {USER_DB}`. Be sure to set the candidate labels to choose from in the `labels.yaml` file. + +### Administer the users + +Administration is done via Python script and csv file. + +1. Export user data: `python3 user_utils.py export` +2. Edit csv file + + | user_id | user_name | email | password | delete | + |---------|-----------|--------------|---------------------------------------------------|----------------------------| + | user_id | user_name | unique email | empty. fill new password if you want to change it | initial: 0 (not to delete) | + + 1. Do not edit `user_id`. If you want to create a new user, create a raw and left `user_id` empty. 
+ + When creating new user, left `password` empty to let the script generate a random password. + 2. Edit `user_name`, `email`, `password` if you want to change them. Left them unchanged or empty if you don't. + 3. Change `delete` to 1 if you want to delete a user. If `user_id` is empty, this has no effect and a new user will be created. + +3. Apply changes: `python3 user_utils.py apply` + + If you want to delete users, confirm with `-d` flag: `python3 user_utils.py apply -d` + The annotations are stored in the `annotations` table in a SQLite database (hardcoded name `mercury.sqlite`). See the section [`annotations` table](#annotations-table-the-human-annotations) for the schema. diff --git a/user_utils.py b/user_utils.py index 67af63d..7ee322c 100644 --- a/user_utils.py +++ b/user_utils.py @@ -80,10 +80,10 @@ def apply(self, destructive: bool): user_name = row["user_name"] email = row["email"] delete = row["delete"] - if (user_id is None or self.get_user_by_id(user_id) is None) and delete != "1": + if user_id is None: new_password = self.new_user(user_name, email, password) print(f"Created new user {user_name} with email {email} and password {new_password}") - break + continue if delete == "1": if destructive: self.delete_user(user_id) @@ -91,6 +91,9 @@ def apply(self, destructive: bool): else: print(f"To delete user {user_id}, use the --destructive or -d flag") continue + if self.get_user_by_id(user_id) is None: + print(f"User {user_id} does not exist, ignored.") + continue if password is not None and password != "": self.reset_user_password(user_id, password) if user_name is not None and user_name != "": From 9faff6bf5538cc31137ddf7943c5ce0e1aad84ea Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 21:07:50 +0800 Subject: [PATCH 06/30] docs: user administration Closes: #18 --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 4fcaabf..249040d 100644 --- 
a/README.md +++ b/README.md @@ -86,6 +86,8 @@ Administration is done via Python script and csv file. If you want to delete users, confirm with `-d` flag: `python3 user_utils.py apply -d` +**Note** that this script does not have any validation. Please make sure the csv file is correct if you get errors. + The annotations are stored in the `annotations` table in a SQLite database (hardcoded name `mercury.sqlite`). See the section [`annotations` table](#annotations-table-the-human-annotations) for the schema. From bd72e421c3fdaeb6b7c4914580a916953af1b9bc Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:49:24 +0800 Subject: [PATCH 07/30] feat:(database): version control --- database.py | 8 ++++++++ ingester.py | 5 +++++ version.py | 1 + 3 files changed, 14 insertions(+) create mode 100644 version.py diff --git a/database.py b/database.py index a3d737a..1ca7e65 100644 --- a/database.py +++ b/database.py @@ -10,6 +10,7 @@ import sqlite_vec from dotenv import load_dotenv +from version import __version__ class OldLabelData(TypedDict): # readable by frontend @@ -162,6 +163,13 @@ def __init__(self, mercury_db_path: str, user_db_path: str): # prepare the database mercury_db = sqlite3.connect(mercury_db_path) print("Open db at ", mercury_db_path) + version = mercury_db.execute("SELECT value FROM config WHERE key = 'version'").fetchone() + if version is None: + mercury_db.execute("INSERT INTO config (key, value) VALUES ('version', ?)", (__version__,)) + else: + if version[0] != __version__: + print("Database version mismatch. 
Please migrate the database.") + exit(1) mercury_db.execute("CREATE TABLE IF NOT EXISTS annotations (\ annot_id INTEGER PRIMARY KEY AUTOINCREMENT, \ sample_id INTEGER, \ diff --git a/ingester.py b/ingester.py index fadb404..4a8ad54 100644 --- a/ingester.py +++ b/ingester.py @@ -9,6 +9,7 @@ from dotenv import load_dotenv from tqdm.auto import tqdm +from version import __version__ import struct @@ -138,6 +139,10 @@ def prepare_db(self): "INSERT OR REPLACE INTO config (key, value) VALUES ('embedding_dimension', ?)", [self.embedding_dimension], ) + self.db.execute( + "INSERT OR REPLACE INTO config (key, value) VALUES ('version', ?)", + [__version__] + ) self.db.commit() diff --git a/version.py b/version.py new file mode 100644 index 0000000..a68927d --- /dev/null +++ b/version.py @@ -0,0 +1 @@ +__version__ = "0.1.0" \ No newline at end of file From 5515a7687e096e46f4908decf217776dada34621 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:53:13 +0800 Subject: [PATCH 08/30] feat: mercury version --- database.py | 1 + ingester.py | 2 ++ server.py | 3 +++ 3 files changed, 6 insertions(+) diff --git a/database.py b/database.py index 1ca7e65..0555612 100644 --- a/database.py +++ b/database.py @@ -698,6 +698,7 @@ def get_env_id_value(env_name: str) -> int | None: parser.add_argument("--mercury_db_path", type=str, required=True, help="Path to the Mercury SQLite database") parser.add_argument("--user_db_path", type=str, required=True, help="Path to the user SQLite database") parser.add_argument("--dump_file", type=str, required=True, default="mercury_annotations.json") + parser.add_argument("--version", action="version", version="__version__") args = parser.parse_args() # db = Database(args.annotation_corpus_id) diff --git a/ingester.py b/ingester.py index 4a8ad54..3f6357f 100644 --- a/ingester.py +++ b/ingester.py @@ -254,9 +254,11 @@ def get_env_id_value(env_name: str) -> int | None: default="summary", 
help="The name of the 2nd column to ingest", ) + parser.add_argument("--version", action="version", version="__version__") args = parser.parse_args() + print("Mercury version: ", __version__) print("Ingesting data") ingester = Ingester( file_to_ingest=args.file_to_ingest, diff --git a/server.py b/server.py index 18f3ebb..1907d31 100644 --- a/server.py +++ b/server.py @@ -26,6 +26,7 @@ import sqlite_vec from ingester import Embedder from database import Database +from version import __version__ import jwt from jwt.exceptions import InvalidTokenError @@ -449,6 +450,7 @@ async def login(): parser.add_argument("--mercury_db", type=str, required=True, default="./mercury.sqlite") parser.add_argument("--user_db", type=str, required=True, default="./user.sqlite") parser.add_argument("--port", type=int, default=8000) + parser.add_argument("--version", action="version", version="__version__") args = parser.parse_args() env_secret_key = os.getenv("SECRET_KEY") @@ -458,6 +460,7 @@ async def login(): expire = int(os.getenv("EXPIRE_MINUTES", 10080)) env_config = Config(secret_key=env_secret_key, expire=expire) + print("Mercury version: ", __version__) print("Using Mercury SQLite db: ", args.mercury_db) print("Using User SQLite db: ", args.user_db) From 7d7fd28b0e06d129a57b7146c9369024f1a65dd6 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 15:21:42 +0800 Subject: [PATCH 09/30] fix: config key is not unique --- database.py | 3 ++- ingester.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/database.py b/database.py index 0555612..702dd54 100644 --- a/database.py +++ b/database.py @@ -165,7 +165,8 @@ def __init__(self, mercury_db_path: str, user_db_path: str): print("Open db at ", mercury_db_path) version = mercury_db.execute("SELECT value FROM config WHERE key = 'version'").fetchone() if version is None: - mercury_db.execute("INSERT INTO config (key, value) VALUES ('version', ?)", 
(__version__,)) + print("Can not determine database version.") + exit(1) else: if version[0] != __version__: print("Database version mismatch. Please migrate the database.") diff --git a/ingester.py b/ingester.py index 3f6357f..8337e1e 100644 --- a/ingester.py +++ b/ingester.py @@ -119,7 +119,7 @@ def prepare_db(self): f"CREATE VIRTUAL TABLE embeddings USING vec0(embedding float[{self.embedding_dimension}])" ) self.db.execute( - "CREATE TABLE IF NOT EXISTS config (key TEXT PRIMARY KEY, value TEXT)" + "CREATE TABLE IF NOT EXISTS config (key TEXT PRIMARY KEY UNIQUE , value TEXT)" ) self.db.execute( "CREATE TABLE IF NOT EXISTS sample_meta (sample_id INTEGER PRIMARY KEY, json_meta TEXT)" From 0ef373f9f52494cb8c42205e771dcca7bbb9db99 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 15:23:45 +0800 Subject: [PATCH 10/30] feat: migrate to database with version control --- migration/database_version_control.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 migration/database_version_control.py diff --git a/migration/database_version_control.py b/migration/database_version_control.py new file mode 100644 index 0000000..4259bba --- /dev/null +++ b/migration/database_version_control.py @@ -0,0 +1,27 @@ +import argparse +import sqlite3 + + +class Migrator: + def __init__(self, db_path): + conn = sqlite3.connect(db_path) + self.conn = conn + version = self.conn.execute("SELECT count(*) FROM config WHERE key = 'version'").fetchone() + if version[0] != 0: + print("Can not migrate database with existing version") + exit(0) + + def migrate(self): + self.conn.execute("ALTER TABLE config RENAME TO config_old") + self.conn.execute("CREATE TABLE config(key TEXT PRIMARY KEY UNIQUE , value TEXT)") + self.conn.execute("INSERT INTO config SELECT key, value FROM config_old") + self.conn.execute("INSERT INTO config VALUES ('version', '0.1.0')") + self.conn.commit() + print("Migration 
completed") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Migrate the database to 0.1.0") + parser.add_argument("--db_path", help="Path to the database", default="../mercury.sqlite") + args = parser.parse_args() + migrator = Migrator(args.db_path) + migrator.migrate() From 8d5adb0694b9f3c4e85ea763bca47bb31fb1fad7 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 15:25:24 +0800 Subject: [PATCH 11/30] chore: make code more readable --- database.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/database.py b/database.py index 702dd54..ec78696 100644 --- a/database.py +++ b/database.py @@ -167,8 +167,7 @@ def __init__(self, mercury_db_path: str, user_db_path: str): if version is None: print("Can not determine database version.") exit(1) - else: - if version[0] != __version__: + elif version[0] != __version__: print("Database version mismatch. Please migrate the database.") exit(1) mercury_db.execute("CREATE TABLE IF NOT EXISTS annotations (\ From b56587e99a7b4505a4f5d7eafaa9431d647ddbf7 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 15:34:28 +0800 Subject: [PATCH 12/30] fix:: drop old table --- migration/database_version_control.py | 1 + 1 file changed, 1 insertion(+) diff --git a/migration/database_version_control.py b/migration/database_version_control.py index 4259bba..043b0f9 100644 --- a/migration/database_version_control.py +++ b/migration/database_version_control.py @@ -16,6 +16,7 @@ def migrate(self): self.conn.execute("CREATE TABLE config(key TEXT PRIMARY KEY UNIQUE , value TEXT)") self.conn.execute("INSERT INTO config SELECT key, value FROM config_old") self.conn.execute("INSERT INTO config VALUES ('version', '0.1.0')") + self.conn.execute("DROP TABLE config_old") self.conn.commit() print("Migration completed") From 37ce30f2d684a379350607c5ff185d3b5fbea229 Mon Sep 17 
00:00:00 2001 From: forrestbao Date: Mon, 13 Jan 2025 22:47:30 -0600 Subject: [PATCH 13/30] user admin without CSV --- README.md | 42 +++------ user_admin.md | 66 ++++++++++++++ user_admin.py | 245 ++++++++++++++++++++++++++++++++++++++++++++++++++ user_utils.py | 135 ---------------------------- users.sqlite | Bin 0 -> 16384 bytes 5 files changed, 322 insertions(+), 166 deletions(-) create mode 100644 user_admin.md create mode 100644 user_admin.py delete mode 100644 user_utils.py create mode 100644 users.sqlite diff --git a/README.md b/README.md index 249040d..3734c73 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Currently, Mercury only supports labeling inconsistencies between the source and ![Header](usage/selection_from_highlight.png) -## Dependencies +## Dependencies and setup > [!NOTE] > You need Python and Node.js. @@ -22,7 +22,9 @@ Mercury uses [`sqlite-vec`](https://github.com/asg017/sqlite-vec) to store and s 2. If you don't have `pnpm` installed, please install with `npm install -g pnpm` - you may need `sudo`. If you don't have `npm`, try `sudo apt install npm`. -3. To use `sqlite-vec` via Python's built-in `sqlite3` module, you must have SQLite>3.41 (otherwise `LIMIT` or `k=?` will not work properly with `rowid IN (?)` for vector search) installed and ensure Python's built-in `sqlite3` module is built for SQLite>3.41. Note that Python's built-in `sqlite3` module uses its own binary library that is independent of the OS's SQLite. So upgrading the OS's SQLite will not upgrade Python's `sqlite3` module. +3. Compile the frontend: `pnpm install && pnpm build` + +4. To use `sqlite-vec` via Python's built-in `sqlite3` module, you must have SQLite>3.41 (otherwise `LIMIT` or `k=?` will not work properly with `rowid IN (?)` for vector search) installed and ensure Python's built-in `sqlite3` module is built for SQLite>3.41. Note that Python's built-in `sqlite3` module uses its own binary library that is independent of the OS's SQLite. 
So upgrading the OS's SQLite will not upgrade Python's `sqlite3` module. To manually upgrade Python's `sqlite3` module to use SQLite>3.41, here are the steps: * Download and compile SQLite>3.41.0 from source ```bash @@ -48,7 +50,7 @@ Mercury uses [`sqlite-vec`](https://github.com/asg017/sqlite-vec) to store and s * If you are using Mac and run into troubles, please follow SQLite-vec's [instructions](https://alexgarcia.xyz/sqlite-vec/python.html#updated-sqlite). -4. To use `sqlite-vec` directly in `sqlite` prompt, simply [compile +5. To use `sqlite-vec` directly in `sqlite` prompt, simply [compile `sqlite-vec` from source](https://alexgarcia.xyz/sqlite-vec/compiling.html) and load the compiled `vec0.o`. The usage can be found in the SQLite-vec's [README](https://github.com/asg017/sqlite-vec?tab=readme-ov-file#sample-usage). @@ -58,36 +60,14 @@ Mercury uses [`sqlite-vec`](https://github.com/asg017/sqlite-vec) to store and s Run `python3 ingester.py -h` to see the options. - The ingester takes a CSV, JSON, or JSONL file and loads texts from two text columns (configurable via option `ingest_column_1` and `ingest_column_2` which default to `source` and `summary`) of the file. After ingestion, the data will be stored in the SQLite database, denoted as `MERCURY_DB` in the following steps. - -2. `pnpm install && pnpm build` (You need to recompile the frontend each time the UI code changes.) -3. Manually set the labels for annotators to choose from in the `labels.yaml` file. Mercury supports hierarchical labels. -4. Generate and set a JWT secret key: `export SECRET_KEY=$(openssl rand -base64 32)`. You can rerun the command above to generate a new secret key when needed, especially when the old one is compromised. Note that changing the JWT token will log out all users. Optionally, you can also set `EXPIRE_MINUTES` to change the expiration time of the JWT token. The default is 7 days (10080 minutes). -5. 
Start the Mercury annotation server: `python3 server.py --mercury_db {MERCURY_DB} --user_db {USER_DB}`. Be sure to set the candidate labels to choose from in the `labels.yaml` file. - -### Administer the users - -Administration is done via Python script and csv file. - -1. Export user data: `python3 user_utils.py export` -2. Edit csv file - - | user_id | user_name | email | password | delete | - |---------|-----------|--------------|---------------------------------------------------|----------------------------| - | user_id | user_name | unique email | empty. fill new password if you want to change it | initial: 0 (not to delete) | - - 1. Do not edit `user_id`. If you want to create a new user, create a raw and left `user_id` empty. - - When creating new user, left `password` empty to let the script generate a random password. - 2. Edit `user_name`, `email`, `password` if you want to change them. Left them unchanged or empty if you don't. - 3. Change `delete` to 1 if you want to delete a user. If `user_id` is empty, this has no effect and a new user will be created. - -3. Apply changes: `python3 user_utils.py apply` - - If you want to delete users, confirm with `-d` flag: `python3 user_utils.py apply -d` + The ingester takes a CSV, JSON, or JSONL file and loads texts from two text columns (configurable via option `ingest_column_1` and `ingest_column_2` which default to `source` and `summary`) of the file. After ingestion, the data will be stored in the SQLite database, denoted as `CORPUS_DB` in the following steps. -**Note** that this script does not have any validation. Please make sure the csv file is correct if you get errors. +2. Manually set the labels for annotators to choose from in the `labels.yaml` file. Mercury supports hierarchical labels. +3. Generate and set a JWT secret key: `export SECRET_KEY=$(openssl rand -base64 32)`. You can rerun the command above to generate a new secret key when needed, especially when the old one is compromised. 
Note that changing the JWT token will log out all users. Optionally, you can also set `EXPIRE_MINUTES` to change the expiration time of the JWT token. The default is 7 days (10080 minutes). +4. Start the Mercury annotation server: `python3 server.py --corpus_db {CORPUS_DB} --user_db {USER_DB}`. + Be sure to set the candidate labels to choose from in the `labels.yaml` file. The server will run on `http://localhost:8000` by default. The default `USER_DB`, namely `users.sqlite`, is distributed with the code repo with the default Email and password as `test@example.com` and `test`, respectively. +5. **Optional** To add/update/list users in a `USER_DB`, see [User administration in Mercury](user_admin.md) for more details. The annotations are stored in the `annotations` table in a SQLite database (hardcoded name `mercury.sqlite`). See the section [`annotations` table](#annotations-table-the-human-annotations) for the schema. diff --git a/user_admin.md b/user_admin.md new file mode 100644 index 0000000..dc9e9b9 --- /dev/null +++ b/user_admin.md @@ -0,0 +1,66 @@ +# User administration in Mercury + +Mercury uses a SQLite DB for user info (denoted as `USER_DB`) that is separate from the main corpus DB `CORPUS_DB`. By decoupling the user administration from the corpus, we can have a single user DB for multiple corpora and the annotation is always de-anonymized. The Default names for the user DB is `users.sqlite`. + +In a Mercury `USER_DB`, the following fields are stored for each user: +* `user_id`: Hash string that uniquely identifies a user +* `user_name`: User's name (for display purpose only, not for login) +* `email`: User's email (for login) +* `hashed_password`: Hashed password (for login) + +The script for user administration is `user_admin.py`. + +Actions that can be performed: +* Creating a new user + + There are two ways to create a new user: + + 1. Using interactive mode: + ```bash + python user_admin.py new + ``` + then follow the prompts. + + 2. 
Using command line arguments: + + ```bash + python user_admin.py new -n {USER_NAME} -e {EMAIL} -p {PASSWORD} + ``` + For example, to create a user with name `Test User`, email `test@example.com` and a random password (omit `-p` and press Enter at the password prompt): + + ```bash + python user_admin.py new -n "Test User" -e "test@example.com" + ``` + +* Listing all users + + ```bash + python user_admin.py list + ``` + +* Changing the password, email, or display name of a user, including resetting password + + There are two ways to update a user's info: + 1. Using interactive mode: + + ```bash + python user_admin.py update + ``` + then follow the prompts. + + 2. Using command line arguments: + ```bash + python user_admin.py update -k {LOOK_UP_METHOD} -v {LOOK_UP_VALUE} -f {FIELD_TO_UPDATE} -n {FIELD_NEW_VALUE} + ``` + + For example, to change the password of a user with email `test@example.com` to `abcdefg`: + + ```bash + python user_admin.py update -k email -v test@example.com -f password -n abcdefg + ``` + +For various reasons, Mercury does not support deleting users. However, you can simply change the password of a user to a random string to effectively disable the user. + + + +Mercury has minimal exception handling for user administration. 
\ No newline at end of file diff --git a/user_admin.py b/user_admin.py new file mode 100644 index 0000000..60c7487 --- /dev/null +++ b/user_admin.py @@ -0,0 +1,245 @@ +import csv +import sqlite3 +import uuid +import argon2 +import random +import string +import argparse + +def generate_random_string(length=16): + characters = string.ascii_letters + string.digits + string.punctuation + return ''.join(random.choice(characters) for _ in range(length)) + +class UserUtils: + def __init__(self, db_path): + self.conn = sqlite3.connect(db_path) + self.cursor = self.conn.cursor() + self.ph = argon2.PasswordHasher(time_cost=2, memory_cost=19456, parallelism=1) + self.conn.execute("""CREATE TABLE IF NOT EXISTS users ( + user_id TEXT PRIMARY KEY, + user_name TEXT NOT NULL, + email TEXT NOT NULL UNIQUE, + hashed_password TEXT NOT NULL)""") + self.conn.commit() + + def update_user(self, look_up_method:str=None, look_up_value:str=None, field_to_update:str=None, field_new_value:str=None): + look_up_method_mappping = { + "1": "user_id", + "2": "email", + "3": "user_name" + } + + field_to_update_mapping = { + "1": "email", + "2": "password", + "3": "user_name" + } + + prompt_mapping = { + "password": "Password of the user (leave empty for a random one): ", + "email": "New email for the user: ", + "user_name": "New display name for the user: " + } + + # Find the user to update + if look_up_method is None: + look_up_method = input("How do you wanna look up the user? (Select the number below) \n 1. By user_id (hex) \n 2. By email \n 3. By user_name \n") + look_up_method = look_up_method_mappping[look_up_method] + + if look_up_value is None: + look_up_value = input(f"Please enter the {look_up_method} of the user whom you wanna update: ") + + sql_cmd = f"SELECT * FROM users WHERE {look_up_method} = '{look_up_value}'" + + self.cursor.execute(sql_cmd) + user = self.cursor.fetchone() + if user is None: + print(f"User of {look_up_method} == {look_up_value} not found. 
Please try again.") + exit() + + # Update the user + if field_to_update is None: + field_to_update = input("What field do you want to update? (Select the number below) \n 1. Email \n 2. Password \n 3. User Name \n") + field_to_update = field_to_update_mapping[field_to_update] + + if field_new_value is None: + field_new_value = input(prompt_mapping[field_to_update]) + + plain_field_value = field_new_value + if field_to_update == "password": # password + if field_new_value == "": + plain_field_value = generate_random_string(length=6) + + if field_to_update == "password": # change name only right before SQL command + field_to_update = "hashed_password" + field_new_value = self.ph.hash(plain_field_value) + + sql_cmd = f"UPDATE users SET {field_to_update} = '{field_new_value}' WHERE {look_up_method} = '{look_up_value}'" + + self.cursor.execute(sql_cmd) + self.conn.commit() + + print (f"Successfully updated `{'password' if field_to_update=='hashed_password' else field_to_update}` TO `{plain_field_value}` \n for user whose `{look_up_method}` IS `{look_up_value}` ") # FIXME: The if statement is ugly. + + + def list_users(self): + # get schema of the table users + self.cursor.execute("PRAGMA table_info(users)") + rows = self.cursor.fetchall() + + headers = [row[1] for row in rows][:-1] + + self.cursor.execute("SELECT * FROM users") + rows = self.cursor.fetchall() + + # print the table in Markdown format + print("user_id".ljust(32), "| user_name".ljust(17), "| email") + print("-" * 32, "|", "-" * 15, "|", "-" * 20) + for row in rows: + print (row[0].ljust(32), "|", row[1].ljust(15), "|", row[2]) + # -1 to skip the hashed password + + # def reset_user_password(self, user_id, new_password=generate_random_string()): + # hashed_password = self.ph.hash(new_password) + # self.cursor.execute("UPDATE users SET hashed_password = ? 
WHERE user_id = ?", (hashed_password, user_id)) + # self.conn.commit() + # return new_password + + # def change_user_email(self, user_id, new_email): + # self.cursor.execute("UPDATE users SET email = ? WHERE user_id = ?", (new_email, user_id)) + # self.conn.commit() + + # def change_user_name(self, user_id, new_username): + # self.cursor.execute("UPDATE users SET user_name = ? WHERE user_id = ?", (new_username, user_id)) + # self.conn.commit() + + # def get_user_by_email(self, email): + # self.cursor.execute("SELECT * FROM users WHERE email = ?", (email,)) + # return self.cursor.fetchone() + + # def get_user_by_id(self, user_id): + # self.cursor.execute("SELECT * FROM users WHERE user_id = ?", (user_id,)) + # return self.cursor.fetchone() + + def new_user(self, user_name:str=None, email:str=None, password:str=None): + if email in [None, ""]: + email = input("Email of the user (must be unique): ") + + if password in [None, ""]: + password = input("Password of the user (leave empty and hit Enter for a random one): ") + if password == "": + password = generate_random_string(length=6) + + if user_name in [None, ""]: + user_name = input("User name (for display only, not as credential) for the user: ") + + hashed_password = self.ph.hash(password) + user_id = uuid.uuid4().hex + self.cursor.execute("INSERT INTO users (user_id, user_name, email, hashed_password) VALUES (?, ?, ?, ?)", + (user_id, user_name, email, hashed_password)) + self.conn.commit() + + print(f"User created with user_id {user_id}, email {email}, password {password}") + print ("Please save the email and password in a secure location. 
You will not be able to password again.") + + # def delete_user(self, user_id): + # self.cursor.execute("DELETE FROM users WHERE user_id = ?", (user_id,)) + # self.conn.commit() + + def export(self, csv_path: str): + csv_fp = open(csv_path, "w", newline="") + writer = csv.writer(csv_fp) + writer.writerow(["user_id", "user_name", "email", "password", "delete"]) + rows = self.cursor.execute("SELECT * FROM users") + for user_id, user_name, email, _ in rows: + writer.writerow([user_id, user_name, email, "", 0]) # Passwords are hashed, so we don't export them + csv_fp.close() + + # def apply(self, csv_path: str, destructive: bool): + # """Import users from a CSV file into SQLite database. + # Disabled as of Jan 13, 2025. + # """ + # csv_fp = open(csv_path, newline="") + # reader = csv.DictReader(csv_fp) + # for row in reader: + # user_id = row["user_id"] + # password = row["password"] + # user_name = row["user_name"] + # email = row["email"] + # delete = row["delete"] + # if user_id is None: + # new_password = self.new_user(user_name, email, password) + # print(f"Created new user {user_name} with email {email} and password {new_password}") + # continue + # if delete == "1": + # if destructive: + # self.delete_user(user_id) + # print(f"Deleted user {user_id}") + # else: + # print(f"To delete user {user_id}, use the --destructive or -d flag") + # continue + # if self.get_user_by_id(user_id) is None: + # print(f"User {user_id} does not exist, ignored.") + # continue + # if password is not None and password != "": + # self.reset_user_password(user_id, password) + # if user_name is not None and user_name != "": + # self.change_user_name(user_id, user_name) + # if email is not None and email != "": + # self.change_user_email(user_id, email) + # csv_fp.close() + + def close(self): + self.conn.close() + + +def main(): + main_parser = argparse.ArgumentParser(description="Manage users") + main_parser.add_argument("--user_db", type=str, help="Path to SQLite database storing user 
info", default="./users.sqlite") + + user_commands_parser = main_parser.add_subparsers(dest="command", required=True) + + user_commands_parser.add_parser("list", help="List users") + + new_parser = user_commands_parser.add_parser("new", help="Create a new user") + new_parser.add_argument("-e", "--email", type=str, help="Email of the user. Must be unique. For login.") + new_parser.add_argument("-p", "--password", type=str, help="Password of the user. Leave empty for a random one.") + new_parser.add_argument("-n", "--user_name", type=str, help="User name for display, not for logging in.") + + update_parser = user_commands_parser.add_parser("update", help="Update a user's info including resetting password") + update_parser.add_argument("-k", "--look_up_method", type=str, help="How to look up the user") + update_parser.add_argument("-v", "--look_up_value", type=str, help="Value to look up the user") + update_parser.add_argument("-f", "--field_to_update", type=str, help="Field to update") + update_parser.add_argument("-n", "--field_new_value", type=str, help="New value for the field") + + export_parser = user_commands_parser.add_parser("export", help="Export user info to a CSV file") + export_parser.add_argument("csv", type=str, help="Path to the CSV file for exporting") + + # Disabled as of Jan 13, 2025 + # apply_parser = user_commands_parser.add_parser("apply", help="Apply changes from a CSV file") + # apply_parser.add_argument("csv", type=str, help="Path to the CSV file for importing") + # apply_parser.add_argument("-d", "--destructive", action="store_true", help="Delete users") + + args = main_parser.parse_args() + + db_utils = UserUtils(args.user_db) + + match args.command: + case "export": + db_utils.export(csv_path=args.csv) + case "list": + db_utils.list_users() + case "new": + db_utils.new_user(user_name=args.user_name, email=args.email, password=args.password) + case "update": + db_utils.update_user(look_up_method=args.look_up_method, 
look_up_value=args.look_up_value, field_to_update=args.field_to_update, field_new_value=args.field_new_value) + # case "apply": + # db_utils.apply(csv_path=args.csv, destructive=args.destructive) + case _: + print("Invalid command") + + db_utils.close() + + +if __name__ == "__main__": + main() diff --git a/user_utils.py b/user_utils.py deleted file mode 100644 index 7ee322c..0000000 --- a/user_utils.py +++ /dev/null @@ -1,135 +0,0 @@ -import csv -import sqlite3 -import uuid -import argon2 -import random -import string -import argparse - - -def generate_random_string(length=16): - characters = string.ascii_letters + string.digits + string.punctuation - return ''.join(random.choice(characters) for _ in range(length)) - - -class UserUtils: - def __init__(self, db_path, csv_path): - self.csv_path = csv_path - self.conn = sqlite3.connect(db_path) - self.cursor = self.conn.cursor() - self.ph = argon2.PasswordHasher(time_cost=2, memory_cost=19456, parallelism=1) - self.conn.execute("""CREATE TABLE IF NOT EXISTS users ( - user_id TEXT PRIMARY KEY, - user_name TEXT NOT NULL, - email TEXT NOT NULL UNIQUE, - hashed_password TEXT NOT NULL)""") - self.conn.commit() - - def reset_user_password(self, user_id, new_password=generate_random_string()): - hashed_password = self.ph.hash(new_password) - self.cursor.execute("UPDATE users SET hashed_password = ? WHERE user_id = ?", (hashed_password, user_id)) - self.conn.commit() - return new_password - - def change_user_email(self, user_id, new_email): - self.cursor.execute("UPDATE users SET email = ? WHERE user_id = ?", (new_email, user_id)) - self.conn.commit() - - def change_user_name(self, user_id, new_username): - self.cursor.execute("UPDATE users SET user_name = ? 
WHERE user_id = ?", (new_username, user_id)) - self.conn.commit() - - def get_user_by_email(self, email): - self.cursor.execute("SELECT * FROM users WHERE email = ?", (email,)) - return self.cursor.fetchone() - - def get_user_by_id(self, user_id): - self.cursor.execute("SELECT * FROM users WHERE user_id = ?", (user_id,)) - return self.cursor.fetchone() - - def new_user(self, user_name, email, password): - if password is None or password == "": - password = generate_random_string() - hashed_password = self.ph.hash(password) - user_id = uuid.uuid4().hex - self.cursor.execute("INSERT INTO users (user_id, user_name, email, hashed_password) VALUES (?, ?, ?, ?)", - (user_id, user_name, email, hashed_password)) - self.conn.commit() - return password - - def delete_user(self, user_id): - self.cursor.execute("DELETE FROM users WHERE user_id = ?", (user_id,)) - self.conn.commit() - - - def export(self): - csv_fp = open(self.csv_path, "w", newline="") - writer = csv.writer(csv_fp) - writer.writerow(["user_id", "user_name", "email", "password", "delete"]) - rows = self.cursor.execute("SELECT * FROM users") - for user_id, user_name, email, _ in rows: - writer.writerow([user_id, user_name, email, "", 0]) # Passwords are hashed, so we don't export them - csv_fp.close() - - def apply(self, destructive: bool): - csv_fp = open(self.csv_path, newline="") - reader = csv.DictReader(csv_fp) - for row in reader: - user_id = row["user_id"] - password = row["password"] - user_name = row["user_name"] - email = row["email"] - delete = row["delete"] - if user_id is None: - new_password = self.new_user(user_name, email, password) - print(f"Created new user {user_name} with email {email} and password {new_password}") - continue - if delete == "1": - if destructive: - self.delete_user(user_id) - print(f"Deleted user {user_id}") - else: - print(f"To delete user {user_id}, use the --destructive or -d flag") - continue - if self.get_user_by_id(user_id) is None: - print(f"User {user_id} does not 
exist, ignored.") - continue - if password is not None and password != "": - self.reset_user_password(user_id, password) - if user_name is not None and user_name != "": - self.change_user_name(user_id, user_name) - if email is not None and email != "": - self.change_user_email(user_id, email) - csv_fp.close() - - def close(self): - self.conn.close() - - -def main(): - main_parser = argparse.ArgumentParser(description="Manage users") - main_parser.add_argument("--sqlite_path", type=str, help="Path to the user SQLite database", default="./users.sqlite") - main_parser.add_argument("--csv", type=str, help="Path to the CSV file", default="./users.csv") - user_commands_parser = main_parser.add_subparsers(dest="command", required=True) - - user_commands_parser.add_parser("export", help="Export users to a CSV file") - apply_parser = user_commands_parser.add_parser("apply", help="Apply changes from a CSV file") - apply_parser.add_argument("-d", "--destructive", action="store_true", help="Delete users") - - args = main_parser.parse_args() - - db_utils = UserUtils(args.sqlite_path, args.csv) - - match args.command: - case "export": - db_utils.export() - case "apply": - db_utils.apply(args.destructive) - case _: - print("Invalid command") - - db_utils.close() - - -if __name__ == "__main__": - main() diff --git a/users.sqlite b/users.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..8d4aa3b763bebec28381beb4bbdedb905fb89b6f GIT binary patch literal 16384 zcmeI&%Wm306b9fiL}?Q#aiuQ2%t-a3s3Zz5F=2reL#-CYK+L6WB2{RJ2e5)MzJ${5 z=%e&~y3MMOkO%0pgHxoED2m!$r2lBfbMcYw`F1mM%eLH@)4m^iET*r?Ga^akh*CmG zO4Kz`mm!JH?eX&7eQk>DeGRtcUnISKMYgZxAM*G6QZWw!2tWV=5P$##AOHafKmY;| z_*a2>ReGW*iu5grS#QKIBOXS{B~@vfM%$!qqimTpnW8T@>0hy!>$(nYn;+Zsq*<*Q z%~N`8o@Vc6jhV-9^6HH?t#>SIHHmx79jzwNPQ7Y(%+DhmhxSaTh9TFMnwPh9c`dNwX;*-0sV}|hBh_4)qodid@=+e009U<00Izz X00bZa0SG_<0(S(Sr*}7Q|6uSNMER(q literal 0 HcmV?d00001 From 803d84d85ffcd07a251a9b54db01d79403249f29 Mon Sep 17 00:00:00 2001 From: Nanami 
Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:49:24 +0800 Subject: [PATCH 14/30] feat:(database): version control --- database.py | 8 ++++++++ ingester.py | 5 +++++ version.py | 1 + 3 files changed, 14 insertions(+) create mode 100644 version.py diff --git a/database.py b/database.py index cee11b0..f2d0af8 100644 --- a/database.py +++ b/database.py @@ -10,6 +10,7 @@ import sqlite_vec from dotenv import load_dotenv +from version import __version__ class OldLabelData(TypedDict): # readable by frontend @@ -162,6 +163,13 @@ def __init__(self, mercury_db_path: str, user_db_path: str): # prepare the database mercury_db = sqlite3.connect(mercury_db_path) print("Open db at ", mercury_db_path) + version = mercury_db.execute("SELECT value FROM config WHERE key = 'version'").fetchone() + if version is None: + mercury_db.execute("INSERT INTO config (key, value) VALUES ('version', ?)", (__version__,)) + else: + if version[0] != __version__: + print("Database version mismatch. 
Please migrate the database.") + exit(1) mercury_db.execute("CREATE TABLE IF NOT EXISTS annotations (\ annot_id INTEGER PRIMARY KEY AUTOINCREMENT, \ sample_id INTEGER, \ diff --git a/ingester.py b/ingester.py index fadb404..4a8ad54 100644 --- a/ingester.py +++ b/ingester.py @@ -9,6 +9,7 @@ from dotenv import load_dotenv from tqdm.auto import tqdm +from version import __version__ import struct @@ -138,6 +139,10 @@ def prepare_db(self): "INSERT OR REPLACE INTO config (key, value) VALUES ('embedding_dimension', ?)", [self.embedding_dimension], ) + self.db.execute( + "INSERT OR REPLACE INTO config (key, value) VALUES ('version', ?)", + [__version__] + ) self.db.commit() diff --git a/version.py b/version.py new file mode 100644 index 0000000..a68927d --- /dev/null +++ b/version.py @@ -0,0 +1 @@ +__version__ = "0.1.0" \ No newline at end of file From 0953425df18c8ad5651468c79fbe586ae2947133 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:53:13 +0800 Subject: [PATCH 15/30] feat: mercury version --- database.py | 1 + ingester.py | 2 ++ server.py | 3 +++ 3 files changed, 6 insertions(+) diff --git a/database.py b/database.py index f2d0af8..58f3683 100644 --- a/database.py +++ b/database.py @@ -692,6 +692,7 @@ def get_env_id_value(env_name: str) -> int | None: parser.add_argument("--mercury_db_path", type=str, required=True, help="Path to the Mercury SQLite database") parser.add_argument("--user_db_path", type=str, required=True, help="Path to the user SQLite database") parser.add_argument("--dump_file", type=str, required=True, default="mercury_annotations.json") + parser.add_argument("--version", action="version", version="__version__") args = parser.parse_args() # db = Database(args.annotation_corpus_id) diff --git a/ingester.py b/ingester.py index 4a8ad54..3f6357f 100644 --- a/ingester.py +++ b/ingester.py @@ -254,9 +254,11 @@ def get_env_id_value(env_name: str) -> int | None: default="summary", 
help="The name of the 2nd column to ingest", ) + parser.add_argument("--version", action="version", version="__version__") args = parser.parse_args() + print("Mercury version: ", __version__) print("Ingesting data") ingester = Ingester( file_to_ingest=args.file_to_ingest, diff --git a/server.py b/server.py index b42c369..af9c768 100644 --- a/server.py +++ b/server.py @@ -26,6 +26,7 @@ import sqlite_vec from ingester import Embedder from database import Database +from version import __version__ import jwt from jwt.exceptions import InvalidTokenError @@ -441,6 +442,7 @@ async def login(): parser.add_argument("--mercury_db", type=str, required=True, default="./mercury.sqlite") parser.add_argument("--user_db", type=str, required=True, default="./user.sqlite") parser.add_argument("--port", type=int, default=8000) + parser.add_argument("--version", action="version", version="__version__") args = parser.parse_args() env_secret_key = os.getenv("SECRET_KEY") @@ -450,6 +452,7 @@ async def login(): expire = int(os.getenv("EXPIRE_MINUTES", 10080)) env_config = Config(secret_key=env_secret_key, expire=expire) + print("Mercury version: ", __version__) print("Using Mercury SQLite db: ", args.mercury_db) print("Using User SQLite db: ", args.user_db) From 5b8d7e33f4e7b018a34f027f5cf6d1dd46be0145 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 15:21:42 +0800 Subject: [PATCH 16/30] fix: config key is not unique --- database.py | 3 ++- ingester.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/database.py b/database.py index 58f3683..99065b8 100644 --- a/database.py +++ b/database.py @@ -165,7 +165,8 @@ def __init__(self, mercury_db_path: str, user_db_path: str): print("Open db at ", mercury_db_path) version = mercury_db.execute("SELECT value FROM config WHERE key = 'version'").fetchone() if version is None: - mercury_db.execute("INSERT INTO config (key, value) VALUES ('version', ?)", 
(__version__,)) + print("Can not determine database version.") + exit(1) else: if version[0] != __version__: print("Database version mismatch. Please migrate the database.") diff --git a/ingester.py b/ingester.py index 3f6357f..8337e1e 100644 --- a/ingester.py +++ b/ingester.py @@ -119,7 +119,7 @@ def prepare_db(self): f"CREATE VIRTUAL TABLE embeddings USING vec0(embedding float[{self.embedding_dimension}])" ) self.db.execute( - "CREATE TABLE IF NOT EXISTS config (key TEXT PRIMARY KEY, value TEXT)" + "CREATE TABLE IF NOT EXISTS config (key TEXT PRIMARY KEY UNIQUE , value TEXT)" ) self.db.execute( "CREATE TABLE IF NOT EXISTS sample_meta (sample_id INTEGER PRIMARY KEY, json_meta TEXT)" From 10b712d45f8acf73f21c9720030911fdef1f5d27 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 15:23:45 +0800 Subject: [PATCH 17/30] feat: migrate to database with version control --- migration/database_version_control.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 migration/database_version_control.py diff --git a/migration/database_version_control.py b/migration/database_version_control.py new file mode 100644 index 0000000..4259bba --- /dev/null +++ b/migration/database_version_control.py @@ -0,0 +1,27 @@ +import argparse +import sqlite3 + + +class Migrator: + def __init__(self, db_path): + conn = sqlite3.connect(db_path) + self.conn = conn + version = self.conn.execute("SELECT count(*) FROM config WHERE key = 'version'").fetchone() + if version[0] != 0: + print("Can not migrate database with existing version") + exit(0) + + def migrate(self): + self.conn.execute("ALTER TABLE config RENAME TO config_old") + self.conn.execute("CREATE TABLE config(key TEXT PRIMARY KEY UNIQUE , value TEXT)") + self.conn.execute("INSERT INTO config SELECT key, value FROM config_old") + self.conn.execute("INSERT INTO config VALUES ('version', '0.1.0')") + self.conn.commit() + print("Migration 
completed") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Migrate the database to 0.1.0") + parser.add_argument("--db_path", help="Path to the database", default="../mercury.sqlite") + args = parser.parse_args() + migrator = Migrator(args.db_path) + migrator.migrate() From c143fda4c9f39825d3f49351e5f0b5f75d7d28db Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 15:25:24 +0800 Subject: [PATCH 18/30] chore: make code more readable --- database.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/database.py b/database.py index 99065b8..eedde5b 100644 --- a/database.py +++ b/database.py @@ -167,8 +167,7 @@ def __init__(self, mercury_db_path: str, user_db_path: str): if version is None: print("Can not determine database version.") exit(1) - else: - if version[0] != __version__: + elif version[0] != __version__: print("Database version mismatch. Please migrate the database.") exit(1) mercury_db.execute("CREATE TABLE IF NOT EXISTS annotations (\ From d6f1d7818fc4eda4d27b255f49d0911a233667e0 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sun, 5 Jan 2025 15:34:28 +0800 Subject: [PATCH 19/30] fix:: drop old table --- migration/database_version_control.py | 1 + 1 file changed, 1 insertion(+) diff --git a/migration/database_version_control.py b/migration/database_version_control.py index 4259bba..043b0f9 100644 --- a/migration/database_version_control.py +++ b/migration/database_version_control.py @@ -16,6 +16,7 @@ def migrate(self): self.conn.execute("CREATE TABLE config(key TEXT PRIMARY KEY UNIQUE , value TEXT)") self.conn.execute("INSERT INTO config SELECT key, value FROM config_old") self.conn.execute("INSERT INTO config VALUES ('version', '0.1.0')") + self.conn.execute("DROP TABLE config_old") self.conn.commit() print("Migration completed") From efb6dc320911201c757b2c5ee7db7e59417ec16e Mon Sep 17 
00:00:00 2001 From: forrestbao Date: Tue, 14 Jan 2025 17:18:38 -0600 Subject: [PATCH 20/30] clean up migration scripts --- database.py | 4 ++-- migrator.py => migration/add_login.py | 0 migration/readme.md | 32 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) rename migrator.py => migration/add_login.py (100%) create mode 100644 migration/readme.md diff --git a/database.py b/database.py index eedde5b..68b7b6d 100644 --- a/database.py +++ b/database.py @@ -165,10 +165,10 @@ def __init__(self, mercury_db_path: str, user_db_path: str): print("Open db at ", mercury_db_path) version = mercury_db.execute("SELECT value FROM config WHERE key = 'version'").fetchone() if version is None: - print("Can not determine database version.") + print("Cannot find Mercury version in the database. Please migrate the database.") exit(1) elif version[0] != __version__: - print("Database version mismatch. Please migrate the database.") + print (f"Mercury version mismatch between the code and the database file. The version in the database is {version[0]}, but the code version is {__version__}. Please migrate the database.") exit(1) mercury_db.execute("CREATE TABLE IF NOT EXISTS annotations (\ annot_id INTEGER PRIMARY KEY AUTOINCREMENT, \ sample_id INTEGER, \ diff --git a/migrator.py b/migration/add_login.py similarity index 100% rename from migrator.py rename to migration/add_login.py diff --git a/migration/readme.md b/migration/readme.md new file mode 100644 index 0000000..0f08614 --- /dev/null +++ b/migration/readme.md @@ -0,0 +1,32 @@ +# Migrating data from old versions + +Mercury, and its database structure, are rapidly iterating. + + +## Adding user log in (December 14, 2024) +This change enables credential-based login. This frees the user from the need to always use the same browser. 
+ +To migrate, use the following steps: + +```bash +python3 add_login.py export --workdir {DIR_OF_SQLITE_FILES} --csv unified_users.csv +python3 add_login.py register --csv unified_users.csv --db unified_users.sqlite +``` + +`{DIR_OF_SQLITE_FILES}` is the directory of SQLite corpus DB files that were created before login was implemented. +The script `add_login.py` extracts `user_id` and `user_name` from the corpus DB files that contain annotations and dumps them to a CSV file. +Then, the script creates a SQLite DB file, referred to as `USER_DB`, which can be passed to the updated Mercury. + +## Adding versioning (January 15, 2025) + +To deal with the ever-changing database structure, we introduce versioning to Mercury. The Mercury version is stored in the `config` table of a corpus DB. +The version of the Mercury code is stored in a special file called `version.py`. +The first version is 0.1.0. + +To migrate, use the following steps: + +```bash +python3 database_version_control.py --db_path {OLD_CORPUS_DB} +``` + +The migration happens in place. 
\ No newline at end of file From 158b7a490eb402f677ce00714f15039df20257ed Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 17:37:25 +0800 Subject: [PATCH 21/30] chore: code cleanup --- database.py | 6 ------ server.py | 8 -------- 2 files changed, 14 deletions(-) diff --git a/database.py b/database.py index ec78696..eedde5b 100644 --- a/database.py +++ b/database.py @@ -454,12 +454,6 @@ def delete_annotation(self, record_id: str, annotator: str): self.mercury_db.execute(sql_cmd, (int(record_id), annotator)) self.mercury_db.commit() - @database_lock() - def add_user(self, user_id: str, user_name: str): # TODO: remove this method since now only admin can add user - sql_cmd = "INSERT INTO users (user_id, user_name) VALUES (?, ?)" - self.mercury_db.execute(sql_cmd, (user_id, user_name)) - self.mercury_db.commit() - @database_lock() def change_user_name(self, user_id: str, user_name: str): self.user_db.execute("UPDATE users SET user_name = ? 
WHERE user_id = ?", (user_name, user_id)) diff --git a/server.py b/server.py index 1907d31..af9c768 100644 --- a/server.py +++ b/server.py @@ -136,14 +136,6 @@ async def get_labels() -> list: # get all candidate labels for human annotators return labels -@app.get("/user/new") # please update the route name to be more meaningful, e.g., /user/new_user -async def create_new_user(): - user_id = uuid.uuid4().hex - user_name = "New User" - database.add_user(user_id, user_name) - return {"key": user_id, "name": user_name} - - @app.get("/user/me") async def get_user(token: Annotated[str, Depends(oauth2_scheme)], config: Config = Depends(get_config)) -> User: credentials_exception = HTTPException( From ef4610f5ecf9ef70972d8e11404ee1f8d38ea3e9 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 18:12:28 +0800 Subject: [PATCH 22/30] feat(user_utils)!: manage user purely from a CSV file --- user_utils.py | 101 +++++++++++++++++++------------------------------- 1 file changed, 39 insertions(+), 62 deletions(-) diff --git a/user_utils.py b/user_utils.py index 3f71c6a..409170c 100644 --- a/user_utils.py +++ b/user_utils.py @@ -1,3 +1,4 @@ +import csv import sqlite3 import uuid import argon2 @@ -11,8 +12,9 @@ def generate_random_string(length=16): return ''.join(random.choice(characters) for _ in range(length)) -class DatabaseUtils: - def __init__(self, db_path): +class UserUtils: + def __init__(self, db_path, csv_path): + self.csv_path = csv_path self.conn = sqlite3.connect(db_path) self.cursor = self.conn.cursor() self.ph = argon2.PasswordHasher(time_cost=2, memory_cost=19456, parallelism=1) @@ -57,79 +59,54 @@ def delete_user(self, user_id): self.cursor.execute("DELETE FROM users WHERE user_id = ?", (user_id,)) self.conn.commit() + + def export(self): + csv_fp = open(self.csv_path, "w", newline="") + writer = csv.writer(csv_fp) + writer.writerow(["user_id", "user_name", "email", "password"]) + rows = 
self.cursor.execute("SELECT * FROM users") + for user_id, user_name, email, _ in rows: + writer.writerow([user_id, user_name, email, ""]) # Passwords are hashed, so we don't export them + csv_fp.close() + + def apply(self): + csv_fp = open(self.csv_path, newline="") + reader = csv.DictReader(csv_fp) + for row in reader: + user_id = row["user_id"] + password = row["password"] + user_name = row["user_name"] + email = row["email"] + if password is not None and password != "": + self.reset_user_password(user_id, password) + if user_name is not None and user_name != "": + self.change_user_name(user_id, user_name) + if email is not None and email != "": + self.change_user_email(user_id, email) + csv_fp.close() + def close(self): self.conn.close() def main(): main_parser = argparse.ArgumentParser(description="Manage users") - main_parser.add_argument("--sqlite_path", type=str, required=True, help="Path to the user SQLite database") + main_parser.add_argument("--sqlite_path", type=str, help="Path to the user SQLite database", default="./users.sqlite") + main_parser.add_argument("--csv", type=str, help="Path to the CSV file", default="./users.csv") user_commands_parser = main_parser.add_subparsers(dest="command", required=True) - new_user_parser = user_commands_parser.add_parser("new", help="Create a new user") - new_user_parser.add_argument("--user_name", type=str, required=True, help="Username of the new user") - new_user_parser.add_argument("--email", type=str, required=True, help="Email of the new user") - new_user_parser.add_argument("--password", type=str, help="Password of the new user") - - delete_user_parser = user_commands_parser.add_parser("delete", help="Delete a user") - delete_user_parser.add_argument("--user_id", type=str, required=True, help="User ID to delete") - - reset_password_parser = user_commands_parser.add_parser("reset_password", help="Reset a user's password") - reset_password_parser.add_argument("--user_id", type=str, required=True, help="User ID 
to reset the password") - reset_password_parser.add_argument("--new_password", type=str, help="New password for the user") - - change_email_parser = user_commands_parser.add_parser("change_email", help="Change a user's email") - change_email_parser.add_argument("--user_id", type=str, required=True, help="User ID to change the email") - change_email_parser.add_argument("--new_email", type=str, required=True, help="New email for the user") - - change_username_parser = user_commands_parser.add_parser("change_username", help="Change a user's username") - change_username_parser.add_argument("--user_id", type=str, required=True, help="User ID to change the username") - change_username_parser.add_argument("--new_username", type=str, required=True, help="New username for the user") - - get_user_parser = user_commands_parser.add_parser("get", help="Get a user") - get_user_parser.add_argument("--user_id", type=str, help="User ID to get") - get_user_parser.add_argument("--email", type=str, help="Email to get") + user_commands_parser.add_parser("export", help="Export users to a CSV file") + user_commands_parser.add_parser("apply", help="Import users from a CSV file") args = main_parser.parse_args() - db_utils = DatabaseUtils(args.sqlite_path) + db_utils = UserUtils(args.sqlite_path, args.csv) match args.command: - case "new": - if args.password: - password = db_utils.new_user(args.user_name, args.email, args.password) - else: - password = db_utils.new_user(args.user_name, args.email) - print(f"New user created with password: {password}") - case "delete": - db_utils.delete_user(args.user_id) - print("User deleted") - case "reset_password": - if args.new_password: - new_password = db_utils.reset_user_password(args.user_id, args.new_password) - else: - new_password = db_utils.reset_user_password(args.user_id) - print(f"Password reset to: {new_password}") - case "change_email": - db_utils.change_user_email(args.user_id, args.new_email) - print("Email changed") - case 
"change_username": - db_utils.change_user_name(args.user_id, args.new_username) - print("Username changed") - case "get": - if args.user_id: - user = db_utils.get_user_by_id(args.user_id) - elif args.email: - user = db_utils.get_user_by_email(args.email) - else: - user = None - - if user: - print(f"User ID: {user[0]}") - print(f"Username: {user[1]}") - print(f"Email: {user[2]}") - else: - print("User not found") + case "export": + db_utils.export() + case "apply": + db_utils.apply() case _: print("Invalid command") From 3d8399d202f5f4899e636df38f3f53dab6a7e443 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 19:15:15 +0800 Subject: [PATCH 23/30] feat(user_utils): delete user through csv --- user_utils.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/user_utils.py b/user_utils.py index 409170c..885081d 100644 --- a/user_utils.py +++ b/user_utils.py @@ -47,7 +47,9 @@ def get_user_by_id(self, user_id): self.cursor.execute("SELECT * FROM users WHERE user_id = ?", (user_id,)) return self.cursor.fetchone() - def new_user(self, user_name, email, password=generate_random_string()): + def new_user(self, user_name, email, password): + if password is None or password == "": + password = generate_random_string() hashed_password = self.ph.hash(password) user_id = uuid.uuid4().hex self.cursor.execute("INSERT INTO users (user_id, user_name, email, hashed_password) VALUES (?, ?, ?, ?)", @@ -63,13 +65,13 @@ def delete_user(self, user_id): def export(self): csv_fp = open(self.csv_path, "w", newline="") writer = csv.writer(csv_fp) - writer.writerow(["user_id", "user_name", "email", "password"]) + writer.writerow(["user_id", "user_name", "email", "password", "delete"]) rows = self.cursor.execute("SELECT * FROM users") for user_id, user_name, email, _ in rows: - writer.writerow([user_id, user_name, email, ""]) # Passwords are hashed, so we don't export them + 
writer.writerow([user_id, user_name, email, "", 0]) # Passwords are hashed, so we don't export them csv_fp.close() - def apply(self): + def apply(self, destructive: bool): csv_fp = open(self.csv_path, newline="") reader = csv.DictReader(csv_fp) for row in reader: @@ -77,6 +79,18 @@ def apply(self): password = row["password"] user_name = row["user_name"] email = row["email"] + delete = row["delete"] + if (user_id is None or self.get_user_by_id(user_id) is None) and delete != "1": + new_password = self.new_user(user_name, email, password) + print(f"Created new user {user_name} with email {email} and password {new_password}") + break + if delete == "1": + if destructive: + self.delete_user(user_id) + print(f"Deleted user {user_id}") + else: + print(f"To delete user {user_id}, use the --destructive or -d flag") + continue if password is not None and password != "": self.reset_user_password(user_id, password) if user_name is not None and user_name != "": @@ -96,7 +110,8 @@ def main(): user_commands_parser = main_parser.add_subparsers(dest="command", required=True) user_commands_parser.add_parser("export", help="Export users to a CSV file") - user_commands_parser.add_parser("apply", help="Import users from a CSV file") + apply_parser = user_commands_parser.add_parser("apply", help="Import users from a CSV file") + apply_parser.add_argument("-d", "--destructive", action="store_true", help="") args = main_parser.parse_args() @@ -106,7 +121,7 @@ def main(): case "export": db_utils.export() case "apply": - db_utils.apply() + db_utils.apply(args.destructive) case _: print("Invalid command") From 7fc191ae4b136fa64f89c58628fc048624633b41 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 19:17:38 +0800 Subject: [PATCH 24/30] chore: vocabulary --- user_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/user_utils.py b/user_utils.py index 885081d..67af63d 100644 --- a/user_utils.py +++ 
b/user_utils.py @@ -110,8 +110,8 @@ def main(): user_commands_parser = main_parser.add_subparsers(dest="command", required=True) user_commands_parser.add_parser("export", help="Export users to a CSV file") - apply_parser = user_commands_parser.add_parser("apply", help="Import users from a CSV file") - apply_parser.add_argument("-d", "--destructive", action="store_true", help="") + apply_parser = user_commands_parser.add_parser("apply", help="Apply changes from a CSV file") + apply_parser.add_argument("-d", "--destructive", action="store_true", help="Delete users") args = main_parser.parse_args() From cd9376fa53350332995fd34755606b7d21dd8d6e Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 21:05:03 +0800 Subject: [PATCH 25/30] docs: user administration --- README.md | 25 +++++++++++++++++++++++-- user_utils.py | 7 +++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 554e715..4fcaabf 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,29 @@ Mercury uses [`sqlite-vec`](https://github.com/asg017/sqlite-vec) to store and s 2. `pnpm install && pnpm build` (You need to recompile the frontend each time the UI code changes.) 3. Manually set the labels for annotators to choose from in the `labels.yaml` file. Mercury supports hierarchical labels. 4. Generate and set a JWT secret key: `export SECRET_KEY=$(openssl rand -base64 32)`. You can rerun the command above to generate a new secret key when needed, especially when the old one is compromised. Note that changing the JWT token will log out all users. Optionally, you can also set `EXPIRE_MINUTES` to change the expiration time of the JWT token. The default is 7 days (10080 minutes). -5. Administer the users: `python3 user_utils.py -h`. You need to create users before they can work on the annotation task. You can register new users, reset passwords, and delete users. 
User credentials are stored in a separate SQLite database, denoted as `USER_DB` in the following steps. -6. Start the Mercury annotation server: `python3 server.py --mercury_db {MERCURY_DB} --user_db {USER_DB}`. Be sure to set the candidate labels to choose from in the `labels.yaml` file. +5. Start the Mercury annotation server: `python3 server.py --mercury_db {MERCURY_DB} --user_db {USER_DB}`. Be sure to set the candidate labels to choose from in the `labels.yaml` file. + +### Administer the users + +Administration is done via Python script and csv file. + +1. Export user data: `python3 user_utils.py export` +2. Edit csv file + + | user_id | user_name | email | password | delete | + |---------|-----------|--------------|---------------------------------------------------|----------------------------| + | user_id | user_name | unique email | empty. fill new password if you want to change it | initial: 0 (not to delete) | + + 1. Do not edit `user_id`. If you want to create a new user, create a raw and left `user_id` empty. + + When creating new user, left `password` empty to let the script generate a random password. + 2. Edit `user_name`, `email`, `password` if you want to change them. Left them unchanged or empty if you don't. + 3. Change `delete` to 1 if you want to delete a user. If `user_id` is empty, this has no effect and a new user will be created. + +3. Apply changes: `python3 user_utils.py apply` + + If you want to delete users, confirm with `-d` flag: `python3 user_utils.py apply -d` + The annotations are stored in the `annotations` table in a SQLite database (hardcoded name `mercury.sqlite`). See the section [`annotations` table](#annotations-table-the-human-annotations) for the schema. 
diff --git a/user_utils.py b/user_utils.py index 67af63d..7ee322c 100644 --- a/user_utils.py +++ b/user_utils.py @@ -80,10 +80,10 @@ def apply(self, destructive: bool): user_name = row["user_name"] email = row["email"] delete = row["delete"] - if (user_id is None or self.get_user_by_id(user_id) is None) and delete != "1": + if user_id is None: new_password = self.new_user(user_name, email, password) print(f"Created new user {user_name} with email {email} and password {new_password}") - break + continue if delete == "1": if destructive: self.delete_user(user_id) @@ -91,6 +91,9 @@ def apply(self, destructive: bool): else: print(f"To delete user {user_id}, use the --destructive or -d flag") continue + if self.get_user_by_id(user_id) is None: + print(f"User {user_id} does not exist, ignored.") + continue if password is not None and password != "": self.reset_user_password(user_id, password) if user_name is not None and user_name != "": From 47874b63187f859465305107c4eda704072cf7cc Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Sat, 4 Jan 2025 21:07:50 +0800 Subject: [PATCH 26/30] docs: user administration Closes: #18 --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 4fcaabf..249040d 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,8 @@ Administration is done via Python script and csv file. If you want to delete users, confirm with `-d` flag: `python3 user_utils.py apply -d` +**Note** that this script does not have any validation. Please make sure the csv file is correct if you get errors. + The annotations are stored in the `annotations` table in a SQLite database (hardcoded name `mercury.sqlite`). See the section [`annotations` table](#annotations-table-the-human-annotations) for the schema. 
From 645aedfc9d3ce2ea893b0a6f077b68a3caccfd65 Mon Sep 17 00:00:00 2001 From: forrestbao Date: Mon, 13 Jan 2025 22:47:30 -0600 Subject: [PATCH 27/30] user admin without CSV --- README.md | 42 +++------ user_admin.md | 66 ++++++++++++++ user_admin.py | 245 ++++++++++++++++++++++++++++++++++++++++++++++++++ user_utils.py | 135 ---------------------------- users.sqlite | Bin 0 -> 16384 bytes 5 files changed, 322 insertions(+), 166 deletions(-) create mode 100644 user_admin.md create mode 100644 user_admin.py delete mode 100644 user_utils.py create mode 100644 users.sqlite diff --git a/README.md b/README.md index 249040d..3734c73 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Currently, Mercury only supports labeling inconsistencies between the source and ![Header](usage/selection_from_highlight.png) -## Dependencies +## Dependencies and setup > [!NOTE] > You need Python and Node.js. @@ -22,7 +22,9 @@ Mercury uses [`sqlite-vec`](https://github.com/asg017/sqlite-vec) to store and s 2. If you don't have `pnpm` installed, please install with `npm install -g pnpm` - you may need `sudo`. If you don't have `npm`, try `sudo apt install npm`. -3. To use `sqlite-vec` via Python's built-in `sqlite3` module, you must have SQLite>3.41 (otherwise `LIMIT` or `k=?` will not work properly with `rowid IN (?)` for vector search) installed and ensure Python's built-in `sqlite3` module is built for SQLite>3.41. Note that Python's built-in `sqlite3` module uses its own binary library that is independent of the OS's SQLite. So upgrading the OS's SQLite will not upgrade Python's `sqlite3` module. +3. Compile the frontend: `pnpm install && pnpm build` + +4. To use `sqlite-vec` via Python's built-in `sqlite3` module, you must have SQLite>3.41 (otherwise `LIMIT` or `k=?` will not work properly with `rowid IN (?)` for vector search) installed and ensure Python's built-in `sqlite3` module is built for SQLite>3.41. 
Note that Python's built-in `sqlite3` module uses its own binary library that is independent of the OS's SQLite. So upgrading the OS's SQLite will not upgrade Python's `sqlite3` module. To manually upgrade Python's `sqlite3` module to use SQLite>3.41, here are the steps: * Download and compile SQLite>3.41.0 from source ```bash @@ -48,7 +50,7 @@ Mercury uses [`sqlite-vec`](https://github.com/asg017/sqlite-vec) to store and s * If you are using Mac and run into troubles, please follow SQLite-vec's [instructions](https://alexgarcia.xyz/sqlite-vec/python.html#updated-sqlite). -4. To use `sqlite-vec` directly in `sqlite` prompt, simply [compile +5. To use `sqlite-vec` directly in `sqlite` prompt, simply [compile `sqlite-vec` from source](https://alexgarcia.xyz/sqlite-vec/compiling.html) and load the compiled `vec0.o`. The usage can be found in the SQLite-vec's [README](https://github.com/asg017/sqlite-vec?tab=readme-ov-file#sample-usage). @@ -58,36 +60,14 @@ Mercury uses [`sqlite-vec`](https://github.com/asg017/sqlite-vec) to store and s Run `python3 ingester.py -h` to see the options. - The ingester takes a CSV, JSON, or JSONL file and loads texts from two text columns (configurable via option `ingest_column_1` and `ingest_column_2` which default to `source` and `summary`) of the file. After ingestion, the data will be stored in the SQLite database, denoted as `MERCURY_DB` in the following steps. - -2. `pnpm install && pnpm build` (You need to recompile the frontend each time the UI code changes.) -3. Manually set the labels for annotators to choose from in the `labels.yaml` file. Mercury supports hierarchical labels. -4. Generate and set a JWT secret key: `export SECRET_KEY=$(openssl rand -base64 32)`. You can rerun the command above to generate a new secret key when needed, especially when the old one is compromised. Note that changing the JWT token will log out all users. Optionally, you can also set `EXPIRE_MINUTES` to change the expiration time of the JWT token. 
The default is 7 days (10080 minutes). -5. Start the Mercury annotation server: `python3 server.py --mercury_db {MERCURY_DB} --user_db {USER_DB}`. Be sure to set the candidate labels to choose from in the `labels.yaml` file. - -### Administer the users - -Administration is done via Python script and csv file. - -1. Export user data: `python3 user_utils.py export` -2. Edit csv file - - | user_id | user_name | email | password | delete | - |---------|-----------|--------------|---------------------------------------------------|----------------------------| - | user_id | user_name | unique email | empty. fill new password if you want to change it | initial: 0 (not to delete) | - - 1. Do not edit `user_id`. If you want to create a new user, create a raw and left `user_id` empty. - - When creating new user, left `password` empty to let the script generate a random password. - 2. Edit `user_name`, `email`, `password` if you want to change them. Left them unchanged or empty if you don't. - 3. Change `delete` to 1 if you want to delete a user. If `user_id` is empty, this has no effect and a new user will be created. - -3. Apply changes: `python3 user_utils.py apply` - - If you want to delete users, confirm with `-d` flag: `python3 user_utils.py apply -d` + The ingester takes a CSV, JSON, or JSONL file and loads texts from two text columns (configurable via option `ingest_column_1` and `ingest_column_2` which default to `source` and `summary`) of the file. After ingestion, the data will be stored in the SQLite database, denoted as `CORPUS_DB` in the following steps. -**Note** that this script does not have any validation. Please make sure the csv file is correct if you get errors. +2. Manually set the labels for annotators to choose from in the `labels.yaml` file. Mercury supports hierarchical labels. +3. Generate and set a JWT secret key: `export SECRET_KEY=$(openssl rand -base64 32)`. 
You can rerun the command above to generate a new secret key when needed, especially when the old one is compromised. Note that changing the JWT token will log out all users. Optionally, you can also set `EXPIRE_MINUTES` to change the expiration time of the JWT token. The default is 7 days (10080 minutes). +4. Start the Mercury annotation server: `python3 server.py --corpus_db {CORPUS_DB} --user_db {USER_DB}`. + Be sure to set the candidate labels to choose from in the `labels.yaml` file. The server will run on `http://localhost:8000` by default. The default `USER_DB`, namely `users.sqlite`, is distributed with the code repo with the default Email and password as `test@example.com` and `test`, respectively. +5. **Optional** To add/update/list users in a `USER_DB`, see [User administration in Mercury](user_admin.md) for more details. The annotations are stored in the `annotations` table in a SQLite database (hardcoded name `mercury.sqlite`). See the section [`annotations` table](#annotations-table-the-human-annotations) for the schema. diff --git a/user_admin.md b/user_admin.md new file mode 100644 index 0000000..dc9e9b9 --- /dev/null +++ b/user_admin.md @@ -0,0 +1,66 @@ +# User administration in Mercury + +Mercury uses a SQLite DB for user info (denoted as `USER_DB`) that is separate from the main corpus DB `CORPUS_DB`. By decoupling the user administration from the corpus, we can have a single user DB for multiple corpora and the annotation is always de-anonymized. The default name for the user DB is `users.sqlite`. + +In a Mercury `USER_DB`, the following fields are stored for each user: +* `user_id`: Hash string that uniquely identifies a user +* `user_name`: User's name (for display purpose only, not for login) +* `email`: User's email (for login) +* `hashed_password`: Hashed password (for login) + +The script for user administration is `user_admin.py`. + +Actions that can be performed: +* Creating a new user + + There are two ways to create a new user: + + 1. 
Using interactive mode: + ```bash + python user_admin.py new + ``` + then follow the prompts. + + 2. Using command line arguments: + + ```bash + python user_admin.py new -n {USER_NAME} -e {EMAIL} -p {PASSWORD} + ``` + For example, to create a user with name `Test User`, email `test@example.com` and a random password: + + ```bash + python user_admin.py new -n "Test User" -e "test@example.com" + ``` + +* Listing all users + + ```bash + python user_admin.py list + ``` + +* Changing the password or email of a user, including resetting password + + There are two ways to update a user's info: + 1. Using interactive mode: + + ```bash + python user_admin.py update + ``` + then follow the prompts. + + 2. Using command line arguments: + ```bash + python user_admin.py update -k {LOOK_UP_METHOD} -v {LOOK_UP_VALUE} -f {FIELD_TO_UPDATE} -n {NEW_VALUE} + ``` + + For example, to change the password of a user with email `test@example.com` to `abcdefg`: + + ```bash + python user_admin.py update -k email -v test@example.com -f password -n abcdefg + ``` + +For various reasons, Mercury does not support deleting users. However, you can simply change the password of a user to a random string to effectively disable the user. + + + +Mercury has minimal exception handling for user administration. 
\ No newline at end of file diff --git a/user_admin.py b/user_admin.py new file mode 100644 index 0000000..60c7487 --- /dev/null +++ b/user_admin.py @@ -0,0 +1,245 @@ +import csv +import sqlite3 +import uuid +import argon2 +import random +import string +import argparse + +def generate_random_string(length=16): + characters = string.ascii_letters + string.digits + string.punctuation + return ''.join(random.choice(characters) for _ in range(length)) + +class UserUtils: + def __init__(self, db_path): + self.conn = sqlite3.connect(db_path) + self.cursor = self.conn.cursor() + self.ph = argon2.PasswordHasher(time_cost=2, memory_cost=19456, parallelism=1) + self.conn.execute("""CREATE TABLE IF NOT EXISTS users ( + user_id TEXT PRIMARY KEY, + user_name TEXT NOT NULL, + email TEXT NOT NULL UNIQUE, + hashed_password TEXT NOT NULL)""") + self.conn.commit() + + def update_user(self, look_up_method:str=None, look_up_value:str=None, field_to_update:str=None, field_new_value:str=None): + look_up_method_mappping = { + "1": "user_id", + "2": "email", + "3": "user_name" + } + + field_to_update_mapping = { + "1": "email", + "2": "password", + "3": "user_name" + } + + prompt_mapping = { + "password": "Password of the user (leave empty for a random one): ", + "email": "New email for the user: ", + "user_name": "New display name for the user: " + } + + # Find the user to update + if look_up_method is None: + look_up_method = input("How do you wanna look up the user? (Select the number below) \n 1. By user_id (hex) \n 2. By email \n 3. By user_name \n") + look_up_method = look_up_method_mappping[look_up_method] + + if look_up_value is None: + look_up_value = input(f"Please enter the {look_up_method} of the user whom you wanna update: ") + + sql_cmd = f"SELECT * FROM users WHERE {look_up_method} = '{look_up_value}'" + + self.cursor.execute(sql_cmd) + user = self.cursor.fetchone() + if user is None: + print(f"User of {look_up_method} == {look_up_value} not found. 
Please try again.") + exit() + + # Update the user + if field_to_update is None: + field_to_update = input("What field do you want to update? (Select the number below) \n 1. Email \n 2. Password \n 3. User Name \n") + field_to_update = field_to_update_mapping[field_to_update] + + if field_new_value is None: + field_new_value = input(prompt_mapping[field_to_update]) + + plain_field_value = field_new_value + if field_to_update == "password": # password + if field_new_value == "": + plain_field_value = generate_random_string(length=6) + + if field_to_update == "password": # change name only right before SQL command + field_to_update = "hashed_password" + field_new_value = self.ph.hash(plain_field_value) + + sql_cmd = f"UPDATE users SET {field_to_update} = '{field_new_value}' WHERE {look_up_method} = '{look_up_value}'" + + self.cursor.execute(sql_cmd) + self.conn.commit() + + print (f"Successfully updated `{'password' if field_to_update=='hashed_password' else field_to_update}` TO `{plain_field_value}` \n for user whose `{look_up_method}` IS `{look_up_value}` ") # FIXME: The if statement is ugly. + + + def list_users(self): + # get schema of the table users + self.cursor.execute("PRAGMA table_info(users)") + rows = self.cursor.fetchall() + + headers = [row[1] for row in rows][:-1] + + self.cursor.execute("SELECT * FROM users") + rows = self.cursor.fetchall() + + # print the table in Markdown format + print("user_id".ljust(32), "| user_name".ljust(17), "| email") + print("-" * 32, "|", "-" * 15, "|", "-" * 20) + for row in rows: + print (row[0].ljust(32), "|", row[1].ljust(15), "|", row[2]) + # -1 to skip the hashed password + + # def reset_user_password(self, user_id, new_password=generate_random_string()): + # hashed_password = self.ph.hash(new_password) + # self.cursor.execute("UPDATE users SET hashed_password = ? 
WHERE user_id = ?", (hashed_password, user_id)) + # self.conn.commit() + # return new_password + + # def change_user_email(self, user_id, new_email): + # self.cursor.execute("UPDATE users SET email = ? WHERE user_id = ?", (new_email, user_id)) + # self.conn.commit() + + # def change_user_name(self, user_id, new_username): + # self.cursor.execute("UPDATE users SET user_name = ? WHERE user_id = ?", (new_username, user_id)) + # self.conn.commit() + + # def get_user_by_email(self, email): + # self.cursor.execute("SELECT * FROM users WHERE email = ?", (email,)) + # return self.cursor.fetchone() + + # def get_user_by_id(self, user_id): + # self.cursor.execute("SELECT * FROM users WHERE user_id = ?", (user_id,)) + # return self.cursor.fetchone() + + def new_user(self, user_name:str=None, email:str=None, password:str=None): + if email in [None, ""]: + email = input("Email of the user (must be unique): ") + + if password in [None, ""]: + password = input("Password of the user (leave empty and hit Enter for a random one): ") + if password == "": + password = generate_random_string(length=6) + + if user_name in [None, ""]: + user_name = input("User name (for display only, not as credential) for the user: ") + + hashed_password = self.ph.hash(password) + user_id = uuid.uuid4().hex + self.cursor.execute("INSERT INTO users (user_id, user_name, email, hashed_password) VALUES (?, ?, ?, ?)", + (user_id, user_name, email, hashed_password)) + self.conn.commit() + + print(f"User created with user_id {user_id}, email {email}, password {password}") + print ("Please save the email and password in a secure location. 
You will not be able to password again.") + + # def delete_user(self, user_id): + # self.cursor.execute("DELETE FROM users WHERE user_id = ?", (user_id,)) + # self.conn.commit() + + def export(self, csv_path: str): + csv_fp = open(csv_path, "w", newline="") + writer = csv.writer(csv_fp) + writer.writerow(["user_id", "user_name", "email", "password", "delete"]) + rows = self.cursor.execute("SELECT * FROM users") + for user_id, user_name, email, _ in rows: + writer.writerow([user_id, user_name, email, "", 0]) # Passwords are hashed, so we don't export them + csv_fp.close() + + # def apply(self, csv_path: str, destructive: bool): + # """Import users from a CSV file into SQLite database. + # Disabled as of Jan 13, 2025. + # """ + # csv_fp = open(csv_path, newline="") + # reader = csv.DictReader(csv_fp) + # for row in reader: + # user_id = row["user_id"] + # password = row["password"] + # user_name = row["user_name"] + # email = row["email"] + # delete = row["delete"] + # if user_id is None: + # new_password = self.new_user(user_name, email, password) + # print(f"Created new user {user_name} with email {email} and password {new_password}") + # continue + # if delete == "1": + # if destructive: + # self.delete_user(user_id) + # print(f"Deleted user {user_id}") + # else: + # print(f"To delete user {user_id}, use the --destructive or -d flag") + # continue + # if self.get_user_by_id(user_id) is None: + # print(f"User {user_id} does not exist, ignored.") + # continue + # if password is not None and password != "": + # self.reset_user_password(user_id, password) + # if user_name is not None and user_name != "": + # self.change_user_name(user_id, user_name) + # if email is not None and email != "": + # self.change_user_email(user_id, email) + # csv_fp.close() + + def close(self): + self.conn.close() + + +def main(): + main_parser = argparse.ArgumentParser(description="Manage users") + main_parser.add_argument("--user_db", type=str, help="Path to SQLite database storing user 
info", default="./users.sqlite") + + user_commands_parser = main_parser.add_subparsers(dest="command", required=True) + + user_commands_parser.add_parser("list", help="List users") + + new_parser = user_commands_parser.add_parser("new", help="Create a new user") + new_parser.add_argument("-e", "--email", type=str, help="Email of the user. Must be unique. For login.") + new_parser.add_argument("-p", "--password", type=str, help="Password of the user. Leave empty for a random one.") + new_parser.add_argument("-n", "--user_name", type=str, help="User name for display, not for logging in.") + + update_parser = user_commands_parser.add_parser("update", help="Update a user's info including resetting password") + update_parser.add_argument("-k", "--look_up_method", type=str, help="How to look up the user") + update_parser.add_argument("-v", "--look_up_value", type=str, help="Value to look up the user") + update_parser.add_argument("-f", "--field_to_update", type=str, help="Field to update") + update_parser.add_argument("-n", "--field_new_value", type=str, help="New value for the field") + + export_parser = user_commands_parser.add_parser("export", help="Export user info to a CSV file") + export_parser.add_argument("csv", type=str, help="Path to the CSV file for exporting") + + # Disabled as of Jan 13, 2025 + # apply_parser = user_commands_parser.add_parser("apply", help="Apply changes from a CSV file") + # apply_parser.add_argument("csv", type=str, help="Path to the CSV file for importing") + # apply_parser.add_argument("-d", "--destructive", action="store_true", help="Delete users") + + args = main_parser.parse_args() + + db_utils = UserUtils(args.user_db) + + match args.command: + case "export": + db_utils.export(csv_path=args.csv) + case "list": + db_utils.list_users() + case "new": + db_utils.new_user(user_name=args.user_name, email=args.email, password=args.password) + case "update": + db_utils.update_user(look_up_method=args.look_up_method, 
look_up_value=args.look_up_value, field_to_update=args.field_to_update, field_new_value=args.field_new_value) + # case "apply": + # db_utils.apply(csv_path=args.csv, destructive=args.destructive) + case _: + print("Invalid command") + + db_utils.close() + + +if __name__ == "__main__": + main() diff --git a/user_utils.py b/user_utils.py deleted file mode 100644 index 7ee322c..0000000 --- a/user_utils.py +++ /dev/null @@ -1,135 +0,0 @@ -import csv -import sqlite3 -import uuid -import argon2 -import random -import string -import argparse - - -def generate_random_string(length=16): - characters = string.ascii_letters + string.digits + string.punctuation - return ''.join(random.choice(characters) for _ in range(length)) - - -class UserUtils: - def __init__(self, db_path, csv_path): - self.csv_path = csv_path - self.conn = sqlite3.connect(db_path) - self.cursor = self.conn.cursor() - self.ph = argon2.PasswordHasher(time_cost=2, memory_cost=19456, parallelism=1) - self.conn.execute("""CREATE TABLE IF NOT EXISTS users ( - user_id TEXT PRIMARY KEY, - user_name TEXT NOT NULL, - email TEXT NOT NULL UNIQUE, - hashed_password TEXT NOT NULL)""") - self.conn.commit() - - def reset_user_password(self, user_id, new_password=generate_random_string()): - hashed_password = self.ph.hash(new_password) - self.cursor.execute("UPDATE users SET hashed_password = ? WHERE user_id = ?", (hashed_password, user_id)) - self.conn.commit() - return new_password - - def change_user_email(self, user_id, new_email): - self.cursor.execute("UPDATE users SET email = ? WHERE user_id = ?", (new_email, user_id)) - self.conn.commit() - - def change_user_name(self, user_id, new_username): - self.cursor.execute("UPDATE users SET user_name = ? 
WHERE user_id = ?", (new_username, user_id)) - self.conn.commit() - - def get_user_by_email(self, email): - self.cursor.execute("SELECT * FROM users WHERE email = ?", (email,)) - return self.cursor.fetchone() - - def get_user_by_id(self, user_id): - self.cursor.execute("SELECT * FROM users WHERE user_id = ?", (user_id,)) - return self.cursor.fetchone() - - def new_user(self, user_name, email, password): - if password is None or password == "": - password = generate_random_string() - hashed_password = self.ph.hash(password) - user_id = uuid.uuid4().hex - self.cursor.execute("INSERT INTO users (user_id, user_name, email, hashed_password) VALUES (?, ?, ?, ?)", - (user_id, user_name, email, hashed_password)) - self.conn.commit() - return password - - def delete_user(self, user_id): - self.cursor.execute("DELETE FROM users WHERE user_id = ?", (user_id,)) - self.conn.commit() - - - def export(self): - csv_fp = open(self.csv_path, "w", newline="") - writer = csv.writer(csv_fp) - writer.writerow(["user_id", "user_name", "email", "password", "delete"]) - rows = self.cursor.execute("SELECT * FROM users") - for user_id, user_name, email, _ in rows: - writer.writerow([user_id, user_name, email, "", 0]) # Passwords are hashed, so we don't export them - csv_fp.close() - - def apply(self, destructive: bool): - csv_fp = open(self.csv_path, newline="") - reader = csv.DictReader(csv_fp) - for row in reader: - user_id = row["user_id"] - password = row["password"] - user_name = row["user_name"] - email = row["email"] - delete = row["delete"] - if user_id is None: - new_password = self.new_user(user_name, email, password) - print(f"Created new user {user_name} with email {email} and password {new_password}") - continue - if delete == "1": - if destructive: - self.delete_user(user_id) - print(f"Deleted user {user_id}") - else: - print(f"To delete user {user_id}, use the --destructive or -d flag") - continue - if self.get_user_by_id(user_id) is None: - print(f"User {user_id} does not 
exist, ignored.") - continue - if password is not None and password != "": - self.reset_user_password(user_id, password) - if user_name is not None and user_name != "": - self.change_user_name(user_id, user_name) - if email is not None and email != "": - self.change_user_email(user_id, email) - csv_fp.close() - - def close(self): - self.conn.close() - - -def main(): - main_parser = argparse.ArgumentParser(description="Manage users") - main_parser.add_argument("--sqlite_path", type=str, help="Path to the user SQLite database", default="./users.sqlite") - main_parser.add_argument("--csv", type=str, help="Path to the CSV file", default="./users.csv") - user_commands_parser = main_parser.add_subparsers(dest="command", required=True) - - user_commands_parser.add_parser("export", help="Export users to a CSV file") - apply_parser = user_commands_parser.add_parser("apply", help="Apply changes from a CSV file") - apply_parser.add_argument("-d", "--destructive", action="store_true", help="Delete users") - - args = main_parser.parse_args() - - db_utils = UserUtils(args.sqlite_path, args.csv) - - match args.command: - case "export": - db_utils.export() - case "apply": - db_utils.apply(args.destructive) - case _: - print("Invalid command") - - db_utils.close() - - -if __name__ == "__main__": - main() diff --git a/users.sqlite b/users.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..8d4aa3b763bebec28381beb4bbdedb905fb89b6f GIT binary patch literal 16384 zcmeI&%Wm306b9fiL}?Q#aiuQ2%t-a3s3Zz5F=2reL#-CYK+L6WB2{RJ2e5)MzJ${5 z=%e&~y3MMOkO%0pgHxoED2m!$r2lBfbMcYw`F1mM%eLH@)4m^iET*r?Ga^akh*CmG zO4Kz`mm!JH?eX&7eQk>DeGRtcUnISKMYgZxAM*G6QZWw!2tWV=5P$##AOHafKmY;| z_*a2>ReGW*iu5grS#QKIBOXS{B~@vfM%$!qqimTpnW8T@>0hy!>$(nYn;+Zsq*<*Q z%~N`8o@Vc6jhV-9^6HH?t#>SIHHmx79jzwNPQ7Y(%+DhmhxSaTh9TFMnwPh9c`dNwX;*-0sV}|hBh_4)qodid@=+e009U<00Izz X00bZa0SG_<0(S(Sr*}7Q|6uSNMER(q literal 0 HcmV?d00001 From 1cc286445e64029fadcd1e3e124ca3277cbf0fc9 Mon Sep 17 00:00:00 2001 From: 
forrestbao Date: Tue, 14 Jan 2025 17:18:38 -0600 Subject: [PATCH 28/30] clean up migration scripts --- database.py | 4 ++-- migrator.py => migration/add_login.py | 0 migration/readme.md | 32 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) rename migrator.py => migration/add_login.py (100%) create mode 100644 migration/readme.md diff --git a/database.py b/database.py index eedde5b..68b7b6d 100644 --- a/database.py +++ b/database.py @@ -165,10 +165,10 @@ def __init__(self, mercury_db_path: str, user_db_path: str): print("Open db at ", mercury_db_path) version = mercury_db.execute("SELECT value FROM config WHERE key = 'version'").fetchone() if version is None: - print("Can not determine database version.") + print("Cannot find Mercury version in the database. Please migrate the database.") exit(1) elif version[0] != __version__: - print("Database version mismatch. Please migrate the database.") + print (f"Mercury version mismatch between the code and the database file. The version in the database is {version[0]}, but the code version is {__version__}. Please migrate the database.") exit(1) mercury_db.execute("CREATE TABLE IF NOT EXISTS annotations (\ annot_id INTEGER PRIMARY KEY AUTOINCREMENT, \ diff --git a/migrator.py b/migration/add_login.py similarity index 100% rename from migrator.py rename to migration/add_login.py diff --git a/migration/readme.md b/migration/readme.md new file mode 100644 index 0000000..0f08614 --- /dev/null +++ b/migration/readme.md @@ -0,0 +1,32 @@ +# Migrating data from old versions + +Mercury, and its database structure, are rapidly iterating. + + +## Adding user log in (December 14, 2024) +This change enables credential-based login. This frees the user from the need to always use the same browser. 
+ +To migrate, use the following steps: + +```bash +python3 add_login.py export --workdir {DIR_OF_SQLITE_FILES} --csv unified_users.csv +python3 add_login.py register --csv unified_users.csv --db unified_users.sqlite +``` + +`{DIR_OF_SQLITE_FILES}` is the directory of SQLite corpus DB files that were created before login was implemented. +The script `add_login.py` extracts `user_id` and `user_name` from corpus DB files that contain annotations and dumps them to a CSV file. +Then, the script creates a SQLite DB file, referred to as `USER_DB`, which can be passed to the updated Mercury. + +## Adding versioning (January 15, 2025) + +To deal with the ever-changing database structure, we introduce versioning to Mercury. The version of Mercury is stored in the `config` table of a corpus DB. +The version of Mercury code is stored in a special file called `version.py`. +The first version is 0.1.0. + +To migrate, use the following steps: + +```bash +python3 database_version_control.py --db_path {OLD_CORPUS_DB} +``` + +The migration happens in place. \ No newline at end of file From e4580f6901991c47e4a70b656cc225859909e4f2 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:15:49 +0800 Subject: [PATCH 29/30] chore: fix typo --- user_admin.py | 2 +- users.sqlite | Bin 16384 -> 0 bytes 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 users.sqlite diff --git a/user_admin.py b/user_admin.py index 60c7487..d54a051 100644 --- a/user_admin.py +++ b/user_admin.py @@ -140,7 +140,7 @@ def new_user(self, user_name:str=None, email:str=None, password:str=None): self.conn.commit() print(f"User created with user_id {user_id}, email {email}, password {password}") - print ("Please save the email and password in a secure location. You will not be able to password again.") + print ("Please save the email and password in a secure location. 
You will not be able to reveal password again.") # def delete_user(self, user_id): # self.cursor.execute("DELETE FROM users WHERE user_id = ?", (user_id,)) diff --git a/users.sqlite b/users.sqlite deleted file mode 100644 index 8d4aa3b763bebec28381beb4bbdedb905fb89b6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeI&%Wm306b9fiL}?Q#aiuQ2%t-a3s3Zz5F=2reL#-CYK+L6WB2{RJ2e5)MzJ${5 z=%e&~y3MMOkO%0pgHxoED2m!$r2lBfbMcYw`F1mM%eLH@)4m^iET*r?Ga^akh*CmG zO4Kz`mm!JH?eX&7eQk>DeGRtcUnISKMYgZxAM*G6QZWw!2tWV=5P$##AOHafKmY;| z_*a2>ReGW*iu5grS#QKIBOXS{B~@vfM%$!qqimTpnW8T@>0hy!>$(nYn;+Zsq*<*Q z%~N`8o@Vc6jhV-9^6HH?t#>SIHHmx79jzwNPQ7Y(%+DhmhxSaTh9TFMnwPh9c`dNwX;*-0sV}|hBh_4)qodid@=+e009U<00Izz X00bZa0SG_<0(S(Sr*}7Q|6uSNMER(q From 6b77e619334a382c03c4a773998a883032739006 Mon Sep 17 00:00:00 2001 From: Nanami Nakano <64841155+NanamiNakano@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:54:27 +0800 Subject: [PATCH 30/30] fix: user admin --- user_admin.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/user_admin.py b/user_admin.py index d54a051..0f82efb 100644 --- a/user_admin.py +++ b/user_admin.py @@ -63,25 +63,28 @@ def update_user(self, look_up_method:str=None, look_up_value:str=None, field_to field_to_update = field_to_update_mapping[field_to_update] if field_new_value is None: - field_new_value = input(prompt_mapping[field_to_update]) + field_new_value = input(prompt_mapping[field_to_update]).strip() plain_field_value = field_new_value - if field_to_update == "password": # password + if field_to_update == "password": # password if field_new_value == "": - plain_field_value = generate_random_string(length=6) + generated_password = generate_random_string(length=6) + plain_field_value = field_new_value + field_new_value = self.ph.hash(generated_password) + print(field_new_value) + sql_cmd = f"UPDATE users SET hashed_password = '{field_new_value}' WHERE {look_up_method} = 
'{look_up_value}'" + else: + if field_new_value == "": + print("Field value cannot be empty.") + return + sql_cmd = f"UPDATE users SET {field_to_update} = '{field_new_value}' WHERE {look_up_method} = '{look_up_value}'" - if field_to_update == "password": # change name only right before SQL command - field_to_update = "hashed_password" - field_new_value = self.ph.hash(plain_field_value) + self.cursor.execute(sql_cmd).fetchone() + self.conn.commit() - sql_cmd = f"UPDATE users SET {field_to_update} = '{field_new_value}' WHERE {look_up_method} = '{look_up_value}'" + print (f"Successfully updated `{field_to_update}` TO `{plain_field_value}` \n for user whose `{look_up_method}` IS `{look_up_value}` ") - self.cursor.execute(sql_cmd) - self.conn.commit() - print (f"Successfully updated `{'password' if field_to_update=='hashed_password' else field_to_update}` TO `{plain_field_value}` \n for user whose `{look_up_method}` IS `{look_up_value}` ") # FIXME: The if statement is ugly. - - def list_users(self): # get schema of the table users self.cursor.execute("PRAGMA table_info(users)") @@ -134,7 +137,7 @@ def new_user(self, user_name:str=None, email:str=None, password:str=None): user_name = input("User name (for display only, not as credential) for the user: ") hashed_password = self.ph.hash(password) - user_id = uuid.uuid4().hex + user_id = uuid.uuid4().hex self.cursor.execute("INSERT INTO users (user_id, user_name, email, hashed_password) VALUES (?, ?, ?, ?)", (user_id, user_name, email, hashed_password)) self.conn.commit() @@ -157,7 +160,7 @@ def export(self, csv_path: str): # def apply(self, csv_path: str, destructive: bool): # """Import users from a CSV file into SQLite database. - # Disabled as of Jan 13, 2025. + # Disabled as of Jan 13, 2025. 
# """ # csv_fp = open(csv_path, newline="") # reader = csv.DictReader(csv_fp) @@ -196,11 +199,11 @@ def close(self): def main(): main_parser = argparse.ArgumentParser(description="Manage users") main_parser.add_argument("--user_db", type=str, help="Path to SQLite database storing user info", default="./users.sqlite") - + user_commands_parser = main_parser.add_subparsers(dest="command", required=True) user_commands_parser.add_parser("list", help="List users") - + new_parser = user_commands_parser.add_parser("new", help="Create a new user") new_parser.add_argument("-e", "--email", type=str, help="Email of the user. Must be unique. For login.") new_parser.add_argument("-p", "--password", type=str, help="Password of the user. Leave empty for a random one.")