From ccf930148f6dc3249e702bca88efc3fce1e80fe6 Mon Sep 17 00:00:00 2001 From: chase mateusiak Date: Mon, 19 Feb 2024 09:13:56 -0600 Subject: [PATCH] ready for aws cloudformation testing --- .pre-commit-config.yaml | 2 +- .vscode/settings.json | 13 - check_spot_termination.py | 31 ++ compose/aws_services/celery_worker/Dockerfile | 2 +- compose/aws_services/django/entrypoint | 2 +- compose/local/django/celery/worker/start | 2 +- compose/production/django/entrypoint | 2 +- compose/production/django/entrypoint_modified | 59 +++ config/aws_cloudform.json | 2 +- config/settings/production.py | 6 +- docs/conf.py | 5 +- docs/model_diagrams/Binding.json | 89 +++++ docs/model_diagrams/Binding.svg | 1 + docs/model_diagrams/BindingManualQC.json | 54 +++ docs/model_diagrams/BindingManualQC.svg | 1 + .../CallingCardsBackground.json | 63 ++++ .../model_diagrams/CallingCardsBackground.svg | 1 + docs/model_diagrams/ChrMap.json | 70 ++++ docs/model_diagrams/ChrMap.svg | 1 + docs/model_diagrams/DataSource.json | 72 ++++ docs/model_diagrams/DataSource.svg | 1 + docs/model_diagrams/Expression.json | 85 +++++ docs/model_diagrams/Expression.svg | 1 + docs/model_diagrams/ExpressionManualQC.json | 38 ++ docs/model_diagrams/ExpressionManualQC.svg | 1 + docs/model_diagrams/FileFormat.json | 81 +++++ docs/model_diagrams/FileFormat.svg | 1 + docs/model_diagrams/GenomicFeature.json | 75 ++++ docs/model_diagrams/GenomicFeature.svg | 1 + docs/model_diagrams/PromoterSet.json | 46 +++ docs/model_diagrams/PromoterSet.svg | 1 + docs/model_diagrams/Regulator.json | 52 +++ docs/model_diagrams/Regulator.svg | 1 + rds_redis_ec2_config.yml | 1 - yeastregulatorydb/conftest.py | 1 + .../migrations/0005_auto_20240210_1228.py | 23 ++ .../api/filters/BindingFilter.py | 5 +- .../api/filters/ExpressionFilter.py | 1 - .../api/filters/PromoterSetSigFilter.py | 5 + .../api/views/BindingManualQCViewSet.py | 72 +++- .../api/views/BindingViewSet.py | 12 +- .../api/views/ExpressionManualQCViewSet.py | 20 +- 
.../api/views/ExpressionViewSet.py | 15 +- .../api/views/GenomicFeatureViewSet.py | 1 - .../api/views/PromoterSetSigViewSet.py | 5 +- ..._bindingmanualqc_best_datatype_and_more.py | 55 +++ .../regulatory_data/models/Binding.py | 3 - .../models/CallingCardsBackground.py | 20 -- .../regulatory_data/models/Expression.py | 23 +- .../regulatory_data/models/PromoterSetSig.py | 17 - .../regulatory_data/models/RankResponse.py | 6 - .../regulatory_data/tasks/BaseTask.py | 38 ++ .../regulatory_data/tasks/__init__.py | 5 +- .../regulatory_data/tasks/chained_tasks.py | 41 +++ .../combine_cc_passing_replicates_task.py | 114 ++++++ .../tasks/promoter_significance_task.py | 1 + .../ccexperiment_292_hap5_chrI.csv.gz | Bin 0 -> 1593 bytes .../ccexperiment_297_hap5_chrI.csv.gz | Bin 0 -> 1295 bytes .../ccexperiment_302_hap5_chrI.csv.gz | Bin 0 -> 1515 bytes .../ccexperiment_311_hap5_chrI.csv.gz | Bin 0 -> 2480 bytes .../binding/chipexo/10535_hap5_chrI.csv.gz | Bin 0 -> 325 bytes .../binding/chipexo/10706_hap5_chrI.csv.gz | Bin 0 -> 372 bytes .../config/binding_bulk_cc_upload.csv | 3 + .../regulatory_data/tests/test_filters.py | 23 +- .../regulatory_data/tests/test_tasks.py | 12 +- .../regulatory_data/tests/test_views.py | 340 ++++++++++++------ .../regulatory_data/utils/validate_df.py | 2 +- .../utils/create_model_diagram.py | 115 ++++++ 68 files changed, 1595 insertions(+), 246 deletions(-) delete mode 100644 .vscode/settings.json create mode 100644 check_spot_termination.py create mode 100644 compose/production/django/entrypoint_modified create mode 100644 docs/model_diagrams/Binding.json create mode 100644 docs/model_diagrams/Binding.svg create mode 100644 docs/model_diagrams/BindingManualQC.json create mode 100644 docs/model_diagrams/BindingManualQC.svg create mode 100644 docs/model_diagrams/CallingCardsBackground.json create mode 100644 docs/model_diagrams/CallingCardsBackground.svg create mode 100644 docs/model_diagrams/ChrMap.json create mode 100644 
docs/model_diagrams/ChrMap.svg create mode 100644 docs/model_diagrams/DataSource.json create mode 100644 docs/model_diagrams/DataSource.svg create mode 100644 docs/model_diagrams/Expression.json create mode 100644 docs/model_diagrams/Expression.svg create mode 100644 docs/model_diagrams/ExpressionManualQC.json create mode 100644 docs/model_diagrams/ExpressionManualQC.svg create mode 100644 docs/model_diagrams/FileFormat.json create mode 100644 docs/model_diagrams/FileFormat.svg create mode 100644 docs/model_diagrams/GenomicFeature.json create mode 100644 docs/model_diagrams/GenomicFeature.svg create mode 100644 docs/model_diagrams/PromoterSet.json create mode 100644 docs/model_diagrams/PromoterSet.svg create mode 100644 docs/model_diagrams/Regulator.json create mode 100644 docs/model_diagrams/Regulator.svg create mode 100644 yeastregulatorydb/contrib/sites/migrations/0005_auto_20240210_1228.py create mode 100644 yeastregulatorydb/regulatory_data/migrations/0016_alter_bindingmanualqc_best_datatype_and_more.py create mode 100644 yeastregulatorydb/regulatory_data/tasks/BaseTask.py create mode 100644 yeastregulatorydb/regulatory_data/tasks/combine_cc_passing_replicates_task.py create mode 100644 yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_292_hap5_chrI.csv.gz create mode 100644 yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_297_hap5_chrI.csv.gz create mode 100644 yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_302_hap5_chrI.csv.gz create mode 100644 yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_311_hap5_chrI.csv.gz create mode 100644 yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/10535_hap5_chrI.csv.gz create mode 100644 yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/10706_hap5_chrI.csv.gz create mode 100644 
"""Watch for an EC2 Spot interruption notice and stop the Celery worker.

The script polls the instance metadata service. Once a Spot interruption
notice appears it asks the Celery worker to shut down, then exits.
"""
import subprocess
import time

import requests

METADATA_BASE_URL = "http://169.254.169.254/latest"
TOKEN_URL = METADATA_BASE_URL + "/api/token"
TERMINATION_URL = METADATA_BASE_URL + "/meta-data/spot/instance-action"

# The metadata service is link-local, so a short timeout is enough.
REQUEST_TIMEOUT_SECONDS = 2
# A Spot notice arrives only ~2 minutes before the instance is reclaimed;
# poll often so the worker keeps most of that window to finish its tasks.
POLL_INTERVAL_SECONDS = 5


def _metadata_headers():
    """Return IMDSv2 session headers, or an empty dict when no token is available.

    Instances that require IMDSv2 answer un-tokenised requests with 401, which
    would make the termination check silently report "no notice" forever.
    Falling back to no headers keeps IMDSv1-only setups working.
    """
    try:
        response = requests.put(
            TOKEN_URL,
            headers={"X-aws-ec2-metadata-token-ttl-seconds": "60"},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.exceptions.RequestException:
        return {}
    if response.status_code != 200:
        return {}
    return {"X-aws-ec2-metadata-token": response.text}


def check_spot_termination():
    """Check for Spot Instance termination notice.

    Returns:
        True when the metadata endpoint answers 200 (a notice is pending),
        False on any other status or when the endpoint cannot be reached.
    """
    try:
        response = requests.get(
            TERMINATION_URL,
            headers=_metadata_headers(),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.exceptions.RequestException:
        # Best effort: an unreachable endpoint is treated as "no notice".
        return False
    return response.status_code == 200


def graceful_shutdown():
    """Initiate a graceful shutdown of the Celery worker.

    SIGTERM triggers Celery's warm shutdown, letting running tasks finish.
    SIGKILL (``pkill -9``) would kill the worker mid-task.
    """
    result = subprocess.run(["pkill", "-TERM", "celery"], check=False)
    if result.returncode != 0:
        # pkill exits non-zero when nothing matched or signalling failed.
        print(f"pkill exited with status {result.returncode}; no Celery process was signalled.")


if __name__ == "__main__":
    while True:
        if check_spot_termination():
            print("Spot Instance termination notice detected. Initiating graceful shutdown.")
            graceful_shutdown()
            break
        time.sleep(POLL_INTERVAL_SECONDS)
#!/bin/bash
# Container entrypoint: derive the Celery/Django connection URLs, make sure the
# target PostgreSQL database exists (creating it if needed), then exec the
# container command.

set -o errexit
set -o pipefail
set -o nounset

# N.B. If only .env files supported variable expansion...
export CELERY_BROKER_URL="redis://${REDIS_HOST}:${REDIS_PORT}/0"

if [ -z "${POSTGRES_USER}" ]; then
    base_postgres_image_default_user='postgres'
    export POSTGRES_USER="${base_postgres_image_default_user}"
fi
export DATABASE_URL="postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}"

# The Python snippet below reads its settings from the environment, so make
# sure every value is exported even if it was set as a plain shell variable.
export POSTGRES_PASSWORD POSTGRES_HOST POSTGRES_PORT POSTGRES_DB

# The heredoc delimiter is quoted on purpose: bash must NOT expand variables
# into the Python source, otherwise a quote or backslash in a credential
# would break the script or inject code.
python << 'END'
import os

import psycopg
from psycopg import sql


def database_exists(conn_params, dbname):
    """Return True when a database named `dbname` is listed in pg_database."""
    with psycopg.connect(**conn_params) as conn:
        with conn.cursor() as cur:
            cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (dbname,))
            return cur.fetchone() is not None


def create_database(conn_params, dbname):
    """Create `dbname`; CREATE DATABASE cannot run inside a transaction."""
    with psycopg.connect(**conn_params, autocommit=True) as conn:
        with conn.cursor() as cur:
            # Identifier() quotes the name safely, unlike string formatting.
            cur.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(dbname)))


conn_params = {
    "dbname": "postgres",  # connect to the default database to check/create
    "user": os.environ["POSTGRES_USER"],
    "password": os.environ["POSTGRES_PASSWORD"],
    "host": os.environ["POSTGRES_HOST"],
    "port": os.environ["POSTGRES_PORT"],
}

dbname = os.environ["POSTGRES_DB"]

if not database_exists(conn_params, dbname):
    print("Database does not exist. Creating database: {}".format(dbname))
    create_database(conn_params, dbname)
else:
    print("Database {} already exists.".format(dbname))

# Now connect to the target database
conn_params["dbname"] = dbname
with psycopg.connect(**conn_params) as conn:
    print('Connected to the database successfully')

END

>&2 echo 'PostgreSQL is available'

exec "$@"
diff --git a/docs/conf.py b/docs/conf.py index b4f19b0..2c18825 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,6 +12,7 @@ import os import sys + import django if os.getenv("READTHEDOCS", default=False) == "True": @@ -20,8 +21,10 @@ os.environ["USE_DOCKER"] = "no" else: sys.path.insert(0, os.path.abspath("/app")) + +REDIS_URL = "redis://" + os.getenv("REDIS_HOST", "redis") + ":" + os.getenv("REDIS_PORT", "6379") + "/0" os.environ["DATABASE_URL"] = "sqlite:///readthedocs.db" -os.environ["CELERY_BROKER_URL"] = os.getenv("REDIS_URL", "redis://redis:6379") +os.environ["CELERY_BROKER_URL"] = REDIS_URL os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local") django.setup() diff --git a/docs/model_diagrams/Binding.json b/docs/model_diagrams/Binding.json new file mode 100644 index 0000000..446f38c --- /dev/null +++ b/docs/model_diagrams/Binding.json @@ -0,0 +1,89 @@ +{ + "model_name": "Binding", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "batch", + "type": "CharField" + }, + { + "name": "replicate", + "type": "PositiveIntegerField" + }, + { + "name": "source_orig_id", + "type": "CharField" + }, + { + "name": "strain", + "type": "CharField" + }, + { + "name": "condition", + "type": "CharField" + }, + { + "name": "file", + "type": "FileField" + }, + { + "name": "genomic_inserts", + "type": "PositiveIntegerField" + }, + { + "name": "mito_inserts", + "type": "PositiveIntegerField" + }, + { + "name": "plasmid_inserts", + "type": "PositiveIntegerField" + }, + { + "name": "notes", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "bindingmanualqc", + "related_model": "BindingManualQC", + "type": "UnknownRelation" + }, + { + "name": "promotersetsig", + "related_model": "PromoterSetSig", + "type": "UnknownRelation" + }, + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + 
}, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "regulator", + "related_model": "Regulator", + "type": "ForeignKey" + }, + { + "name": "source", + "related_model": "DataSource", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/Binding.svg b/docs/model_diagrams/Binding.svg new file mode 100644 index 0000000..5d6e3ee --- /dev/null +++ b/docs/model_diagrams/Binding.svg @@ -0,0 +1 @@ +Bindingid (BigAutoField)upload_date (DateField)modified_date (DateTimeField)batch (CharField)replicate (PositiveIntegerField)source_orig_id (CharField)strain (CharField)condition (CharField)file (FileField)genomic_inserts (PositiveIntegerField)mito_inserts (PositiveIntegerField)plasmid_inserts (PositiveIntegerField)notes (CharField)bindingmanualqc -> BindingManualQC (UnknownRelation)promotersetsig -> PromoterSetSig (UnknownRelation)uploader -> User (ForeignKey)modifier -> User (ForeignKey)regulator -> Regulator (ForeignKey)source -> DataSource (ForeignKey) \ No newline at end of file diff --git a/docs/model_diagrams/BindingManualQC.json b/docs/model_diagrams/BindingManualQC.json new file mode 100644 index 0000000..5b8b7a5 --- /dev/null +++ b/docs/model_diagrams/BindingManualQC.json @@ -0,0 +1,54 @@ +{ + "model_name": "BindingManualQC", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "best_datatype", + "type": "CharField" + }, + { + "name": "data_usable", + "type": "CharField" + }, + { + "name": "passing_replicate", + "type": "CharField" + }, + { + "name": "rank_recall", + "type": "CharField" + }, + { + "name": "notes", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": 
"binding", + "related_model": "Binding", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/BindingManualQC.svg b/docs/model_diagrams/BindingManualQC.svg new file mode 100644 index 0000000..b86d455 --- /dev/null +++ b/docs/model_diagrams/BindingManualQC.svg @@ -0,0 +1 @@ +BindingManualQCid (BigAutoField)upload_date (DateField)modified_date (DateTimeField)best_datatype (CharField)data_usable (CharField)passing_replicate (CharField)rank_recall (CharField)notes (CharField)uploader -> User (ForeignKey)modifier -> User (ForeignKey)binding -> Binding (ForeignKey) \ No newline at end of file diff --git a/docs/model_diagrams/CallingCardsBackground.json b/docs/model_diagrams/CallingCardsBackground.json new file mode 100644 index 0000000..d05b7a1 --- /dev/null +++ b/docs/model_diagrams/CallingCardsBackground.json @@ -0,0 +1,63 @@ +{ + "model_name": "CallingCardsBackground", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "name", + "type": "CharField" + }, + { + "name": "file", + "type": "FileField" + }, + { + "name": "genomic_inserts", + "type": "PositiveIntegerField" + }, + { + "name": "mito_inserts", + "type": "PositiveIntegerField" + }, + { + "name": "plasmid_inserts", + "type": "PositiveIntegerField" + }, + { + "name": "notes", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "promotersetsig", + "related_model": "PromoterSetSig", + "type": "UnknownRelation" + }, + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "fileformat", + "related_model": "FileFormat", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/CallingCardsBackground.svg b/docs/model_diagrams/CallingCardsBackground.svg new file mode 
100644 index 0000000..b62a016 --- /dev/null +++ b/docs/model_diagrams/CallingCardsBackground.svg @@ -0,0 +1 @@ +CallingCardsBackgroundid (BigAutoField)upload_date (DateField)modified_date (DateTimeField)name (CharField)file (FileField)genomic_inserts (PositiveIntegerField)mito_inserts (PositiveIntegerField)plasmid_inserts (PositiveIntegerField)notes (CharField)promotersetsig -> PromoterSetSig (UnknownRelation)uploader -> User (ForeignKey)modifier -> User (ForeignKey)fileformat -> FileFormat (ForeignKey) \ No newline at end of file diff --git a/docs/model_diagrams/ChrMap.json b/docs/model_diagrams/ChrMap.json new file mode 100644 index 0000000..da1a0e0 --- /dev/null +++ b/docs/model_diagrams/ChrMap.json @@ -0,0 +1,70 @@ +{ + "model_name": "ChrMap", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "refseq", + "type": "CharField" + }, + { + "name": "igenomes", + "type": "CharField" + }, + { + "name": "ensembl", + "type": "CharField" + }, + { + "name": "ucsc", + "type": "CharField" + }, + { + "name": "mitra", + "type": "CharField" + }, + { + "name": "numbered", + "type": "CharField" + }, + { + "name": "chr", + "type": "CharField" + }, + { + "name": "seqlength", + "type": "PositiveIntegerField" + }, + { + "name": "type", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "genomicfeature", + "related_model": "GenomicFeature", + "type": "UnknownRelation" + }, + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/ChrMap.svg b/docs/model_diagrams/ChrMap.svg new file mode 100644 index 0000000..b632398 --- /dev/null +++ b/docs/model_diagrams/ChrMap.svg @@ -0,0 +1 @@ +ChrMapid (BigAutoField)upload_date (DateField)modified_date 
(DateTimeField)refseq (CharField)igenomes (CharField)ensembl (CharField)ucsc (CharField)mitra (CharField)numbered (CharField)chr (CharField)seqlength (PositiveIntegerField)type (CharField)genomicfeature -> GenomicFeature (UnknownRelation)uploader -> User (ForeignKey)modifier -> User (ForeignKey) \ No newline at end of file diff --git a/docs/model_diagrams/DataSource.json b/docs/model_diagrams/DataSource.json new file mode 100644 index 0000000..952fad3 --- /dev/null +++ b/docs/model_diagrams/DataSource.json @@ -0,0 +1,72 @@ +{ + "model_name": "DataSource", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "name", + "type": "CharField" + }, + { + "name": "lab", + "type": "CharField" + }, + { + "name": "assay", + "type": "CharField" + }, + { + "name": "workflow", + "type": "CharField" + }, + { + "name": "description", + "type": "CharField" + }, + { + "name": "citation", + "type": "CharField" + }, + { + "name": "notes", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "binding", + "related_model": "Binding", + "type": "UnknownRelation" + }, + { + "name": "expression", + "related_model": "Expression", + "type": "UnknownRelation" + }, + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "fileformat", + "related_model": "FileFormat", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/DataSource.svg b/docs/model_diagrams/DataSource.svg new file mode 100644 index 0000000..155194e --- /dev/null +++ b/docs/model_diagrams/DataSource.svg @@ -0,0 +1 @@ +DataSourceid (BigAutoField)upload_date (DateField)modified_date (DateTimeField)name (CharField)lab (CharField)assay (CharField)workflow (CharField)description (CharField)citation (CharField)notes 
(CharField)binding -> Binding (UnknownRelation)expression -> Expression (UnknownRelation)uploader -> User (ForeignKey)modifier -> User (ForeignKey)fileformat -> FileFormat (ForeignKey) \ No newline at end of file diff --git a/docs/model_diagrams/Expression.json b/docs/model_diagrams/Expression.json new file mode 100644 index 0000000..dbb9027 --- /dev/null +++ b/docs/model_diagrams/Expression.json @@ -0,0 +1,85 @@ +{ + "model_name": "Expression", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "batch", + "type": "CharField" + }, + { + "name": "strain", + "type": "CharField" + }, + { + "name": "replicate", + "type": "PositiveIntegerField" + }, + { + "name": "control", + "type": "CharField" + }, + { + "name": "mechanism", + "type": "CharField" + }, + { + "name": "restriction", + "type": "CharField" + }, + { + "name": "time", + "type": "PositiveIntegerField" + }, + { + "name": "file", + "type": "FileField" + }, + { + "name": "notes", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "expressionmanualqc", + "related_model": "ExpressionManualQC", + "type": "UnknownRelation" + }, + { + "name": "rankresponse", + "related_model": "RankResponse", + "type": "UnknownRelation" + }, + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "regulator", + "related_model": "Regulator", + "type": "ForeignKey" + }, + { + "name": "source", + "related_model": "DataSource", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/Expression.svg b/docs/model_diagrams/Expression.svg new file mode 100644 index 0000000..b7b7164 --- /dev/null +++ b/docs/model_diagrams/Expression.svg @@ -0,0 +1 @@ +Expressionid (BigAutoField)upload_date (DateField)modified_date 
(DateTimeField)batch (CharField)strain (CharField)replicate (PositiveIntegerField)control (CharField)mechanism (CharField)restriction (CharField)time (PositiveIntegerField)file (FileField)notes (CharField)expressionmanualqc -> ExpressionManualQC (UnknownRelation)rankresponse -> RankResponse (UnknownRelation)uploader -> User (ForeignKey)modifier -> User (ForeignKey)regulator -> Regulator (ForeignKey)source -> DataSource (ForeignKey) \ No newline at end of file diff --git a/docs/model_diagrams/ExpressionManualQC.json b/docs/model_diagrams/ExpressionManualQC.json new file mode 100644 index 0000000..61594fa --- /dev/null +++ b/docs/model_diagrams/ExpressionManualQC.json @@ -0,0 +1,38 @@ +{ + "model_name": "ExpressionManualQC", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "strain_verified", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "expression", + "related_model": "Expression", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/ExpressionManualQC.svg b/docs/model_diagrams/ExpressionManualQC.svg new file mode 100644 index 0000000..450dcf6 --- /dev/null +++ b/docs/model_diagrams/ExpressionManualQC.svg @@ -0,0 +1 @@ +ExpressionManualQCid (BigAutoField)upload_date (DateField)modified_date (DateTimeField)strain_verified (CharField)uploader -> User (ForeignKey)modifier -> User (ForeignKey)expression -> Expression (ForeignKey) \ No newline at end of file diff --git a/docs/model_diagrams/FileFormat.json b/docs/model_diagrams/FileFormat.json new file mode 100644 index 0000000..ac448f2 --- /dev/null +++ b/docs/model_diagrams/FileFormat.json @@ -0,0 +1,81 @@ +{ + "model_name": "FileFormat", + "fields": [ + 
{ + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "fileformat", + "type": "CharField" + }, + { + "name": "fields", + "type": "JSONField" + }, + { + "name": "separator", + "type": "CharField" + }, + { + "name": "feature_identifier_col", + "type": "CharField" + }, + { + "name": "effect_col", + "type": "CharField" + }, + { + "name": "default_effect_threshold", + "type": "FloatField" + }, + { + "name": "pval_col", + "type": "CharField" + }, + { + "name": "default_pvalue_threshold", + "type": "FloatField" + } + ], + "relationships": [ + { + "name": "callingcardsbackground", + "related_model": "CallingCardsBackground", + "type": "UnknownRelation" + }, + { + "name": "datasource", + "related_model": "DataSource", + "type": "UnknownRelation" + }, + { + "name": "promotersetsig", + "related_model": "PromoterSetSig", + "type": "UnknownRelation" + }, + { + "name": "rankresponse", + "related_model": "RankResponse", + "type": "UnknownRelation" + }, + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/FileFormat.svg b/docs/model_diagrams/FileFormat.svg new file mode 100644 index 0000000..f4d513c --- /dev/null +++ b/docs/model_diagrams/FileFormat.svg @@ -0,0 +1 @@ +FileFormatid (BigAutoField)upload_date (DateField)modified_date (DateTimeField)fileformat (CharField)fields (JSONField)separator (CharField)feature_identifier_col (CharField)effect_col (CharField)default_effect_threshold (FloatField)pval_col (CharField)default_pvalue_threshold (FloatField)callingcardsbackground -> CallingCardsBackground (UnknownRelation)datasource -> DataSource (UnknownRelation)promotersetsig -> PromoterSetSig (UnknownRelation)rankresponse -> RankResponse (UnknownRelation)uploader -> User 
(ForeignKey)modifier -> User (ForeignKey) \ No newline at end of file diff --git a/docs/model_diagrams/GenomicFeature.json b/docs/model_diagrams/GenomicFeature.json new file mode 100644 index 0000000..899b8f7 --- /dev/null +++ b/docs/model_diagrams/GenomicFeature.json @@ -0,0 +1,75 @@ +{ + "model_name": "GenomicFeature", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "start", + "type": "PositiveIntegerField" + }, + { + "name": "end", + "type": "PositiveIntegerField" + }, + { + "name": "strand", + "type": "CharField" + }, + { + "name": "type", + "type": "CharField" + }, + { + "name": "locus_tag", + "type": "CharField" + }, + { + "name": "symbol", + "type": "CharField" + }, + { + "name": "source", + "type": "CharField" + }, + { + "name": "alias", + "type": "CharField" + }, + { + "name": "note", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "regulator", + "related_model": "Regulator", + "type": "UnknownRelation" + }, + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "chr", + "related_model": "ChrMap", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/GenomicFeature.svg b/docs/model_diagrams/GenomicFeature.svg new file mode 100644 index 0000000..3030f5a --- /dev/null +++ b/docs/model_diagrams/GenomicFeature.svg @@ -0,0 +1 @@ +GenomicFeatureid (BigAutoField)upload_date (DateField)modified_date (DateTimeField)start (PositiveIntegerField)end (PositiveIntegerField)strand (CharField)type (CharField)locus_tag (CharField)symbol (CharField)source (CharField)alias (CharField)note (CharField)regulator -> Regulator (UnknownRelation)uploader -> User (ForeignKey)modifier -> User (ForeignKey)chr -> ChrMap (ForeignKey) \ No newline at end of file 
diff --git a/docs/model_diagrams/PromoterSet.json b/docs/model_diagrams/PromoterSet.json new file mode 100644 index 0000000..ae3d1e0 --- /dev/null +++ b/docs/model_diagrams/PromoterSet.json @@ -0,0 +1,46 @@ +{ + "model_name": "PromoterSet", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "name", + "type": "CharField" + }, + { + "name": "file", + "type": "FileField" + }, + { + "name": "notes", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "promotersetsig", + "related_model": "PromoterSetSig", + "type": "UnknownRelation" + }, + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/PromoterSet.svg b/docs/model_diagrams/PromoterSet.svg new file mode 100644 index 0000000..8064025 --- /dev/null +++ b/docs/model_diagrams/PromoterSet.svg @@ -0,0 +1 @@ +PromoterSetid (BigAutoField)upload_date (DateField)modified_date (DateTimeField)name (CharField)file (FileField)notes (CharField)promotersetsig -> PromoterSetSig (UnknownRelation)uploader -> User (ForeignKey)modifier -> User (ForeignKey) \ No newline at end of file diff --git a/docs/model_diagrams/Regulator.json b/docs/model_diagrams/Regulator.json new file mode 100644 index 0000000..ce94c4b --- /dev/null +++ b/docs/model_diagrams/Regulator.json @@ -0,0 +1,52 @@ +{ + "model_name": "Regulator", + "fields": [ + { + "name": "id", + "type": "BigAutoField" + }, + { + "name": "upload_date", + "type": "DateField" + }, + { + "name": "modified_date", + "type": "DateTimeField" + }, + { + "name": "under_development", + "type": "BooleanField" + }, + { + "name": "notes", + "type": "CharField" + } + ], + "relationships": [ + { + "name": "binding", + "related_model": "Binding", + "type": "UnknownRelation" 
+ }, + { + "name": "expression", + "related_model": "Expression", + "type": "UnknownRelation" + }, + { + "name": "uploader", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "modifier", + "related_model": "User", + "type": "ForeignKey" + }, + { + "name": "genomicfeature", + "related_model": "GenomicFeature", + "type": "ForeignKey" + } + ] +} \ No newline at end of file diff --git a/docs/model_diagrams/Regulator.svg b/docs/model_diagrams/Regulator.svg new file mode 100644 index 0000000..70b4247 --- /dev/null +++ b/docs/model_diagrams/Regulator.svg @@ -0,0 +1 @@ +Regulatorid (BigAutoField)upload_date (DateField)modified_date (DateTimeField)under_development (BooleanField)notes (CharField)binding -> Binding (UnknownRelation)expression -> Expression (UnknownRelation)uploader -> User (ForeignKey)modifier -> User (ForeignKey)genomicfeature -> GenomicFeature (ForeignKey) \ No newline at end of file diff --git a/rds_redis_ec2_config.yml b/rds_redis_ec2_config.yml index d8135b1..a7da85b 100644 --- a/rds_redis_ec2_config.yml +++ b/rds_redis_ec2_config.yml @@ -150,4 +150,3 @@ Outputs: EC2InstancePublicIP: Description: Public IP of the EC2 instance Value: !GetAtt MyEC2Instance.PublicIp - diff --git a/yeastregulatorydb/conftest.py b/yeastregulatorydb/conftest.py index 28095f1..2989e7e 100644 --- a/yeastregulatorydb/conftest.py +++ b/yeastregulatorydb/conftest.py @@ -314,6 +314,7 @@ def callingcardsbackground(db, fileformat: dict) -> CallingCardsBackground: def cc_datasource(db, fileformat: QuerySet) -> DataSource: qbed_fileformat = fileformat.filter(fileformat="qbed").first() content = { + "id": 99, "name": "brent_nf_cc", "fileformat": qbed_fileformat, "lab": "brent", diff --git a/yeastregulatorydb/contrib/sites/migrations/0005_auto_20240210_1228.py b/yeastregulatorydb/contrib/sites/migrations/0005_auto_20240210_1228.py new file mode 100644 index 0000000..3991c55 --- /dev/null +++ b/yeastregulatorydb/contrib/sites/migrations/0005_auto_20240210_1228.py @@ 
-0,0 +1,23 @@ +# Generated by Django 4.2.8 on 2024-02-10 18:28 + +from django.db import migrations + + +def update_site_domain(apps, schema_editor): + Site = apps.get_model('sites', 'Site') + # Update the site domain and name + # Assuming the default SITE_ID is 1 + site = Site.objects.filter(id=1).first() + if site: + site.domain = 'yeastregulatorydb.com' + site.save() + + +class Migration(migrations.Migration): + dependencies = [ + ("sites", "0004_alter_options_ordering_domain"), + ] + + operations = [ + migrations.RunPython(update_site_domain), + ] diff --git a/yeastregulatorydb/regulatory_data/api/filters/BindingFilter.py b/yeastregulatorydb/regulatory_data/api/filters/BindingFilter.py index 07f5218..9a73fdc 100644 --- a/yeastregulatorydb/regulatory_data/api/filters/BindingFilter.py +++ b/yeastregulatorydb/regulatory_data/api/filters/BindingFilter.py @@ -1,6 +1,6 @@ import django_filters -from ...models.Binding import Binding +from ...models import Binding, BindingManualQC class BindingFilter(django_filters.FilterSet): @@ -22,6 +22,9 @@ class BindingFilter(django_filters.FilterSet): lab = django_filters.CharFilter(field_name="source__lab", lookup_expr="iexact") assay = django_filters.CharFilter(field_name="source__assay", lookup_expr="iexact") workflow = django_filters.CharFilter(field_name="source__workflow", lookup_expr="iexact") + data_usable = django_filters.ChoiceFilter( + field_name="bindingmanualqc__data_usable", choices=BindingManualQC.MANUAL_QC_CHOICES + ) # pylint: disable=R0801 class Meta: diff --git a/yeastregulatorydb/regulatory_data/api/filters/ExpressionFilter.py b/yeastregulatorydb/regulatory_data/api/filters/ExpressionFilter.py index 91d1fdf..27727e4 100644 --- a/yeastregulatorydb/regulatory_data/api/filters/ExpressionFilter.py +++ b/yeastregulatorydb/regulatory_data/api/filters/ExpressionFilter.py @@ -1,7 +1,6 @@ import logging import django_filters -from django.core.exceptions import ValidationError from django.db.models import Q from 
django.db.models.query import QuerySet diff --git a/yeastregulatorydb/regulatory_data/api/filters/PromoterSetSigFilter.py b/yeastregulatorydb/regulatory_data/api/filters/PromoterSetSigFilter.py index 345ddef..e43ec53 100644 --- a/yeastregulatorydb/regulatory_data/api/filters/PromoterSetSigFilter.py +++ b/yeastregulatorydb/regulatory_data/api/filters/PromoterSetSigFilter.py @@ -1,5 +1,6 @@ import django_filters +from ...models.BindingManualQC import BindingManualQC from ...models.PromoterSetSig import PromoterSetSig @@ -23,6 +24,9 @@ class PromoterSetSigFilter(django_filters.FilterSet): lab = django_filters.CharFilter(field_name="binding__source__lab", lookup_expr="iexact") assay = django_filters.CharFilter(field_name="binding__source__assay", lookup_expr="iexact") workflow = django_filters.CharFilter(field_name="binding__source__workflow", lookup_expr="iexact") + data_usable = django_filters.ChoiceFilter( + field_name="binding__bindingmanualqc__data_usable", choices=BindingManualQC.MANUAL_QC_CHOICES + ) # pylint: disable=R0801 class Meta: @@ -41,6 +45,7 @@ class Meta: "lab", "assay", "workflow", + "data_usable", ] # pylint: enable=R0801 diff --git a/yeastregulatorydb/regulatory_data/api/views/BindingManualQCViewSet.py b/yeastregulatorydb/regulatory_data/api/views/BindingManualQCViewSet.py index b491eb3..1eac0e2 100644 --- a/yeastregulatorydb/regulatory_data/api/views/BindingManualQCViewSet.py +++ b/yeastregulatorydb/regulatory_data/api/views/BindingManualQCViewSet.py @@ -1,13 +1,27 @@ +import logging + +from django.core.exceptions import ValidationError as DjangoValidationError +from django.db import transaction from django_filters.rest_framework import DjangoFilterBackend -from rest_framework import viewsets +from rest_framework import status, viewsets from rest_framework.authentication import SessionAuthentication, TokenAuthentication +from rest_framework.decorators import action +from rest_framework.exceptions import ValidationError as DRFValidationError from 
rest_framework.permissions import IsAuthenticated +from rest_framework.response import Response + +from yeastregulatorydb.regulatory_data.tasks import ( + combine_cc_passing_replicates_promotersig_chained, + combine_cc_passing_replicates_task, +) -from ...models.BindingManualQC import BindingManualQC +from ...models import BindingManualQC from ..filters.BindingManualQCFilter import BindingManualQCFilter from ..serializers.BindingManualQCSerializer import BindingManualQCSerializer from .mixins.UpdateModifiedMixin import UpdateModifiedMixin +logger = logging.getLogger(__name__) + class BindingManualQCViewSet(UpdateModifiedMixin, viewsets.ModelViewSet): """ @@ -33,3 +47,57 @@ class BindingManualQCViewSet(UpdateModifiedMixin, viewsets.ModelViewSet): serializer_class = BindingManualQCSerializer filter_backends = [DjangoFilterBackend] filterset_class = BindingManualQCFilter + + def perform_update(self, serializer): + """ + Modify the default `perform_update` method such that + """ + updated_fields = serializer.validated_data.keys() + instance = serializer.save() + if ( + "data_usable" in updated_fields + and instance.binding.source.assay == "callingcards" + and instance.data_usable + ): + combine_cc_passing_replicates_task.delay(instance.binding.regulator.id, self.request.user.id) + + @action(detail=False, methods=["post"], url_path="bulk-update") + def bulk_update(self, request, *args, **kwargs): + data = request.data.get("data") + updated_records = [] + errors = [] + update_cc_combined_set = set() + + for item in data: + instance = BindingManualQC.objects.get(id=item["id"]) + if instance.binding.source.assay == "callingcards" and item.get("data_usable"): + update_cc_combined_set.add(instance.binding.regulator.id) + try: + for attr, value in item.items(): + setattr(instance, attr, value) + instance.full_clean() # This line validates the model instance before saving + instance.save() + updated_records.append(instance) + except BindingManualQC.DoesNotExist: + 
errors.append(f"BindingManualQC with id {item['id']} does not exist") + logger.error(f"BindingManualQC with id {item['id']} does not exist") + except DjangoValidationError as exc: + errors.append(f"Failed to update BindingManualQC with id {item['id']}: {exc}") + logger.error(f"Failed to update BindingManualQC with id {item['id']}: {exc}") + + if errors: + # return a 400 response with the collected errors + raise DRFValidationError({"errors": errors}) + + # After all records are updated, perform your operation on the set + for regulator_id in update_cc_combined_set: + if self.request.data.get("testing", False): + combine_cc_passing_replicates_promotersig_chained(self.request.user.id, regulator_id=regulator_id) + else: + transaction.on_commit( + lambda: combine_cc_passing_replicates_promotersig_chained( + self.request.user.id, regulator_id=regulator_id + ) + ) + + return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/yeastregulatorydb/regulatory_data/api/views/BindingViewSet.py b/yeastregulatorydb/regulatory_data/api/views/BindingViewSet.py index f185a09..e1c3fa7 100644 --- a/yeastregulatorydb/regulatory_data/api/views/BindingViewSet.py +++ b/yeastregulatorydb/regulatory_data/api/views/BindingViewSet.py @@ -8,7 +8,7 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.serializers import ValidationError -from ...models.Binding import Binding +from ...models import Binding from ...tasks import promotersetsig_rankedresponse_chained from ..filters import BindingFilter from ..serializers import BindingManualQCSerializer, BindingSerializer, PromoterSetSigSerializer @@ -31,6 +31,7 @@ class BindingViewSet(BulkUploadMixin, UpdateModifiedMixin, ExportTableAsGzipFile filter_backends = [DjangoFilterBackend] filterset_class = BindingFilter + # note that the hop info are added in the FileFormatMixin in the serializers @transaction.atomic def perform_create(self, serializer): try: @@ -45,13 +46,12 @@ def perform_create(self, serializer): # create 
a BindingManualQC instance and save to the DB bindingmanualqc_data = self.request.data.copy() bindingmanualqc_data["binding"] = instance.id - del bindingmanualqc_data["notes"] - bindingmanualqc_data["notes"] = bindingmanualqc_data.get("qc_notes", "none") + bindingmanualqc_data["notes"] = bindingmanualqc_data.pop("qc_notes", "none") bindingmanualqc_serializer = BindingManualQCSerializer( data=bindingmanualqc_data, context={"request": self.request} ) bindingmanualqc_serializer.is_valid(raise_exception=True) - bindingmanualqc_instance = bindingmanualqc_serializer.save() + bindingmanualqc_serializer.save() # if the source.name is in the settings NULL_BINDING_FILE_DATASOURCES, # then the `file` needs to be added to the promotersetsig table @@ -92,7 +92,7 @@ def perform_create(self, serializer): if acquire_lock(): try: - if self.request.query_params.get("testing"): + if self.request.data.get("testing", False) or self.request.query_params.get("testing", False): promotersetsig_rankedresponse_chained( instance.id, self.request.user.id, promotersetsig_format ) @@ -104,7 +104,7 @@ def perform_create(self, serializer): ) finally: release_lock() - except: + except: # noqa: E722 # Delete the file of the instance if an exception occurs if instance.file and default_storage.exists(instance.file.name): default_storage.delete(instance.file.name) diff --git a/yeastregulatorydb/regulatory_data/api/views/ExpressionManualQCViewSet.py b/yeastregulatorydb/regulatory_data/api/views/ExpressionManualQCViewSet.py index 18c76cb..6115b52 100644 --- a/yeastregulatorydb/regulatory_data/api/views/ExpressionManualQCViewSet.py +++ b/yeastregulatorydb/regulatory_data/api/views/ExpressionManualQCViewSet.py @@ -14,14 +14,18 @@ class ExpressionManualQCViewSet(UpdateModifiedMixin, viewsets.ModelViewSet): A viewset for viewing and editing ExpressionManualQC instances. 
""" - queryset = ExpressionManualQC.objects.select_related( - "uploader", - "expression", - "expression__regulator", - "expression__regulator__genomicfeature", - "expression__source", - "expression__source__fileformat", - ).all().order_by("-id") + queryset = ( + ExpressionManualQC.objects.select_related( + "uploader", + "expression", + "expression__regulator", + "expression__regulator__genomicfeature", + "expression__source", + "expression__source__fileformat", + ) + .all() + .order_by("-id") + ) authentication_classes = [SessionAuthentication, TokenAuthentication] permission_classes = [IsAuthenticated] serializer_class = ExpressionManualQCSerializer diff --git a/yeastregulatorydb/regulatory_data/api/views/ExpressionViewSet.py b/yeastregulatorydb/regulatory_data/api/views/ExpressionViewSet.py index 3498930..1d029b1 100644 --- a/yeastregulatorydb/regulatory_data/api/views/ExpressionViewSet.py +++ b/yeastregulatorydb/regulatory_data/api/views/ExpressionViewSet.py @@ -1,4 +1,3 @@ -from django.core.cache import cache from django.core.files.storage import default_storage from django.db import IntegrityError, transaction from django_filters.rest_framework import DjangoFilterBackend @@ -24,9 +23,13 @@ class ExpressionViewSet( A viewset for viewing and editing Expression instances. 
""" - queryset = Expression.objects.select_related( - "uploader", "regulator", "regulator__genomicfeature", "source", "source__fileformat" - ).all().order_by("-id") + queryset = ( + Expression.objects.select_related( + "uploader", "regulator", "regulator__genomicfeature", "source", "source__fileformat" + ) + .all() + .order_by("-id") + ) authentication_classes = [SessionAuthentication, TokenAuthentication] permission_classes = [IsAuthenticated] serializer_class = ExpressionSerializer @@ -53,11 +56,11 @@ def perform_create(self, serializer): ) expressionmanualqc_serializer.is_valid(raise_exception=True) try: - expressionmanualqc_instance = expressionmanualqc_serializer.save() + expressionmanualqc_serializer.save() except IntegrityError as e: ValidationError({"expression": str(e)}) - except: + except: # noqa: E722 # Delete the file of the instance if an exception occurs if instance.file and default_storage.exists(instance.file.name): default_storage.delete(instance.file.name) diff --git a/yeastregulatorydb/regulatory_data/api/views/GenomicFeatureViewSet.py b/yeastregulatorydb/regulatory_data/api/views/GenomicFeatureViewSet.py index ab1dc1e..b68dc17 100644 --- a/yeastregulatorydb/regulatory_data/api/views/GenomicFeatureViewSet.py +++ b/yeastregulatorydb/regulatory_data/api/views/GenomicFeatureViewSet.py @@ -1,4 +1,3 @@ -import pandas as pd from django_filters.rest_framework import DjangoFilterBackend from rest_framework import viewsets from rest_framework.authentication import SessionAuthentication, TokenAuthentication diff --git a/yeastregulatorydb/regulatory_data/api/views/PromoterSetSigViewSet.py b/yeastregulatorydb/regulatory_data/api/views/PromoterSetSigViewSet.py index 7970c6e..407943f 100644 --- a/yeastregulatorydb/regulatory_data/api/views/PromoterSetSigViewSet.py +++ b/yeastregulatorydb/regulatory_data/api/views/PromoterSetSigViewSet.py @@ -1,7 +1,3 @@ -import tempfile - -import pandas as pd -from django.core.cache import cache from django.db import 
IntegrityError from django_filters.rest_framework import DjangoFilterBackend from rest_framework import viewsets @@ -30,6 +26,7 @@ class PromoterSetSigViewSet( "background", "fileformat", ) + .prefetch_related("binding__bindingmanualqc") .all() .order_by("id") ) diff --git a/yeastregulatorydb/regulatory_data/migrations/0016_alter_bindingmanualqc_best_datatype_and_more.py b/yeastregulatorydb/regulatory_data/migrations/0016_alter_bindingmanualqc_best_datatype_and_more.py new file mode 100644 index 0000000..43bba68 --- /dev/null +++ b/yeastregulatorydb/regulatory_data/migrations/0016_alter_bindingmanualqc_best_datatype_and_more.py @@ -0,0 +1,55 @@ +# Generated by Django 4.2.8 on 2024-02-10 18:21 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("regulatory_data", "0015_bindingmanualqc_rank_recall_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="bindingmanualqc", + name="best_datatype", + field=models.CharField( + choices=[("unreviewed", "unreviewed"), ("pass", "pass"), ("fail", "fail"), ("note", "note")], + default="unreviewed", + help_text="`pass` if the only binding data that performs better is from the same binding source. Otherwise, `unreviewed` or `fail`", + ), + ), + migrations.AlterField( + model_name="bindingmanualqc", + name="data_usable", + field=models.CharField( + choices=[("unreviewed", "unreviewed"), ("pass", "pass"), ("fail", "fail"), ("note", "note")], + default="unreviewed", + help_text="`pass` if there is no reason to believe the data has technical faults. 
Otherwise, `unreviewed` or `false`", + ), + ), + migrations.AlterField( + model_name="bindingmanualqc", + name="notes", + field=models.CharField( + default="none", help_text="Free entry field for notes from the manual QC review", max_length=100 + ), + ), + migrations.AlterField( + model_name="bindingmanualqc", + name="passing_replicate", + field=models.CharField( + choices=[("unreviewed", "unreviewed"), ("pass", "pass"), ("fail", "fail"), ("note", "note")], + default="unreviewed", + help_text="Primarily, and probably only, relevant to Calling Cards data. `pass` if the replicate's hops should be counted towards the target hop count. `unreviewed` or `false` otherwise", + ), + ), + migrations.AlterField( + model_name="bindingmanualqc", + name="rank_recall", + field=models.CharField( + choices=[("unreviewed", "unreviewed"), ("pass", "pass"), ("fail", "fail"), ("note", "note")], + default="unreviewed", + help_text="`pass` if at least 1 rank response bin in the first 100 genes ranked by pvalue is significant. 
Else `unreviewed` or `fail`", + ), + ), + ] diff --git a/yeastregulatorydb/regulatory_data/models/Binding.py b/yeastregulatorydb/regulatory_data/models/Binding.py index 4421966..dae6fdf 100644 --- a/yeastregulatorydb/regulatory_data/models/Binding.py +++ b/yeastregulatorydb/regulatory_data/models/Binding.py @@ -1,6 +1,5 @@ import logging -from django.core.files.storage import default_storage from django.db import models from django.dispatch import receiver @@ -94,9 +93,7 @@ def save(self, *args, **kwargs): is_create = self.pk is None super().save(*args, **kwargs) if is_create: - old_file_name = self.file.name if self.file else None self.update_file_name("file", f"binding/{self.source.name}") - new_file_name = self.file.name super().save(update_fields=["file"]) diff --git a/yeastregulatorydb/regulatory_data/models/CallingCardsBackground.py b/yeastregulatorydb/regulatory_data/models/CallingCardsBackground.py index 1cd27d0..ea6b530 100644 --- a/yeastregulatorydb/regulatory_data/models/CallingCardsBackground.py +++ b/yeastregulatorydb/regulatory_data/models/CallingCardsBackground.py @@ -1,6 +1,5 @@ import logging -from django.core.files.storage import default_storage from django.db import models from django.dispatch import receiver @@ -48,27 +47,8 @@ def save(self, *args, **kwargs): is_create = self.pk is None super().save(*args, **kwargs) if is_create: - old_file_name = self.file.name if self.file else None self.update_file_name("file", "callingcards/background", "qbed.gz") - new_file_name = self.file.name super().save(update_fields=["file"]) - # If the file name changed, delete the old file - # if old_file_name and old_file_name != new_file_name: - # default_storage.delete(old_file_name) - - # pylint:disable=R0801 - # def save(self, *args, **kwargs): - # # Store the old file path - # old_file_name = self.file.name if self.file else None - # super().save(*args, **kwargs) - # self.update_file_name("file", "callingcards/background", "qbed.gz") - # new_file_name = 
self.file.name - # super().save(update_fields=["file"]) - # # If the file name changed, delete the old file - # if old_file_name and old_file_name != new_file_name: - # default_storage.delete(old_file_name) - - # pylint:enable=R0801 @receiver(models.signals.post_delete, sender=CallingCardsBackground) diff --git a/yeastregulatorydb/regulatory_data/models/Expression.py b/yeastregulatorydb/regulatory_data/models/Expression.py index eca8423..6e21a24 100644 --- a/yeastregulatorydb/regulatory_data/models/Expression.py +++ b/yeastregulatorydb/regulatory_data/models/Expression.py @@ -1,6 +1,5 @@ import logging -from django.core.files.storage import default_storage from django.db import models from django.dispatch import receiver @@ -25,7 +24,8 @@ class Expression(BaseModel, GzipFileUploadWithIdMixin): strain = models.CharField( max_length=20, default="undefined", - help_text="The strain used in the experiment. This will be derived from the original data source. Default value is `undefined`", + help_text="The strain used in the experiment. This will be derived " + "from the original data source. 
Default value is `undefined`", ) replicate = models.PositiveIntegerField(default=1, help_text="Replicate number", db_index=True) control = models.CharField( @@ -79,13 +79,8 @@ def save(self, *args, **kwargs): is_create = self.pk is None super().save(*args, **kwargs) if is_create: - old_file_name = self.file.name if self.file else None self.update_file_name("file", f"expression/{self.source.name}", "csv.gz") - new_file_name = self.file.name super().save(update_fields=["file"]) - # If the file name changed, delete the old file - # if old_file_name and old_file_name != new_file_name: - # default_storage.delete(old_file_name) def get_genomicfeature(self): """return the genomicfeature associated with this expression instance""" @@ -95,20 +90,6 @@ def get_fileformat(self): """return the fileformat associated with this expression instance""" return self.source.fileformat - # pylint: disable=R0801 - # def save(self, *args, **kwargs): - # # Store the old file path - # old_file_name = self.file.name if self.file else None - # super().save(*args, **kwargs) - # self.update_file_name("file", f"expression/{self.source.name}", "csv.gz") - # new_file_name = self.file.name - # super().save(update_fields=["file"]) - # # If the file name changed, delete the old file - # if old_file_name and old_file_name != new_file_name: - # default_storage.delete(old_file_name) - - # pylint: enable=R0801 - # pylint: disable=R0801 @receiver(models.signals.post_delete, sender=Expression) diff --git a/yeastregulatorydb/regulatory_data/models/PromoterSetSig.py b/yeastregulatorydb/regulatory_data/models/PromoterSetSig.py index 56ab115..c5cc9d7 100644 --- a/yeastregulatorydb/regulatory_data/models/PromoterSetSig.py +++ b/yeastregulatorydb/regulatory_data/models/PromoterSetSig.py @@ -1,6 +1,5 @@ import logging -from django.core.files.storage import default_storage from django.db import models from django.dispatch import receiver @@ -45,24 +44,8 @@ def save(self, *args, **kwargs): is_create = self.pk is 
None super().save(*args, **kwargs) if is_create: - old_file_name = self.file.name if self.file else None self.update_file_name("file", "promotersetsig", "csv.gz") - new_file_name = self.file.name super().save(update_fields=["file"]) - # If the file name changed, delete the old file - # if old_file_name and old_file_name != new_file_name: - # default_storage.delete(old_file_name) - - # def save(self, *args, **kwargs): - # # Store the old file path - # old_file_name = self.file.name if self.file else None - # super().save(*args, **kwargs) - # self.update_file_name("file", "promotersetsig", "csv.gz") - # new_file_name = self.file.name - # super().save(update_fields=["file"]) - # # If the file name changed, delete the old file - # if old_file_name and old_file_name != new_file_name: - # default_storage.delete(old_file_name) # pylint:enable=R0801 diff --git a/yeastregulatorydb/regulatory_data/models/RankResponse.py b/yeastregulatorydb/regulatory_data/models/RankResponse.py index 5a26d2a..fecd756 100644 --- a/yeastregulatorydb/regulatory_data/models/RankResponse.py +++ b/yeastregulatorydb/regulatory_data/models/RankResponse.py @@ -1,6 +1,5 @@ import logging -from django.core.files.storage import default_storage from django.db import models from django.dispatch import receiver @@ -67,13 +66,8 @@ def save(self, *args, **kwargs): is_create = self.pk is None super().save(*args, **kwargs) if is_create: - old_file_name = self.file.name if self.file else None self.update_file_name("file", "rankresponse", "csv.gz") - new_file_name = self.file.name super().save(update_fields=["file"]) - # If the file name changed, delete the old file - # if old_file_name and old_file_name != new_file_name: - # default_storage.delete(old_file_name) # pylint:enable=R0801 diff --git a/yeastregulatorydb/regulatory_data/tasks/BaseTask.py b/yeastregulatorydb/regulatory_data/tasks/BaseTask.py new file mode 100644 index 0000000..1d469b0 --- /dev/null +++ 
b/yeastregulatorydb/regulatory_data/tasks/BaseTask.py @@ -0,0 +1,38 @@ +import socket + +from celery import Task +from celery.utils.log import get_task_logger +from django.db import OperationalError +from requests.exceptions import ConnectionError, Timeout + +logger = get_task_logger(__name__) + + +class MyBaseTask(Task): + # Define retry for common transient errors + autoretry_for = (ConnectionError, Timeout, OperationalError, IOError, OSError, socket.error) + retry_kwargs = {"max_retries": 3, "countdown": 15} + + # Default delay between retries, starting at x seconds and doubling each time + # since retry_backoff is set to True + retry_backoff = True + default_retry_delay = 15 + # Acknowledge tasks only after they have been completed + acks_late = True + reject_on_worker_lost = True + + def on_failure(self, exc, task_id, args, kwargs, einfo): + """Custom handler for task failure.""" + logger.error(f"Task {self.name} [{task_id}] failed: {exc}") + + def on_retry(self, exc, task_id, args, kwargs, einfo): + """Custom handler for task retries.""" + logger.info(f"Task {self.name} [{task_id}] will be retried: {exc}") + + def after_return(self, status, retval, task_id, args, kwargs, einfo): + """Handler called after the task is executed.""" + if status == "FAILURE": + logger.error(f"Task {self.name} [{task_id}] failed.") + pass + else: + logger.info(f"Task {self.name} [{task_id}] completed successfully.") diff --git a/yeastregulatorydb/regulatory_data/tasks/__init__.py b/yeastregulatorydb/regulatory_data/tasks/__init__.py index 5bf2115..e16b0b4 100644 --- a/yeastregulatorydb/regulatory_data/tasks/__init__.py +++ b/yeastregulatorydb/regulatory_data/tasks/__init__.py @@ -1,7 +1,10 @@ -from .chained_tasks import promotersetsig_rankedresponse_chained +from .chained_tasks import combine_cc_passing_replicates_promotersig_chained, promotersetsig_rankedresponse_chained +from .combine_cc_passing_replicates_task import combine_cc_passing_replicates_task from 
.promoter_significance_task import promoter_significance_task __all__ = [ "promoter_significance_task", "promotersetsig_rankedresponse_chained", + "combine_cc_passing_replicates_task", + "combine_cc_passing_replicates_promotersig_chained", ] diff --git a/yeastregulatorydb/regulatory_data/tasks/chained_tasks.py b/yeastregulatorydb/regulatory_data/tasks/chained_tasks.py index 9b8ae41..2ccfe52 100644 --- a/yeastregulatorydb/regulatory_data/tasks/chained_tasks.py +++ b/yeastregulatorydb/regulatory_data/tasks/chained_tasks.py @@ -1,7 +1,11 @@ from celery import chain +from django.conf import settings from config import celery_app +from yeastregulatorydb.regulatory_data.models import Binding +from .BaseTask import MyBaseTask +from .combine_cc_passing_replicates_task import combine_cc_passing_replicates_task from .promoter_significance_task import promoter_significance_task @@ -28,3 +32,40 @@ def promotersetsig_rankedresponse_chained(binding_id, user_id, promotersetsig_fi ) result = task.apply_async() return result + + +@celery_app.task(bind=True, base=MyBaseTask) +def combine_cc_passing_replicates_promotersig_chained(self, user_id, **kwargs): + """Chain the combine_cc_passing_replicates and promoter_significance tasks together + + :param user_id: The id of the user who initiated the task + :type user_id: int + :param kwargs: Additional keyword arguments. See the additional arguments + documentation of the :func:`combine_cc_passing_replicates_task` and + :func:`promoter_significance_task` functions for more details. + + :return: The result of the task + :rtype: celery.result.AsyncResult + """ + # if regulator_id is in kwargs, then call the task chain using it. 
+ # otherwise, get a list of all callingcards regulator_ids and + # call the task chain for each + regulator_id_list = ( + [kwargs.pop("regulator_id")] + if kwargs.get("regulator_id") + else Binding.objects.filter(source__assay="callingcards") + .exclude(batch="cc_combined") + .values_list("regulator_id", flat=True) + .distinct() + ) + + task_ids = [] + for regulator_id in regulator_id_list: + # Create a chain of tasks + task = chain( + combine_cc_passing_replicates_task.s(regulator_id, user_id, **kwargs), + promoter_significance_task.s(user_id, settings.CALLINGCARDS_PROMOTER_SIG_FORMAT, **kwargs), + ) + result = task.apply_async() + task_ids.append(result.id) + return task_ids diff --git a/yeastregulatorydb/regulatory_data/tasks/combine_cc_passing_replicates_task.py b/yeastregulatorydb/regulatory_data/tasks/combine_cc_passing_replicates_task.py new file mode 100644 index 0000000..1285a7c --- /dev/null +++ b/yeastregulatorydb/regulatory_data/tasks/combine_cc_passing_replicates_task.py @@ -0,0 +1,114 @@ +import gzip +import io +import logging +import uuid +from types import SimpleNamespace + +import pandas as pd +from django.contrib.auth import get_user_model +from django.core.files import File + +from config import celery_app +from yeastregulatorydb.regulatory_data.api.filters import BindingFilter +from yeastregulatorydb.regulatory_data.api.serializers import BindingSerializer +from yeastregulatorydb.regulatory_data.models import Binding +from yeastregulatorydb.regulatory_data.utils import extract_file_from_storage + +from .BaseTask import MyBaseTask + +logger = logging.getLogger(__name__) + + +@celery_app.task(bind=True, base=MyBaseTask) +def combine_cc_passing_replicates_task(self, regulator_id: int, user_id: int, **kwargs) -> list: + """ + Combine the qbed files for the passing replicates of the calling cards assay. + Note that by default, assay='callingcards' and data_usable='passing' + are used as filters. 
+ + :param regulator_id: a regulator id + :type regulator_id: int + :param user_id: a user id + :type user_id: int + :param output_fileformat: the name of the output FileFormat + :type output_fileformat: str + :param kwargs: additional keyword arguments. This may be used to pass + additional filters to the BindingFilter + :type kwargs: dict + + :returns: a list of regulator genomic feature names which have been + processed + :rtype: list + """ + + try: + User = get_user_model() + user = User.objects.get(id=user_id) + except User.DoesNotExist: + raise ValueError(f"User with id {user_id} does not exist") + + filters = { + "regulator_id": regulator_id, + "assay": kwargs.pop("assay", "callingcards"), + "data_usable": kwargs.pop("data_usable", "passing"), + } + filters.update(kwargs) # update filters with kwargs + cc_binding_set = BindingFilter(filters, queryset=Binding.objects.all()).qs + # get the qbed files from the django storage and read in data + qbed_df_list = [] + for cc_record in cc_binding_set: + filepath = extract_file_from_storage(cc_record.file) + # read filepath into pandas dataframe + df = pd.read_csv(filepath, sep="\t") + qbed_df_list.append(df) + + # combine the qbed files + combined_qbed_df = pd.concat(qbed_df_list) + + buffer = io.BytesIO() + with gzip.GzipFile(fileobj=buffer, mode="wb") as gzipped_file: + combined_qbed_df.to_csv(gzipped_file, sep="\t", index=False) + + # Reset buffer position + buffer.seek(0) + + # Create a Django File object with a uuid filename + django_file = File(buffer, name=f"{uuid.uuid4()}.csv.gz") + + # Create a mock request with only a user attribute + # Assuming you have the user_id available + mock_request = SimpleNamespace(user=user) + + source = kwargs.get("source_name") if kwargs.get("source_name") else cc_binding_set[0].source.name + + # Attempt to find an existing record + existing_record = Binding.objects.filter(regulator_id=regulator_id, batch="cc_combined").first() + + upload_data = { + "regulator": regulator_id, + 
"batch": "cc_combined", + "source_name": source, + "file": django_file, + } + + if existing_record: + # Use serializer for updating to apply validation/transformation + serializer = BindingSerializer( + existing_record, + data=upload_data, + context={"request": mock_request}, + ) + # Proceed with creation logic if no existing record is found + else: + serializer = BindingSerializer( + data=upload_data, + context={"request": mock_request}, + ) + + if serializer.is_valid(): + combined_binding_record = serializer.save() + return combined_binding_record.id + else: + error_msg = f"Combined Binding Serializer is invalid: {serializer.errors}" + logger.error(error_msg) + raise ValueError(error_msg) diff --git a/yeastregulatorydb/regulatory_data/tasks/promoter_significance_task.py b/yeastregulatorydb/regulatory_data/tasks/promoter_significance_task.py index e392551..24ecf00 100644 --- a/yeastregulatorydb/regulatory_data/tasks/promoter_significance_task.py +++ b/yeastregulatorydb/regulatory_data/tasks/promoter_significance_task.py @@ -73,6 +73,7 @@ def promoter_significance_task(binding_id: int, user_id: int, output_fileformat: with tempfile.TemporaryDirectory() as tmpdir: chrmap_filepath = os.path.join(tmpdir, "chrmap.csv") + # write chrmap to local tmpfile pd.DataFrame(list(ChrMap.objects.all().values())).to_csv(chrmap_filepath, index=False) binding_filepath = extract_file_from_storage(binding_record.file, tmpdir) diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_292_hap5_chrI.csv.gz b/yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_292_hap5_chrI.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..a704be6467cdf86ddf02239f6d4ec702fcdea6fd GIT binary patch literal 1593 zcmV-92FCdxiwFP!00000165c_u3R?^Tz9S#7|_V!2swsf8}P~qetdtVit=yO+f+e> znvbG5slWaFc>VYL>&Nfck6+(k-+%o3`zO{PUw?i7>)X$de_nm9L71=p`ukJlLCDv5 zi+1aT0nwMY$O!)GBI}fO$o$Gi5K@1-7h%8hEwbzc*zy*6+XTBmoFf{cUt{OqMsN|? 
zbV7(IG6-VCYAXo&vMYiSfbrUIk@ZIC*DiQr5T=UoML=BjyU#`tQ}=2?5Wj9SC(z%Y zzqHmSVcl)kC(P^%rd$zLfDT*VqHsc7!>lJoDTcu>jnGB?f-3;|x<;&6ge|%j)i0>O zyLL{(5|MY9>Z9Frd+(F5t}aL55c;*t9iaq>YnMAh3ou0#h(Ulj-vdqnjMx0!70o7L zT_a-dL74Y$@w-6~U1630Zz7^efD}={6992^4pvO=X04Tg78-%I5a1(%WP0b1Q z0C9A#G=ksT&K*W@$(Y`$2Phwa-+dN}|NC_4B8KMujwGfW|IZQ#GjBQA%n0D4AUM2B zTsKU+2f~fmStmZF(U%qay$)E7TqAb*eK>BHsFfjtc?BHTx1)`nL~GZkfk4=WcF#Hb zfY*p}TUg?pL}+gB-w)(dVao4OEn8tn?f4YdkwzWq9fuw|qX6b_1o- zOjLqas0FXhu?tOinp>euwd@T`qE^DrL?mwAIoc|ee8-X>0@}R~Eaxng+?r#eyaQ*G zI))W~N%VWp33H-Q9Ev5s1k_r;*hh$J{W2N~#QwX}(7uT4hA5X_7^*YS$A}V5bGK!M z$cQUVN9F=isNONQ5IC!FUz^tCP9RI#6($-4QL+waK?hoUw01qRHq)Ib=Um%@J1r%c z*LJq7?OLc&lo2vSITE@PvW}?jXK*KR-1|hf!cOA4#&0*9h3k5G$AU4$eueBdt56(b zW237Pwe3VlUnR<>_In~Oqfo*Qxyc@y zUQP7#>&VL}l)(T?v?}7zpQHK~=1p#IEF(m1Z4H-JC|8W>20DpDh2dT(y#=k4O%FJ) zm(Udvti+)^7t)$&PoWh`t3auSKZ$Z3(H-9M=R5Hl#~LDCl8h!Rc)q)_o(d;ySD_hKzQ)Su!~0(#6B=;D4z z!W&cIDUw$3%9^D;rK zbs}Eu%cOBbyYJZcCc<4?(2ubSjL^(=Qe*{X_m{w%^NnC&* z(pl79@4^zp!V&OT%hF(0@^>Orh|FQmsQQ7_k_t=E$nuJ$3* z(?{}rBv&o?br$K^kG1DMq=SD$8_A*}!CB9PIF$D?aXPxND0FvBmKajXn`Y2W{g71r zF{aZrl2ij+7K?S!c!)Wn7Cn?$Jr77SeU0I`utQsSJ1b|AK1Q(SgPSzu<-zsYytuDgTtXpuZE$4CZN(ZhvV zZ$6Sec5_|i@@gDNY(EsYeg9b(r5c#*#6|1&q(AGTt=E%|A**OUb;mwrPbG0I(LWZj rj(UQEQvPbAfUi)}BP6iSqSc;S>V81>5NAaS=w#LZv-R;!FBAX(I>86^ literal 0 HcmV?d00001 diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_297_hap5_chrI.csv.gz b/yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_297_hap5_chrI.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..f96686801dc14e4128f82c51754206714e53c941 GIT binary patch literal 1295 zcmV+q1@QVGiwFP!0000016`M0t{o>3gr85g5=DY;py?%K8J2CyPgZo}{VP??^`Ry& zUx^ko1N09)Uw?eO{{8jk=uhdxjk$U`0)b?zSDq7L+OboG5QWolsF;2=py|?DYA@!m=;)(Qo_86uY zACCu02)S=$x%D-R>*kLeK2j9dxlKeV83>&B%msgvSiHP6P@c#{2EL!qdOq>|5F;uu zDfP~IY(?>>V>hEH$?0RDPLcUA@ zwSF(-DTDjgJKv530g)h5>1#*H9ZfM#FY19L{8J2B5&~=%rFM)yaUi6e{P2kgBkjHu 
z8HjC?av(S!6dE$sIB_wMTa62QC+)_~Sb?%Isv^HnX`nRDqO5az$N7+QD%6ySg#jtI1-yt;B9|^H;BiuRv~nS^Nr{k^fn}3ZE?yM*B$w_z_Zr3d zFk;yx8jWjAejU_{t~G=W$@+4w1Fs{cuA-Y7QPk&gXkL_4Mpj|n+*l&YQ$YwlnC3&!bD1Lca%lsT#4LDy(enr5&0P73EL`>eW8Eq zsQ;9ZK*fz(@1{ru;r?XNFBuOrqo8D8=W=Xy>4uAY>k^(vq=D!Y4PBNX_J)|9pQMbN zh#kEJdu56?1ZXAEm6U{R2i;c8BjWy!yhX_^kxI((Y)7OC?HlE39f_MO=z9?9M?zn+ zH#-2z)Rkmw?Fjq*_foQT6p@lH+r-$$5s5L5d7V)tOgKj3m8g%ChkC@HW^{uk_An>g z49IBuNd%4^L3UiTb?--{boZPw$B`*Ph@7S~DdS<@Y%M4*Z6FMZ7N=+T)UxrU-po^dpb{FP+mMT@J9HU~DEPY9E2_^ku9=HHX?4}^b{G}|BG^z1A5^}H@uVY^En9a~G*^hN^ zn@5(;M%?cssb>7yv?KB#gmJJ7rDz+AarlvO>;31rWa>&X>R=(Ro5y$^iRbJ*pUmec z@so38JZ9ss=14xxXFJ>b(>&~m>%rpQk0bjzdmrEp literal 0 HcmV?d00001 diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_302_hap5_chrI.csv.gz b/yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_302_hap5_chrI.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..70dbbb1af9013b0a025757947e57f7a418440779 GIT binary patch literal 1515 zcmV`TW{F0oGpYszxL9tJ>_Z){Zd3z^Xq+DIfoPQt+%fCzO8V)L}}9t=XFQ--A18brS$=m ziCdK$mJs2rx6j9fNPGlhk0@Mscgww#m~tG;>{KRPIPc2zSs2$B5Ld0-?S=$#izKdg zO@hB73fJ8TA{;^-PvtI5Rnkl2kC)!cn4{2DnZpY8n>KKEBG3L-ZjZvJ5~)gz>ty^m zaiXyhX87b&>jUXXWIn!?-ezIQN8OQs#4a|u&svGgaNXq7c46u_XOt3dm2Yn38_~p2 z-(E^b)?Q^Y9-fE?KK}Xp7_f#YIpU3eBFf$L=5rT{O*jgCPxPB3KD>`YNn4x+Il^-_ z`T1OCg+o@t_$ZX!@oW5$*hLwJP!RjgyBQ0iBgQ?^Ty#27>IlRvW!iQ9T)eHs$ImCc zM%0Z~_$ZXTn|tOiOtCX3?lIBj=ecbZ`gKS&M`jUaqiA#N!i1ggQrS^}fVlVXXhug} zDD;~mbWbmo6bT#1cjA8KEv&g`p`>!m?n#_dNh4O@g^~<`IyDNJ4DZU^)-KFjFk%~` zL)7BG4elkHBq-#)3b&SfW6Xda-)F-W2972NEs=;^9-^i9lHN1tWkUY?>w zJc+JLCr1!qzd?o=qfiRTLhtH@CI>|!n1$-RXz&n_v&cPZG-Lu%#*q~RMU^P|01?-S zsM|+bAMTR)3po;na&0k4KqT&aB(zl2PvW{AD2=-lW%oocUxizSE1Sp%qV(jBGKMLy zNTP=cVX?`Rq~D-AvbGgTjbhbAJu1q_P-Z>jkc=iB@#kSlQhy*5e=#$4JJ(lA-?n8OIHDuP}B3z7XO3XP9N&Z3( z6zL)v3t=y6=^?w-k8?L;PkLCg`&fq_r#~+Aa-nRQR_6nmu1=?CN&Vgs$g___{#d=) zyh%e(iFO@_)?;;YJ|>|^zWXB|WK1`bW(un8=!Z;#40-t>eUiXF3}QukOip*}(Bs=< 
z^4KJGc!cx%fV72mbWfH*l5|!)BU-IaR=697$WEt_#$$EoEhDj5y!ERuOz1^gR4~VI zbS9~V3w@^_GJfT)Pv`MT8aI4{w+-a-1mdHN7lpKSH7p#5q~zjNSe22KPc>gQBn+5Hfv8QUkMB7)g4oklKtrX{$O0dTo(LGgtjP zK(afz_Qc?oMZKwkknPYA$D)+5qLQS$TeHRipY%}gN)eLSO?e!(h5<4Lkwf!tiIK~u z9U3=(FoRjWyg~A+GgyH3TO4lUgi)2$zxdY+vv4lPwD=eeS%dgeIf5A`~$SCyx` zXD+@~NiZ}XdOxHAqP!06hkAEE^spTAIwZyYQzqq0O~G5EXgnf;%G(c_x;%5NLr+W6 R`=RsL{vV&JLGMTt007sy>+1jj literal 0 HcmV?d00001 diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_311_hap5_chrI.csv.gz b/yeastregulatorydb/regulatory_data/tests/test_data/binding/callingcards/ccexperiment_311_hap5_chrI.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..324b8120a9fcfa53c611911d76bda4e1ac23f530 GIT binary patch literal 2480 zcmV;h2~YMPiwFP!0000016^55t}V9>9Cx-77!XN)s3l|>hHbzjBY5%tkt!;$Rh_}D zf_T;Xks>Lc-~RmZ{O9jqfBgOY@z?Lq?|=OJ??2K0@#|l||KqnmfBfrN&G6@sJ^wrc zv%UeNiO8)HOhhq)&_&d{6XNkJni#7u9cy4h0J{3~%PX6N^?1KBqs-Qm#u$^Zo_Iww79myCCt=Bl zYYoD@GxpY#fW^I2_O^}ScbDBP3F!8Ud?aDZVBL-2cS^JtA#bnIuvS3c(;0zd257I| zlE9<>DdJ`T+G`FY`12nAn3KIG*ZaycjNp%ok|4{Utpx!JX|K%CV;XgEMQ8zx2K35u z5%MZ-+NNmdopUI!XT7~Tlrg)vtEAa+GyrUAU)eH( zs00Vt2|9k~@_Xu92W$l(wo)y?lI4#b1Pu6!7IX;^_0ElJ1sJl3&2!y2$-C4&jlfO* z^3ylKZ~*&`wdfcCCZdIl3lO_l3%UWswI19(C*aP#n_LMk0>t~Qghj8M;xV$#AoM#} zwxlRk1ZPB$1IvJ6CLPsy%@Z>^A+Fx4mB$ghqP^jFnwVWCbOgw2>f5n#K;Bj0q?3RX z^DD#j02zNfju{a8C5#Z@D%yiEui5Rujoe}4$97<004(f|-|-Ou|9HDixNm@tPrnZc z+0uet>;=0RR|N9{42XVym^duJxF1@Z^w+?3w`p)&UKko*V$Q-PZX7`(3cG$j;Nytm z_Yj6w7OF57_DW0%J~)^x)OgWHG9-=N?)^JWqCxlus=Xj6PKH)N0M}Z8y+y|H( z)=3<9OJUx6;aA9sfK%3K4)qWEgC?x{1EBubo$KDm3~kUNStv`g#^<+pENh4e|cFC79=SJe|q zF41)X8%<(}MU2KY5vWcghH+5fl<&*SM0pvzhnj=d1Ju24gOZ41kpZ{9i45RZm{i|R z-W5qLaneMY9yufql_+E4!ZHR2wSDu~;6%VtCh>ON@EOE@H3e<(Dga%k$G-9XaD?CY zKB8F`<{_DQAF-@ZG6*yg5Q(Y6&Mg6_oO4Hf@xstSC%z=^uaIk+yXoo|v-h#;!E~hz zJH&k>4AMX-k7)9JE_er`CdQ5wuTZY;aP}lhB8dzUCra!$%p*sHgC>UL_iN556lXS& zAaY}eX z8-awlbWlbDBJ+lu*K+$r2^wf3un{HZvYCZj(_*GRM4ZXp>CV{;hveDt_gT0#hu*^q z#URn@a-gXSKE 
zQ`ZS)6iTv(=30gF%2;@|wnAN})f2+_hPV^KyN|-DLgNDg82UMj4n%o+;aO3_FmW0S z54lk&A#1@2cwyYVr!6cyPetHO_}$?0iCucagLtqe#r#d^vM>5VLg| z98SaziJ2G@C9aU9yN?DxdMIyvF=kNhkV(EDo>&@*Y5Nu98SaPTrkBXpoQXV19}NhF z6|@(_jjRMwdl#|TCZi;ixgy;@A2N2xbt!n5&O=L<(s6eEkOXYBSL=uLP|&Un#2C@8 zl9^H;@lNf{)aH;~gJrvVgTB27)k6{lsAzDOT9NZqI&l9WS?DMB3TQeUBfB}UM< z$Y?bxX^2S-p2wjjL9YaPXn#hTa~zuTT0|mA{fu%XwA|Z2eH@yfVOZxVlJITAt!kH` z@oW{-%0at*#C_T!movxF)<%OFqg?$FbQDFV$2b~aek9W!jnW+HbLeANWpZq$a`(AA zy{f6xwVsMMSa>_6Q?fk{O}XRQSm7)R*(MTank`9!E!$ifNuC5~?|Nf{mWuI4*hNb( zZNlUEp^(iZSvkG;YI1=RZ<={X-eAUDBo1}$9V{LcxsHwJf*txi?K|^~BK6~mWU3u9 z9ea6;L+U(n-+A##OP-^w7;xJpy`9MGXs#r!hh%QvHi2qnvm-GcDru|A5wp0bZ3w* z(q9JfAYlw5NknIxW=wLvcSTMkBE@I$P34gEGH-aNerV{NctwpvQ!0fJYO=!~*7D?RuU%Mziy&0N{mTrFaIzfoTjDuRl=gy+V7R-X9zH zT?J9nR1HT*n=eVGMz2uue`jPtg%quw}#WPiWG?dvs uE6k&z++Tg>?uW+ziO)**clUJJMc>}LE+S}s#oZ2_DgO(D%rFpH9{>Q?khdNH literal 0 HcmV?d00001 diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/10535_hap5_chrI.csv.gz b/yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/10535_hap5_chrI.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..153089cb755b0efcb5e298f48b35db5547a4c5be GIT binary patch literal 325 zcmV-L0lNMliwFP!0000014WU|N(32hxdAvD%oF9OUBVr;! 
zg;0PzFo3a1!j^$8=qm0#eIh_jhl~gn2*Y6uK{H;)Lb-!mX$`rweJG7~DXCti)QBb+ zB)M$K#|6Ud0-Gl~vFDx^55;#z?_c3nz}xk6N_ zXd`A~mZxa_2kMx)vZ{5NP;iGng9?wa&|~FP8D$0-EGj0?t)c~Nb&UR}WK-Xt^+!2| Xn~y@R%9m#!d)@p3OcP77X955Krcjr+ literal 0 HcmV?d00001 diff --git a/yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/10706_hap5_chrI.csv.gz b/yeastregulatorydb/regulatory_data/tests/test_data/binding/chipexo/10706_hap5_chrI.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..24d90b5ccf7d41b00ca5923c5137b0876ad664da GIT binary patch literal 372 zcmV-)0gL`0iwFP!0000017(s+OC3QFM)&@fnxgC7wN?V|1Xr#?(4c{f!p;5vSl#Vy zA=%8Co;lRxn|Xda!{5uV^96ptzrfSO-OtBAuamcz^J(6kK3@69$LVDs9&VoB&i6ph zU3J005*bJc7<&dBOc>3srWyG{fYlqwRu@4QF17CYTW!r2e5ei?*h{7VOm?va2X!Qf zKpRC<;h;onepQvJEfA>Iq}l5-Mx|06J>{y3d0QZmtb)CjD%hx(YG}DlB3p2fHu`go z%NQ~GH*C_knraIrNEKC?`%|5d#2&l7Mw0~v9-H@(6r20A3W{GqC+T3D@mV2H0Y`h@AT0?ry&CTF&%;2j? zp?x6>6%kvBf)YMW-pyOiFbnmgOlgMR>>p2!ZK1pvUdKublPYw>^qr4t str: + """ + Serialize the metadata of a Django model + + :param model: The Django model + :type model: Model + + :return: The serialized metadata + :rtype: str + """ + metadata = {"model_name": model.__name__, "fields": [], "relationships": []} + + for field in model._meta.get_fields(): + if field.is_relation: + rel_type = ( + "ForeignKey" + if isinstance(field, ForeignKey) + else ( + "OneToOneField" + if isinstance(field, OneToOneField) + else "ManyToManyField" + if isinstance(field, ManyToManyField) + else "UnknownRelation" + ) + ) + metadata["relationships"].append( + {"name": field.name, "related_model": field.related_model.__name__, "type": rel_type} + ) + else: + metadata["fields"].append({"name": field.name, "type": field.get_internal_type()}) + + return json.dumps(metadata, indent=2) + + +def create_uml_svg_from_metadata(metadata_json: str) -> str: + """ + Create a UML diagram in SVG format from the serialized model metadata + + :param metadata_json: The serialized model metadata + :type metadata_json: str + 
+ :return: The SVG content + :rtype: str + """ + metadata = json.loads(metadata_json) + + svg_start = '' + svg_end = "" + text_template = '{text}' + rect_template = ( + '' + ) + + svg_content = "" + x, y = 10, 20 + char_width = 8 # Approximate width of each character in pixels + line_height = 20 + padding = 10 # Padding for text inside rectangles + + # Calculate max width based on the longest field name or relationship text + max_text_length = len(metadata["model_name"]) * char_width + for field in metadata["fields"]: + field_text = f"{field['name']} ({field['type']})" + max_text_length = max(max_text_length, len(field_text) * char_width) + for rel in metadata["relationships"]: + rel_text = f"{rel['name']} -> {rel['related_model']} ({rel['type']})" + max_text_length = max(max_text_length, len(rel_text) * char_width) + + max_width = max_text_length + (2 * padding) # Add padding to both sides + + # Model name + svg_content += rect_template.format(x=x, y=y, width=max_width, height=line_height, fill="lightgrey") + svg_content += text_template.format(x=x + padding, y=y + 15, text=metadata["model_name"]) + y += line_height + + # Fields + for field in metadata["fields"]: + y += line_height + field_text = f"{field['name']} ({field['type']})" + svg_content += rect_template.format(x=x, y=y, width=max_width, height=line_height, fill="white") + svg_content += text_template.format(x=x + padding, y=y + 15, text=field_text) + + # Relationships + y += line_height # Extra space before relationships + for rel in metadata["relationships"]: + y += line_height + rel_text = f"{rel['name']} -> {rel['related_model']} ({rel['type']})" + svg_content += rect_template.format(x=x, y=y, width=max_width, height=line_height, fill="white") + svg_content += text_template.format(x=x + padding, y=y + 15, text=rel_text) + + svg_height = y + 30 # Add some padding at the bottom + + svg = svg_start.format(width=max_width + 20, height=svg_height) + svg_content + svg_end + + return svg + + +def 
create_model_diagram(model: Model) -> tuple[str, str]: + """ + Create a UML diagram in SVG format from a Django model + + :param model: The Django model + :type model: Model + + :return: The serialized metadata and the SVG content + :rtype: Tuple[str, str] + """ + metadata = serialize_model_metadata(model) + svg = create_uml_svg_from_metadata(metadata) + return metadata, svg