Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

👌 IMPROVE: Add SQLA migration for parity with Django schema #5097

Merged
merged 36 commits into from
Jan 15, 2022
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
7a849f0
Change SQLA models for parity with Django
chrisjsewell Aug 25, 2021
fdfa555
add migrations
chrisjsewell Aug 25, 2021
3ab7065
add tests
chrisjsewell Aug 25, 2021
969b8b6
Update 1de112340b17_django_parity_2.py
chrisjsewell Aug 25, 2021
35621dc
Add example index with `varchar_pattern_ops`
chrisjsewell Aug 25, 2021
72a4b9f
Merge remote-tracking branch 'upstream/develop' into sqla-migration
chrisjsewell Dec 8, 2021
cfdfef2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 8, 2021
f94129c
update initial revision
chrisjsewell Dec 8, 2021
cbecb79
add index migration
chrisjsewell Dec 9, 2021
95ed9d0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 9, 2021
effaac2
fix schema diff
chrisjsewell Dec 9, 2021
f5a1388
Merge branch 'develop' into sqla-migration
chrisjsewell Dec 9, 2021
5719b07
Add missing indexes
chrisjsewell Dec 10, 2021
7b3ea13
update
chrisjsewell Dec 13, 2021
32ab71c
Merge branch 'develop' into sqla-migration
chrisjsewell Dec 13, 2021
fa55f63
update
chrisjsewell Dec 14, 2021
3631a8f
update
chrisjsewell Dec 14, 2021
2e0877d
fix postgresql_ops
chrisjsewell Dec 14, 2021
74b3f06
add test
chrisjsewell Dec 14, 2021
e5d8282
🐛 FIX: SQLA migration regression
chrisjsewell Dec 14, 2021
95f80bb
Add db_dblog.levelname truncation
chrisjsewell Dec 14, 2021
d522b44
fix migration freezing
chrisjsewell Jan 12, 2022
a519eff
move logic higher up
chrisjsewell Jan 12, 2022
857d998
Merge branch 'develop' into sqla-migration
chrisjsewell Jan 13, 2022
9df317c
Merge branch 'develop' into sqla-migration
chrisjsewell Jan 13, 2022
855958f
Merge branch 'develop' into sqla-migration
chrisjsewell Jan 13, 2022
959924f
add comment on the reason for calling transaction.close
chrisjsewell Jan 14, 2022
c07f907
Merge branch 'develop' into sqla-migration
chrisjsewell Jan 14, 2022
be0c2bf
Try reverting change to pre v2 migration
chrisjsewell Jan 14, 2022
f208bcd
Apply suggestions from code review
chrisjsewell Jan 14, 2022
266809d
add note on nullable foreign keys
chrisjsewell Jan 14, 2022
365461c
Apply suggestions from code review
chrisjsewell Jan 14, 2022
1037c75
remove migration of null link type to ''
chrisjsewell Jan 14, 2022
734dc28
Apply suggestions from code review
chrisjsewell Jan 14, 2022
708de2f
Update test_12_sqla_django_parity.py
chrisjsewell Jan 15, 2022
8a77436
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 15, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,13 @@ class Migration(migrations.Migration):
old_name='name',
new_name='label',
),
migrations.RunSQL(
'ALTER INDEX db_dbcomputer_name_key rename TO db_dbcomputer_label_bc480bab_uniq',
'ALTER INDEX db_dbcomputer_label_bc480bab_uniq rename TO db_dbcomputer_name_key',
),
migrations.RunSQL(
'ALTER INDEX db_dbcomputer_name_f1800b1a_like rename TO db_dbcomputer_label_bc480bab_like',
'ALTER INDEX db_dbcomputer_label_bc480bab_like rename TO db_dbcomputer_name_f1800b1a_like',
),
upgrade_schema_version(REVISION, DOWN_REVISION),
]
17 changes: 13 additions & 4 deletions aiida/backends/sqlalchemy/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class SqlaBackendManager(BackendManager):

@staticmethod
@contextlib.contextmanager
def alembic_config():
def alembic_config(start_transaction=True):
"""Context manager to return an instance of an Alembic configuration.

The current database connection is added in the `attributes` property, through which it can then also be
Expand All @@ -41,7 +41,16 @@ def alembic_config():

from . import ENGINE

with ENGINE.begin() as connection:
# Certain migrations, such as altering tables, require that there is no existing transactions
# locking the tables.
# Presently, ``SqlaSettingsManager.get`` has been found to leave idle transactions,
# and so we need to ensure that they are closed.
transaction = get_scoped_session().get_transaction()
if transaction:
transaction.close()
sphuber marked this conversation as resolved.
Show resolved Hide resolved

engine_context = ENGINE.begin if start_transaction else ENGINE.connect
with engine_context() as connection:
dir_path = os.path.dirname(os.path.realpath(__file__))
config = Config()
config.set_main_option('script_location', os.path.join(dir_path, ALEMBIC_REL_PATH))
Expand Down Expand Up @@ -142,15 +151,15 @@ def migrate_up(self, version: str):

:param version: string with schema version to migrate to
"""
with self.alembic_config() as config:
with self.alembic_config(start_transaction=False) as config:
upgrade(config, version)

def migrate_down(self, version: str):
"""Migrate the database down to a specific version.

:param version: string with schema version to migrate to
"""
with self.alembic_config() as config:
with self.alembic_config(start_transaction=False) as config:
downgrade(config, version)

def _migrate_database_version(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
# pylint: disable=invalid-name,no-member
"""Parity with Django backend (rev: 0048),
part 1: Ensure fields to make non-nullable are not currently null

Revision ID: 1de112340b16
Revises: 34a831f4286d
Create Date: 2021-08-24 18:52:45.882712

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB, UUID

from aiida.common import timezone
from aiida.common.utils import get_new_uuid

# revision identifiers, used by Alembic.
revision = '1de112340b16'
down_revision = '34a831f4286d'
branch_labels = None
depends_on = None


def upgrade(): # pylint: disable=too-many-statements
"""Convert null values to default values.

This migration is performed in preparation for the next migration,
which will make these fields non-nullable.

Note, it is technically possible that the following foreign keys could also be null
(due to no explicit nullable=False):
`db_dbauthinfo.aiidauser_id`, `db_dbauthinfo.dbcomputer_id`,
`db_dbcomment.dbnode_id`, `db_dbcomment.user_id`,
`db_dbgroup.user_id`, `db_dbgroup_dbnode.dbgroup_id`, `db_dbgroup_dbnode.dbnode_id`,
`db_dblink.input_id`, `db_dblink.output_id`

However, there is no default value for these fields, and the Python API does not allow them to be set to `None`,
so it would be extremely unlikely for this to be the case.

Also, `db_dbnode.node_type` and `db_dblink.type` should not be null but, since this would critically corrupt
the provence graph if we were to set this to an empty string, we leave this to fail the non-null migration.
If a user runs into this exception, they will contact us and we can come up with a custom fix for the database.

"""
db_dbauthinfo = sa.sql.table(
'db_dbauthinfo',
sa.Column('enabled', sa.Boolean),
sa.Column('auth_params', JSONB),
sa.Column('metadata', JSONB),
)

op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.enabled.is_(None)).values(enabled=True))
op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.auth_params.is_(None)).values(auth_params={}))
op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.metadata.is_(None)).values(metadata={}))
op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.auth_params == JSONB.NULL).values(auth_params={}))
op.execute(db_dbauthinfo.update().where(db_dbauthinfo.c.metadata == JSONB.NULL).values(metadata={}))

db_dbcomment = sa.sql.table(
'db_dbcomment',
sa.Column('content', sa.Text),
sa.Column('ctime', sa.DateTime(timezone=True)),
sa.Column('mtime', sa.DateTime(timezone=True)),
sa.Column('uuid', UUID(as_uuid=True)),
)

op.execute(db_dbcomment.update().where(db_dbcomment.c.content.is_(None)).values(content=''))
op.execute(db_dbcomment.update().where(db_dbcomment.c.mtime.is_(None)).values(mtime=timezone.now()))
op.execute(db_dbcomment.update().where(db_dbcomment.c.ctime.is_(None)).values(ctime=timezone.now()))
sphuber marked this conversation as resolved.
Show resolved Hide resolved
op.execute(db_dbcomment.update().where(db_dbcomment.c.uuid.is_(None)).values(uuid=get_new_uuid()))

db_dbcomputer = sa.sql.table(
'db_dbcomputer',
sa.Column('description', sa.Text),
sa.Column('hostname', sa.String(255)),
sa.Column('metadata', JSONB),
sa.Column('scheduler_type', sa.String(255)),
sa.Column('transport_type', sa.String(255)),
sa.Column('uuid', UUID(as_uuid=True)),
)

op.execute(db_dbcomputer.update().where(db_dbcomputer.c.description.is_(None)).values(description=''))
op.execute(db_dbcomputer.update().where(db_dbcomputer.c.hostname.is_(None)).values(hostname=''))
op.execute(db_dbcomputer.update().where(db_dbcomputer.c.metadata.is_(None)).values(metadata={}))
op.execute(db_dbcomputer.update().where(db_dbcomputer.c.metadata == JSONB.NULL).values(metadata={}))
op.execute(db_dbcomputer.update().where(db_dbcomputer.c.scheduler_type.is_(None)).values(scheduler_type=''))
op.execute(db_dbcomputer.update().where(db_dbcomputer.c.transport_type.is_(None)).values(transport_type=''))
op.execute(db_dbcomputer.update().where(db_dbcomputer.c.uuid.is_(None)).values(uuid=get_new_uuid()))

db_dbgroup = sa.sql.table(
'db_dbgroup',
sa.Column('description', sa.Text),
sa.Column('label', sa.String(255)),
sa.Column('time', sa.DateTime(timezone=True)),
sa.Column('type_string', sa.String(255)),
sa.Column('uuid', UUID(as_uuid=True)),
)

op.execute(db_dbgroup.update().where(db_dbgroup.c.description.is_(None)).values(description=''))
op.execute(db_dbgroup.update().where(db_dbgroup.c.label.is_(None)).values(label=get_new_uuid()))
op.execute(db_dbgroup.update().where(db_dbgroup.c.time.is_(None)).values(time=timezone.now()))
op.execute(db_dbgroup.update().where(db_dbgroup.c.type_string.is_(None)).values(type_string='core'))
op.execute(db_dbgroup.update().where(db_dbgroup.c.uuid.is_(None)).values(uuid=get_new_uuid()))

db_dblog = sa.sql.table(
'db_dblog',
sa.Column('levelname', sa.String(255)),
sa.Column('loggername', sa.String(255)),
sa.Column('message', sa.Text),
sa.Column('metadata', JSONB),
sa.Column('time', sa.DateTime(timezone=True)),
sa.Column('uuid', UUID(as_uuid=True)),
)

op.execute(db_dblog.update().where(db_dblog.c.levelname.is_(None)).values(levelname=''))
op.execute(db_dblog.update().values(levelname=db_dblog.c.levelname.cast(sa.String(50))))
op.execute(db_dblog.update().where(db_dblog.c.loggername.is_(None)).values(loggername=''))
op.execute(db_dblog.update().where(db_dblog.c.message.is_(None)).values(message=''))
op.execute(db_dblog.update().where(db_dblog.c.metadata.is_(None)).values(metadata={}))
op.execute(db_dblog.update().where(db_dblog.c.metadata == JSONB.NULL).values(metadata={}))
op.execute(db_dblog.update().where(db_dblog.c.time.is_(None)).values(time=timezone.now()))
op.execute(db_dblog.update().where(db_dblog.c.uuid.is_(None)).values(uuid=get_new_uuid()))

db_dbnode = sa.sql.table(
'db_dbnode',
sa.Column('ctime', sa.DateTime(timezone=True)),
sa.Column('description', sa.Text),
sa.Column('label', sa.String(255)),
sa.Column('mtime', sa.DateTime(timezone=True)),
sa.Column('node_type', sa.String(255)),
sa.Column('uuid', UUID(as_uuid=True)),
)

op.execute(db_dbnode.update().where(db_dbnode.c.ctime.is_(None)).values(ctime=timezone.now()))
op.execute(db_dbnode.update().where(db_dbnode.c.description.is_(None)).values(description=''))
op.execute(db_dbnode.update().where(db_dbnode.c.label.is_(None)).values(label=''))
op.execute(db_dbnode.update().where(db_dbnode.c.mtime.is_(None)).values(mtime=timezone.now()))
op.execute(db_dbnode.update().where(db_dbnode.c.uuid.is_(None)).values(uuid=get_new_uuid()))

db_dbsetting = sa.sql.table(
'db_dbsetting',
sa.Column('time', sa.DateTime(timezone=True)),
)

op.execute(db_dbsetting.update().where(db_dbsetting.c.time.is_(None)).values(time=timezone.now()))

db_dbuser = sa.sql.table(
'db_dbuser',
sa.Column('email', sa.String(254)),
sa.Column('first_name', sa.String(254)),
sa.Column('last_name', sa.String(254)),
sa.Column('institution', sa.String(254)),
)

op.execute(db_dbuser.update().where(db_dbuser.c.email.is_(None)).values(email=get_new_uuid()))
op.execute(db_dbuser.update().where(db_dbuser.c.first_name.is_(None)).values(first_name=''))
op.execute(db_dbuser.update().where(db_dbuser.c.last_name.is_(None)).values(last_name=''))
op.execute(db_dbuser.update().where(db_dbuser.c.institution.is_(None)).values(institution=''))


def downgrade():
"""Downgrade database schema."""
# No need to convert the values back to null
Loading