Skip to content

Commit

Permalink
Improve storage maintain related methods
Browse files Browse the repository at this point in the history
Moved the general purpose functions into methods of the storage class

Use the lock mechanism for the maintain operation
  • Loading branch information
ramirezfranciscof committed Feb 22, 2022
1 parent b57e1b6 commit 4e76e51
Show file tree
Hide file tree
Showing 11 changed files with 164 additions and 172 deletions.
32 changes: 20 additions & 12 deletions aiida/cmdline/commands/cmd_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,16 @@ def storage_integrity():
def storage_info(statistics):
"""Summarise the contents of the storage."""
from aiida.cmdline.utils.common import get_database_summary
from aiida.manage.manager import get_manager
from aiida.orm import QueryBuilder
from aiida.storage.control import get_repository_info

manager = get_manager()
storage = manager.get_profile_storage()

with spinner():
data = {
'database': get_database_summary(QueryBuilder, statistics),
'repository': get_repository_info(statistics=statistics),
'repository': storage.get_info(statistics=statistics),
}

echo.echo_dictionary(data, sort_keys=False, fmt='yaml')
Expand All @@ -119,25 +122,30 @@ def storage_info(statistics):
)
def storage_maintain(full, dry_run):
"""Performs maintenance tasks on the repository."""
from aiida.storage.control import repository_maintain
from aiida.manage.manager import get_manager

manager = get_manager()
profile = manager.get_profile()
storage = manager.get_profile_storage()

if full:
echo.echo_warning(
'\nIn order to safely perform the full maintenance operations on the internal storage, no other '
'process should be using the AiiDA profile being maintained. '
'This includes daemon workers, verdi shells, scripts with the profile loaded, etc). '
'Please make sure there is nothing like this currently running and that none is started until '
'these procedures conclude. '
'For performing maintenance operations that are safe to run while actively using AiiDA, just run '
'`verdi storage maintain`, without the `--full` flag.\n'
'\nIn order to safely perform the full maintenance operations on the internal storage, the profile '
f'{profile.name} needs to be locked. '
'This means that no other process will be able to access it and will fail instead. '
'Moreover, if any process is already using the profile, the locking attempt will fail and you will '
'have to either look for these processes and kill them or wait for them to stop by themselves. '
'Note that this includes verdi shells, daemon workers, scripts that manually load it, etc.\n'
'For performing maintenance operations that are safe to run while actively using AiiDA, just run '
'`verdi storage maintain` without the `--full` flag.\n'
)

else:
echo.echo(
'\nThis command will perform all maintenance operations on the internal storage that can be safely '
'executed while still running AiiDA. '
'However, not all operations that are required to fully optimize disk usage and future performance '
'can be done in this way. '
'can be done in this way.\n'
'Whenever you find the time or opportunity, please consider running `verdi repository maintenance '
'--full` for a more complete optimization.\n'
)
Expand All @@ -146,5 +154,5 @@ def storage_maintain(full, dry_run):
if not click.confirm('Are you sure you want continue in this mode?'):
return

repository_maintain(full=full, dry_run=dry_run)
storage.maintain(full=full, dry_run=dry_run)
echo.echo_success('Requested maintenance procedures finished.')
23 changes: 23 additions & 0 deletions aiida/orm/implementation/storage_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,26 @@ def get_global_variable(self, key: str) -> Union[None, str, int, float]:
:raises: `KeyError` if the setting does not exist
"""

@abc.abstractmethod
def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None:
    """Perform maintenance tasks on the storage.

    If `full == True`, then this method may attempt to block the profile associated with the
    storage to guarantee the safety of its procedures. This will not only prevent any other
    subsequent process from accessing that profile, but will also first check if there is
    already any process using it and raise if that is the case. The user will have to manually
    stop any processes that are currently accessing the profile or wait for them to finish on
    their own.

    :param full:
        flag to perform operations that require to stop using the profile to be maintained.
    :param dry_run:
        flag to only print the actions that would be taken without actually executing them.
    """

@abc.abstractmethod
def get_info(self, statistics: bool = False) -> dict:
    """Return general information on the storage.

    :param statistics: flag to request more detailed (and possibly more expensive) information.
    :return: a dictionary with the storage information.
    """
7 changes: 3 additions & 4 deletions aiida/repository/backend/disk_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
from disk_objectstore import Container

from aiida.common.lang import type_check
from aiida.storage.log import STORAGE_LOGGER

from .abstract import AbstractRepositoryBackend

__all__ = ('DiskObjectStoreRepositoryBackend',)

BYTES_TO_MB = 1 / 1024**2

logger = STORAGE_LOGGER.getChild('disk_object_store')


class DiskObjectStoreRepositoryBackend(AbstractRepositoryBackend):
"""Implementation of the ``AbstractRepositoryBackend`` using the ``disk-object-store`` as the backend.
Expand Down Expand Up @@ -156,10 +159,6 @@ def maintain( # type: ignore[override] # pylint: disable=arguments-differ,too-ma
:param do_vacuum: flag for forcing the vacuuming of the internal database when cleaning the repository.
:return: a dictionary with information on the operations performed.
"""
from aiida.storage.control import MAINTAIN_LOGGER

logger = MAINTAIN_LOGGER.getChild('disk_object_store')

if live and (do_repack or clean_storage or do_vacuum):
overrides = {'do_repack': do_repack, 'clean_storage': clean_storage, 'do_vacuum': do_vacuum}
keys = ', '.join([key for key, override in overrides if override is True]) # type: ignore
Expand Down
2 changes: 0 additions & 2 deletions aiida/storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@
# yapf: disable
# pylint: disable=wildcard-import

from .control import *

__all__ = (
'MAINTAIN_LOGGER',
)

# yapf: enable
Expand Down
101 changes: 0 additions & 101 deletions aiida/storage/control.py

This file was deleted.

14 changes: 14 additions & 0 deletions aiida/storage/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""Initialize the storage logger."""

from aiida.common.log import AIIDA_LOGGER

# Parent logger for all storage-related messages; storage modules derive their own
# loggers from it via ``STORAGE_LOGGER.getChild(...)``.
STORAGE_LOGGER = AIIDA_LOGGER.getChild('storage')
56 changes: 55 additions & 1 deletion aiida/storage/psql_dos/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# pylint: disable=missing-function-docstring
from contextlib import contextmanager, nullcontext
import functools
from typing import TYPE_CHECKING, Iterator, List, Optional, Sequence, Union
from typing import TYPE_CHECKING, Iterator, List, Optional, Sequence, Set, Union

from disk_objectstore import Container
from sqlalchemy import table
Expand All @@ -22,6 +22,7 @@
from aiida.orm import User
from aiida.orm.entities import EntityTypes
from aiida.orm.implementation import BackendEntity, StorageBackend
from aiida.storage.log import STORAGE_LOGGER
from aiida.storage.psql_dos.migrator import REPOSITORY_UUID_KEY, PsqlDostoreMigrator
from aiida.storage.psql_dos.models import base

Expand Down Expand Up @@ -346,3 +347,56 @@ def get_global_variable(self, key: str) -> Union[None, str, int, float]:
if setting is None:
raise KeyError(f'No setting found with key {key}')
return setting.val

def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None:
    """Perform maintenance tasks on the storage.

    With ``full=True`` the profile is locked for exclusive access for the duration of the
    operations, so no other process can use it (and the locking itself fails if any process
    already does). Remaining keyword arguments are forwarded to the repository's own
    ``maintain`` implementation.

    :param full:
        flag to perform operations that require to stop using the profile to be maintained.
    :param dry_run:
        flag to only print the actions that would be taken without actually executing them.
    """
    from aiida.manage.profile_access import ProfileAccessManager

    repository = self.get_repository()

    # Exclusive profile lock for the full maintenance; a no-op context otherwise, so the
    # live-compatible operations can run while the profile is in use.
    context_factory = ProfileAccessManager(self._profile).lock if full else nullcontext

    with context_factory():
        dangling_keys = self.get_unreferenced_keyset()
        STORAGE_LOGGER.info(f'Deleting {len(dangling_keys)} unreferenced objects ...')
        if not dry_run:
            repository.delete_objects(list(dangling_keys))

        STORAGE_LOGGER.info('Starting repository-specific operations ...')
        repository.maintain(live=not full, dry_run=dry_run, **kwargs)

def get_unreferenced_keyset(self, check_consistency: bool = True) -> Set[str]:
    """Returns the keyset of objects that exist in the repository but are not tracked by AiiDA.

    This should be all the soft-deleted files.

    :param check_consistency:
        toggle for a check that raises if there are references in the database with no actual object in the
        underlying repository.
    :return:
        a set with all the objects in the underlying repository that are not referenced in the database.
    """
    from aiida import orm

    STORAGE_LOGGER.info('Obtaining unreferenced object keys ...')

    keys_in_repository = set(self.get_repository().list_objects())
    keys_in_database = set(orm.Node.objects(self).iter_repo_keys())

    # A key referenced by the database but absent from the repository means data loss.
    if check_consistency and keys_in_database - keys_in_repository:
        raise RuntimeError(
            'There are objects referenced in the database that are not present in the repository. Aborting!'
        )

    return keys_in_repository - keys_in_database

def get_info(self, statistics: bool = False) -> dict:
    """Return general information on the storage, as provided by the underlying repository.

    :param statistics: flag to request more detailed (and possibly more expensive) information.
    :return: a dictionary with the repository information.
    """
    repository = self.get_repository()
    return repository.get_info(statistics)
6 changes: 6 additions & 0 deletions aiida/tools/archive/implementations/sqlite/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,12 @@ def get_global_variable(self, key: str):
def set_global_variable(self, key: str, value, description: Optional[str] = None, overwrite=True) -> None:
raise ReadOnlyError()

def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None:
    """Maintenance is not implemented for the (read-only) archive backend."""
    raise NotImplementedError

def get_info(self, statistics: bool = False) -> dict:
    """Storage information is not implemented for the archive backend."""
    raise NotImplementedError


def create_backend_cls(base_class, model_cls):
"""Create an archive backend class for the given model class."""
Expand Down
25 changes: 19 additions & 6 deletions tests/cmdline/commands/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,19 +118,32 @@ def tests_storage_maintain_logging(run_cli_command, monkeypatch, caplog):
"""Test all the information and cases of the storage maintain command."""
import logging

from aiida.storage import control
from aiida.manage import get_manager
storage = get_manager().get_profile_storage()

def mock_maintain(*args, **kwargs):
"""Mocks for `maintain` method of `storage`, logging the inputs passed."""
log_message = ''

log_message += 'Provided args:\n'
for arg in args:
log_message += f' > {arg}\n'

def mock_maintain(**kwargs):
logmsg = 'Provided kwargs:\n'
log_message += 'Provided kwargs:\n'
for key, val in kwargs.items():
logmsg += f' > {key}: {val}\n'
logging.info(logmsg)
log_message += f' > {key}: {val}\n'

monkeypatch.setattr(control, 'repository_maintain', mock_maintain)
logging.info(log_message)

monkeypatch.setattr(storage, 'maintain', mock_maintain)

with caplog.at_level(logging.INFO):
_ = run_cli_command(cmd_storage.storage_maintain, user_input='Y')

for record in caplog.records:
print(record.msg.splitlines())
print('-----')

message_list = caplog.records[0].msg.splitlines()
assert ' > full: False' in message_list
assert ' > dry_run: False' in message_list
Expand Down
2 changes: 1 addition & 1 deletion tests/repository/backend/test_disk_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def test_maintain_logging(caplog, populated_repository, do_vacuum):
list_of_logmsg = []
for record in caplog.records:
assert record.levelname == 'REPORT'
assert record.name == 'aiida.maintain.disk_object_store'
assert record.name.endswith('.disk_object_store')
list_of_logmsg.append(record.msg)

assert 'packing' in list_of_logmsg[0].lower()
Expand Down
Loading

0 comments on commit 4e76e51

Please sign in to comment.