diff --git a/corehq/apps/cleanup/management/commands/hard_delete_forms.py b/corehq/apps/cleanup/management/commands/hard_delete_forms.py new file mode 100644 index 000000000000..9adf19be1511 --- /dev/null +++ b/corehq/apps/cleanup/management/commands/hard_delete_forms.py @@ -0,0 +1,56 @@ +from django.core.management.base import BaseCommand, CommandError +import csv +import itertools +from dimagi.utils.chunked import chunked +from corehq.form_processor.models import XFormInstance + + +INDEX_FORM_ID = 0 +CHUNK_SIZE = 100 + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument('domain', help='Domain name that owns the forms to be deleted') + parser.add_argument('filename', help='path to the CSV file') + parser.add_argument('--resume_id', help='form ID to start at, within the CSV file') + + def handle(self, domain, filename, resume_id=None, **options): + # expects the filename to have a CSV with a header containing a "Form ID" field + with open(filename, mode='r', encoding='utf-8-sig') as csvfile: + reader = csv.reader(csvfile, delimiter=',') + self._process_rows(reader, domain, resume_id) + + def _process_rows(self, rows, domain, resume_id): + header_row = next(rows) # skip header line + if header_row[INDEX_FORM_ID] != 'Form ID': + raise CommandError( + f'Expected Column {INDEX_FORM_ID} to be "Form ID", found "{header_row[INDEX_FORM_ID]}". Exiting' + ) + + num_deleted = 0 + + if resume_id: + print('resuming at: ', resume_id) + rows = itertools.dropwhile(lambda row: row[INDEX_FORM_ID] != resume_id, rows) + + print('Starting form deletion') + for chunk in chunked(rows, CHUNK_SIZE): + form_ids = [row[INDEX_FORM_ID] for row in chunk] + + try: + deleted_form_ids = set(XFormInstance.objects.hard_delete_forms( + domain, form_ids, return_ids=True)) + except Exception: + print('failed during processing of: ', form_ids) + raise + + for form_id in form_ids: + if form_id in deleted_form_ids: + print('Deleted: ', form_id) + else: + print('Not found:', form_id) + + num_deleted += len(deleted_form_ids) + + print(f'Complete -- removed {num_deleted} forms') diff --git a/corehq/form_processor/models/forms.py b/corehq/form_processor/models/forms.py index cdf4f2ae5440..52b12e52299e 100644 --- a/corehq/form_processor/models/forms.py +++ b/corehq/form_processor/models/forms.py @@ -389,7 +389,8 @@ def soft_undelete_forms(self, domain, form_ids): return count - def hard_delete_forms(self, domain, form_ids, delete_attachments=True, *, publish_changes=True): + def hard_delete_forms( + self, domain, form_ids, return_ids=False, delete_attachments=True, *, publish_changes=True): """Delete forms permanently :param publish_changes: Flag for change feed publication. @@ -398,12 +399,17 @@ def hard_delete_forms(self, domain, form_ids, delete_attachments=True, *, publis assert isinstance(form_ids, list) deleted_count = 0 + deleted_ids = [] for db_name, split_form_ids in split_list_by_db_partition(form_ids): # cascade should delete the operations - _, deleted_models = self.using(db_name).filter( - domain=domain, form_id__in=split_form_ids - ).delete() + query = self.using(db_name).filter(domain=domain, form_id__in=split_form_ids) + with transaction.atomic(): + if return_ids: + found_forms = list(query.values_list('form_id', flat=True)) + _, deleted_models = query.delete() deleted_count += deleted_models.get(self.model._meta.label, 0) + if return_ids: + deleted_ids.extend(found_forms) if delete_attachments and deleted_count: if deleted_count != len(form_ids): @@ -421,7 +427,7 @@ def hard_delete_forms(self, domain, form_ids, delete_attachments=True, *, publis if publish_changes: self.publish_deleted_forms(domain, form_ids) - return deleted_count + return deleted_ids if return_ids else deleted_count @staticmethod def publish_deleted_forms(domain, form_ids): diff --git a/corehq/form_processor/tests/test_forms.py b/corehq/form_processor/tests/test_forms.py index 07810d2d97fc..1e8ad386cf1e 100644 --- a/corehq/form_processor/tests/test_forms.py +++ b/corehq/form_processor/tests/test_forms.py @@ -354,6 +354,22 @@ def test_hard_delete_forms(self): self.assertEqual(1, len(forms)) self.assertEqual(form_ids[0], forms[0].form_id) + def test_hard_delete_forms_returns_forms_found(self): + for i in range(3): + create_form_for_test(DOMAIN, form_id=str(i)) + + deleted_form_ids = set(XFormInstance.objects.hard_delete_forms(DOMAIN, ['0', '1', '2'], return_ids=True)) + + self.assertEqual(deleted_form_ids, {'0', '1', '2'}) + + def test_hard_delete_forms_does_not_include_missing_form_ids(self): + create_form_for_test(DOMAIN, form_id='1') + create_form_for_test(DOMAIN, form_id='3') + + deleted_form_ids = set(XFormInstance.objects.hard_delete_forms(DOMAIN, ['1', '2', '3'], return_ids=True)) + + self.assertEqual(deleted_form_ids, {'1', '3'}) + def assert_form_xml_attachment(self, form): attachments = XFormInstance.objects.get_attachments(form.form_id) self.assertEqual([a.name for a in attachments], ["form.xml"])