Skip to content

Commit

Permalink
Merge pull request #35514 from dimagi/mjr/form-deletion-script
Browse files Browse the repository at this point in the history
Added a form deletion script for genie requests
  • Loading branch information
mjriley authored Dec 12, 2024
2 parents c65c7e3 + 1d208a7 commit e67f804
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 5 deletions.
56 changes: 56 additions & 0 deletions corehq/apps/cleanup/management/commands/hard_delete_forms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from django.core.management.base import BaseCommand, CommandError
import csv
import itertools
from dimagi.utils.chunked import chunked
from corehq.form_processor.models import XFormInstance


# Zero-based CSV column that holds the form ID (checked against the "Form ID" header).
INDEX_FORM_ID = 0
# Number of CSV rows grouped into a single hard_delete_forms call.
CHUNK_SIZE = 100


class Command(BaseCommand):
    """Permanently delete the forms listed in a CSV file from one domain.

    The CSV must begin with a header row whose column ``INDEX_FORM_ID`` is
    exactly "Form ID"; each following row supplies one form ID in that column.
    Progress is printed per form so an interrupted run can be continued with
    ``--resume_id``.
    """

    help = 'Hard delete the forms listed in a CSV file whose first column is "Form ID"'

    def add_arguments(self, parser):
        """Register the positional ``domain``/``filename`` and optional ``--resume_id``."""
        parser.add_argument('domain', help='Domain name that owns the forms to be deleted')
        parser.add_argument('filename', help='path to the CSV file')
        parser.add_argument('--resume_id', help='form ID to start at, within the CSV file')

    def handle(self, domain, filename, resume_id=None, **options):
        """Open the CSV at ``filename`` and delete the forms it lists.

        :param domain: domain that owns the forms
        :param filename: path to a CSV with a header containing a "Form ID" field
        :param resume_id: if given, rows before the first row with this form ID are skipped
        :raises CommandError: if the file is empty or the header is not as expected
        """
        # 'utf-8-sig' strips a leading BOM; newline='' lets the csv module
        # do its own newline handling, as its documentation requires.
        with open(filename, mode='r', encoding='utf-8-sig', newline='') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            self._process_rows(reader, domain, resume_id)

    def _process_rows(self, rows, domain, resume_id):
        """Validate the header, then hard delete form IDs in chunks of ``CHUNK_SIZE``.

        :param rows: iterable of CSV rows (lists of strings), header row first
        :param domain: domain that owns the forms
        :param resume_id: optional form ID at which processing should start
        :raises CommandError: if there is no header row or it is not "Form ID"
        """
        rows = iter(rows)
        header_row = next(rows, None)  # consume the header line
        if header_row is None:
            raise CommandError(
                'Expected a header row with a "Form ID" column, but the file is empty. Exiting'
            )

        # A blank first line is parsed as an empty list; treat it as a bad header
        # rather than letting the index lookup raise IndexError.
        header_value = header_row[INDEX_FORM_ID] if len(header_row) > INDEX_FORM_ID else ''
        if header_value != 'Form ID':
            raise CommandError(
                f'Expected Column {INDEX_FORM_ID} to be "Form ID", found "{header_value}". Exiting'
            )

        # csv.reader yields [] for blank lines; skip rows that have no form ID column.
        rows = (row for row in rows if len(row) > INDEX_FORM_ID)

        num_deleted = 0
        num_processed = 0

        if resume_id:
            print('resuming at: ', resume_id)
            rows = itertools.dropwhile(lambda row: row[INDEX_FORM_ID] != resume_id, rows)

        print('Starting form deletion')
        for chunk in chunked(rows, CHUNK_SIZE):
            form_ids = [row[INDEX_FORM_ID] for row in chunk]
            num_processed += len(form_ids)

            try:
                deleted_form_ids = set(XFormInstance.objects.hard_delete_forms(
                    domain, form_ids, return_ids=True))
            except Exception:
                # Report the failing chunk so the run can be resumed, then re-raise.
                print('failed during processing of: ', form_ids)
                raise

            for form_id in form_ids:
                if form_id in deleted_form_ids:
                    print('Deleted: ', form_id)
                else:
                    print('Not found:', form_id)

            num_deleted += len(deleted_form_ids)

        if resume_id and not num_processed:
            # dropwhile discarded every row: the resume ID never appeared in the file.
            print('resume_id was not found in the file: ', resume_id)

        print(f'Complete -- removed {num_deleted} forms')
16 changes: 11 additions & 5 deletions corehq/form_processor/models/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,8 @@ def soft_undelete_forms(self, domain, form_ids):

return count

def hard_delete_forms(self, domain, form_ids, delete_attachments=True, *, publish_changes=True):
def hard_delete_forms(
self, domain, form_ids, return_ids=False, delete_attachments=True, *, publish_changes=True):
"""Delete forms permanently
:param publish_changes: Flag for change feed publication.
Expand All @@ -398,12 +399,17 @@ def hard_delete_forms(self, domain, form_ids, delete_attachments=True, *, publis
assert isinstance(form_ids, list)

deleted_count = 0
deleted_ids = []
for db_name, split_form_ids in split_list_by_db_partition(form_ids):
# cascade should delete the operations
_, deleted_models = self.using(db_name).filter(
domain=domain, form_id__in=split_form_ids
).delete()
query = self.using(db_name).filter(domain=domain, form_id__in=split_form_ids)
with transaction.atomic():
if return_ids:
found_forms = list(query.values_list('form_id', flat=True))
_, deleted_models = query.delete()
deleted_count += deleted_models.get(self.model._meta.label, 0)
if return_ids:
deleted_ids.extend(found_forms)

if delete_attachments and deleted_count:
if deleted_count != len(form_ids):
Expand All @@ -421,7 +427,7 @@ def hard_delete_forms(self, domain, form_ids, delete_attachments=True, *, publis
if publish_changes:
self.publish_deleted_forms(domain, form_ids)

return deleted_count
return deleted_ids if return_ids else deleted_count

@staticmethod
def publish_deleted_forms(domain, form_ids):
Expand Down
16 changes: 16 additions & 0 deletions corehq/form_processor/tests/test_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,22 @@ def test_hard_delete_forms(self):
self.assertEqual(1, len(forms))
self.assertEqual(form_ids[0], forms[0].form_id)

def test_hard_delete_forms_returns_forms_found(self):
    """With ``return_ids=True`` the IDs of all deleted forms are returned."""
    form_ids = [str(index) for index in range(3)]
    for form_id in form_ids:
        create_form_for_test(DOMAIN, form_id=form_id)

    returned_ids = XFormInstance.objects.hard_delete_forms(DOMAIN, form_ids, return_ids=True)

    self.assertEqual(set(returned_ids), {'0', '1', '2'})

def test_hard_delete_forms_does_not_include_missing_form_ids(self):
    """Requested IDs with no matching form are left out of the returned IDs."""
    for existing_id in ('1', '3'):
        create_form_for_test(DOMAIN, form_id=existing_id)

    requested_ids = ['1', '2', '3']
    returned_ids = XFormInstance.objects.hard_delete_forms(DOMAIN, requested_ids, return_ids=True)

    self.assertEqual(set(returned_ids), {'1', '3'})

def assert_form_xml_attachment(self, form):
attachments = XFormInstance.objects.get_attachments(form.form_id)
self.assertEqual([a.name for a in attachments], ["form.xml"])
Expand Down

0 comments on commit e67f804

Please sign in to comment.