From b770348542452c1bf5c384a49b7f63e3f410edc5 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Mon, 9 Dec 2024 17:33:04 +0200 Subject: [PATCH 1/6] Script to create test data --- .../management/commands/create_test_data.py | 283 ++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 corehq/apps/geospatial/management/commands/create_test_data.py diff --git a/corehq/apps/geospatial/management/commands/create_test_data.py b/corehq/apps/geospatial/management/commands/create_test_data.py new file mode 100644 index 000000000000..df2c312f2a2c --- /dev/null +++ b/corehq/apps/geospatial/management/commands/create_test_data.py @@ -0,0 +1,283 @@ +import random +from uuid import uuid4 + +from django.core.management.base import BaseCommand + +from shapely.geometry import Point, Polygon + +from casexml.apps.case.mock import CaseBlock +from dimagi.utils.couch.database import get_safe_write_kwargs + +from corehq.apps.geospatial.utils import ( + get_geo_case_property, + get_geo_user_property, +) +from corehq.apps.hqcase.utils import submit_case_blocks +from corehq.apps.users.models import CommCareUser + +CASE_TYPE = 'test-case' +CASE_BLOCK_CHUNK_SIZE = 100 +SCRIPT_NAME = 'corehq.apps.geospatial...create_test_metadata' + + +class Command(BaseCommand): + help = 'Create geo-located test data' + + def add_arguments(self, parser): + parser.add_argument('domain') + parser.add_argument('users', type=int) + parser.add_argument('cases', type=int) + + def handle(self, *args, **options): + domain = options['domain'] + num_users = options['users'] + num_cases = options['cases'] + + self.stdout.write(f'Creating {num_users} users for domain {domain}') + create_users(domain, num_users) + + self.stdout.write(f'Creating {num_cases} cases for domain {domain}') + create_cases(domain, num_cases) + + +def create_users(domain, num_users): + geo_property = get_geo_user_property(domain) + for __ in range(num_users): + create_user(domain, geo_property) + + +def create_user(domain, 
geo_property): + random_point = random_point_in_scotland() + random_suffix = random.randint(10_000, 99_999) + username = '.'.join(( + random.choice(FIRST_NAMES), + random.choice(LAST_NAMES), + str(random_suffix), + )) + mobile_username = f'{username}@{domain}.commcarehq.org' + password = '123' + user = CommCareUser.create( + domain, + mobile_username, + password, + created_by=None, + created_via=SCRIPT_NAME, + user_data={geo_property: f'{random_point.y} {random_point.x} 0 0'}, + commiit=False, # Save below to avoid logging + ) + user.save(**get_safe_write_kwargs()) + + +def random_point_in_scotland(): + min_x, min_y, max_x, max_y = ROUGHLY_SCOTLAND.bounds + while True: + random_x = random.uniform(min_x, max_x) + random_y = random.uniform(min_y, max_y) + random_point = Point(random_x, random_y) + if ROUGHLY_SCOTLAND.contains(random_point): + return random_point + + +def create_cases(domain, num_cases): + geo_property = get_geo_case_property(domain) + i = 0 + case_blocks = [] + while True: + if i == num_cases: + break + i += 1 + case_blocks.append(get_case_block(geo_property)) + if not i % CASE_BLOCK_CHUNK_SIZE: + submit_chunk(domain, case_blocks) + case_blocks = [] + if case_blocks: + submit_chunk(domain, case_blocks) + + +def get_case_block(geo_property): + case_id = uuid4().hex + case_name = f'{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}' + random_point = random_point_in_scotland() + return CaseBlock( + case_id=case_id, + case_type=CASE_TYPE, + case_name=case_name, + create=True, + update={geo_property: f'{random_point.y} {random_point.x} 0 0'}, + ) + + +def submit_chunk(domain, case_blocks): + submit_case_blocks( + [cb.as_text() for cb in case_blocks], + domain, + device_id=SCRIPT_NAME, + ) + + +ROUGHLY_SCOTLAND = Polygon([ # Sorry all of the islands + (58.421036, -4.910423), + (58.589584, -3.165290), + (57.449559, -4.344813), + (57.622931, -1.927251), + (55.945067, -3.356255), + (55.802299, -2.093642), + (55.802299, -2.093642), +]) + + +FIRST_NAMES = 
[ + 'Aaliyah', + 'Aaron', + 'Abigail', + 'Addison', + 'Aiden', + 'Alexander', + 'Amelia', + 'Andrew', + 'Anthony', + 'Aria', + 'Aubrey', + 'Audrey', + 'Aurora', + 'Ava', + 'Avery', + 'Bella', + 'Benjamin', + 'Brooklyn', + 'Caleb', + 'Carter', + 'Charles', + 'Charlotte', + 'Chloe', + 'Christian', + 'Christopher', + 'Claire', + 'Connor', + 'Daniel', + 'David', + 'Dylan', + 'Eli', + 'Ella', + 'Ellie', + 'Emma', + 'Ethan', + 'Evelyn', + 'Gabriel', + 'Genesis', + 'Grace', + 'Grayson', + 'Hannah', + 'Harper', + 'Hazel', + 'Henry', + 'Hunter', + 'Isaac', + 'Isabella', + 'Isaiah', + 'Jack', + 'Jackson', + 'James', + 'Jameson', + 'Jaxon', + 'John', + 'Joseph', + 'Joshua', + 'Julian', + 'Kennedy', + 'Kinsley', + 'Landon', + 'Layla', + 'Leah', + 'Levi', + 'Liam', + 'Lillian', + 'Lily', + 'Lincoln', + 'Logan', + 'Lucas', + 'Lucy', + 'Luke', + 'Madison', + 'Matthew', + 'Mia', + 'Michael', + 'Mila', + 'Natalie', + 'Nathan', + 'Noah', + 'Nora', + 'Olivia', + 'Owen', + 'Paisley', + 'Penelope', + 'Riley', + 'Ryan', + 'Samantha', + 'Samuel', + 'Savannah', + 'Scarlett', + 'Sebastian', + 'Skylar', + 'Sophia', + 'Stella', + 'Thomas', + 'Victoria', + 'Violet', + 'William', + 'Wyatt', + 'Zoey', +] + +LAST_NAMES = [ + 'Anderson', + 'Brown', + 'Cameron', + 'Campbell', + 'Davis', + 'Duncan', + 'Ferguson', + 'Fraser', + 'Garcia', + 'Gonzalez', + 'Graham', + 'Hamilton', + 'Harris', + 'Henderson', + 'Hernandez', + 'Jackson', + 'Johnson', + 'Johnston', + 'Jones', + 'Lee', + 'Lopez', + 'MacDonald', + 'MacKenzie', + 'MacLeod', + 'Martin', + 'Martinez', + 'Miller', + 'Moore', + 'Morrison', + 'Murray', + 'Paterson', + 'Perez', + 'Reid', + 'Robertson', + 'Rodriguez', + 'Ross', + 'Scott', + 'Sinclair', + 'Smith', + 'Stewart', + 'Sutherland', + 'Taylor', + 'Thomas', + 'Thompson', + 'Thomson', + 'Wallace', + 'White', + 'Williams', + 'Wilson', + 'Young', +] From 4c2b5a6f7f96b4edc3f496e096e5287250fb639a Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 10 Dec 2024 13:51:39 +0200 Subject: [PATCH 
2/6] Bump it up to 1,000 case blocks in a chunk --- corehq/apps/geospatial/management/commands/create_test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/apps/geospatial/management/commands/create_test_data.py b/corehq/apps/geospatial/management/commands/create_test_data.py index df2c312f2a2c..3fd8f4fe1378 100644 --- a/corehq/apps/geospatial/management/commands/create_test_data.py +++ b/corehq/apps/geospatial/management/commands/create_test_data.py @@ -16,7 +16,7 @@ from corehq.apps.users.models import CommCareUser CASE_TYPE = 'test-case' -CASE_BLOCK_CHUNK_SIZE = 100 +CASE_BLOCK_CHUNK_SIZE = 1000 SCRIPT_NAME = 'corehq.apps.geospatial...create_test_metadata' From 345d6d8a4e5fd1e5491726f5337294e0245f6b3f Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 10 Dec 2024 13:59:24 +0200 Subject: [PATCH 3/6] Example script to spawn 10 jobs to create cases --- scripts/exec-background.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100755 scripts/exec-background.sh diff --git a/scripts/exec-background.sh b/scripts/exec-background.sh new file mode 100755 index 000000000000..625188ca02c3 --- /dev/null +++ b/scripts/exec-background.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Spawn 10 background jobs +for i in {1..10}; do + # Create 0 users and 10,000 cases in the `test` domain + ./manage.py create_test_data test 0 10000 & +done + +# Wait for all background jobs to finish +wait From 3cb629a66ca7e5b4c7b937732ec1c535307c3e27 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 11 Dec 2024 11:39:21 +0200 Subject: [PATCH 4/6] Use Faker, and other feedback --- .../management/commands/create_test_data.py | 197 ++---------------- 1 file changed, 22 insertions(+), 175 deletions(-) diff --git a/corehq/apps/geospatial/management/commands/create_test_data.py b/corehq/apps/geospatial/management/commands/create_test_data.py index 3fd8f4fe1378..bcf91e6bc111 100644 --- a/corehq/apps/geospatial/management/commands/create_test_data.py +++ 
b/corehq/apps/geospatial/management/commands/create_test_data.py @@ -3,6 +3,7 @@ from django.core.management.base import BaseCommand +from faker import Faker from shapely.geometry import Point, Polygon from casexml.apps.case.mock import CaseBlock @@ -25,8 +26,8 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('domain') - parser.add_argument('users', type=int) - parser.add_argument('cases', type=int) + parser.add_argument('users', type=nonnegative_int) + parser.add_argument('cases', type=nonnegative_int) def handle(self, *args, **options): domain = options['domain'] @@ -40,6 +41,13 @@ def handle(self, *args, **options): create_cases(domain, num_cases) +def nonnegative_int(value): + value = int(value) + if value < 0: + raise ValueError('Value must be positive or zero') + return value + + def create_users(domain, num_users): geo_property = get_geo_user_property(domain) for __ in range(num_users): @@ -47,21 +55,22 @@ def create_users(domain, num_users): def create_user(domain, geo_property): - random_point = random_point_in_scotland() - random_suffix = random.randint(10_000, 99_999) - username = '.'.join(( - random.choice(FIRST_NAMES), - random.choice(LAST_NAMES), - str(random_suffix), - )) + fake = Faker() + first_name = fake.first_name() + last_name = fake.last_name() + random_suffix = str(random.randint(10_000, 99_999)) + username = '.'.join((first_name, last_name, random_suffix)) mobile_username = f'{username}@{domain}.commcarehq.org' password = '123' + random_point = random_point_in_scotland() user = CommCareUser.create( domain, mobile_username, password, created_by=None, created_via=SCRIPT_NAME, + first_name=first_name, + last_name=last_name, user_data={geo_property: f'{random_point.y} {random_point.x} 0 0'}, commiit=False, # Save below to avoid logging ) @@ -80,12 +89,8 @@ def random_point_in_scotland(): def create_cases(domain, num_cases): geo_property = get_geo_case_property(domain) - i = 0 case_blocks = [] - while True: - if 
i == num_cases: - break - i += 1 + for i in range(num_cases): case_blocks.append(get_case_block(geo_property)) if not i % CASE_BLOCK_CHUNK_SIZE: submit_chunk(domain, case_blocks) @@ -95,13 +100,12 @@ def create_cases(domain, num_cases): def get_case_block(geo_property): - case_id = uuid4().hex - case_name = f'{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}' + fake = Faker() random_point = random_point_in_scotland() return CaseBlock( - case_id=case_id, + case_id=uuid4().hex, case_type=CASE_TYPE, - case_name=case_name, + case_name=fake.name(), create=True, update={geo_property: f'{random_point.y} {random_point.x} 0 0'}, ) @@ -124,160 +128,3 @@ def submit_chunk(domain, case_blocks): (55.802299, -2.093642), (55.802299, -2.093642), ]) - - -FIRST_NAMES = [ - 'Aaliyah', - 'Aaron', - 'Abigail', - 'Addison', - 'Aiden', - 'Alexander', - 'Amelia', - 'Andrew', - 'Anthony', - 'Aria', - 'Aubrey', - 'Audrey', - 'Aurora', - 'Ava', - 'Avery', - 'Bella', - 'Benjamin', - 'Brooklyn', - 'Caleb', - 'Carter', - 'Charles', - 'Charlotte', - 'Chloe', - 'Christian', - 'Christopher', - 'Claire', - 'Connor', - 'Daniel', - 'David', - 'Dylan', - 'Eli', - 'Ella', - 'Ellie', - 'Emma', - 'Ethan', - 'Evelyn', - 'Gabriel', - 'Genesis', - 'Grace', - 'Grayson', - 'Hannah', - 'Harper', - 'Hazel', - 'Henry', - 'Hunter', - 'Isaac', - 'Isabella', - 'Isaiah', - 'Jack', - 'Jackson', - 'James', - 'Jameson', - 'Jaxon', - 'John', - 'Joseph', - 'Joshua', - 'Julian', - 'Kennedy', - 'Kinsley', - 'Landon', - 'Layla', - 'Leah', - 'Levi', - 'Liam', - 'Lillian', - 'Lily', - 'Lincoln', - 'Logan', - 'Lucas', - 'Lucy', - 'Luke', - 'Madison', - 'Matthew', - 'Mia', - 'Michael', - 'Mila', - 'Natalie', - 'Nathan', - 'Noah', - 'Nora', - 'Olivia', - 'Owen', - 'Paisley', - 'Penelope', - 'Riley', - 'Ryan', - 'Samantha', - 'Samuel', - 'Savannah', - 'Scarlett', - 'Sebastian', - 'Skylar', - 'Sophia', - 'Stella', - 'Thomas', - 'Victoria', - 'Violet', - 'William', - 'Wyatt', - 'Zoey', -] - -LAST_NAMES = [ - 'Anderson', - 
'Brown', - 'Cameron', - 'Campbell', - 'Davis', - 'Duncan', - 'Ferguson', - 'Fraser', - 'Garcia', - 'Gonzalez', - 'Graham', - 'Hamilton', - 'Harris', - 'Henderson', - 'Hernandez', - 'Jackson', - 'Johnson', - 'Johnston', - 'Jones', - 'Lee', - 'Lopez', - 'MacDonald', - 'MacKenzie', - 'MacLeod', - 'Martin', - 'Martinez', - 'Miller', - 'Moore', - 'Morrison', - 'Murray', - 'Paterson', - 'Perez', - 'Reid', - 'Robertson', - 'Rodriguez', - 'Ross', - 'Scott', - 'Sinclair', - 'Smith', - 'Stewart', - 'Sutherland', - 'Taylor', - 'Thomas', - 'Thompson', - 'Thomson', - 'Wallace', - 'White', - 'Williams', - 'Wilson', - 'Young', -] From bfcb3e0b40818e2441715896ac209a7118118bdc Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 11 Dec 2024 11:39:49 +0200 Subject: [PATCH 5/6] Fix typo --- corehq/apps/geospatial/management/commands/create_test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/apps/geospatial/management/commands/create_test_data.py b/corehq/apps/geospatial/management/commands/create_test_data.py index bcf91e6bc111..c2faa2988b06 100644 --- a/corehq/apps/geospatial/management/commands/create_test_data.py +++ b/corehq/apps/geospatial/management/commands/create_test_data.py @@ -72,7 +72,7 @@ def create_user(domain, geo_property): first_name=first_name, last_name=last_name, user_data={geo_property: f'{random_point.y} {random_point.x} 0 0'}, - commiit=False, # Save below to avoid logging + commit=False, # Save below to avoid logging ) user.save(**get_safe_write_kwargs()) From fe29ba81c50e52d661a9fb6a7e11886831b42ab0 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Fri, 13 Dec 2024 18:06:16 +0200 Subject: [PATCH 6/6] Add docstring, drop unnecessary script --- .../management/commands/create_test_data.py | 30 +++++++++++++++++++ scripts/exec-background.sh | 10 ------- 2 files changed, 30 insertions(+), 10 deletions(-) delete mode 100755 scripts/exec-background.sh diff --git a/corehq/apps/geospatial/management/commands/create_test_data.py 
b/corehq/apps/geospatial/management/commands/create_test_data.py index c2faa2988b06..e2ece67c8552 100644 --- a/corehq/apps/geospatial/management/commands/create_test_data.py +++ b/corehq/apps/geospatial/management/commands/create_test_data.py @@ -1,3 +1,33 @@ +""" +Create geo-located mobile workers and geo-located cases for testing +national-scale bulk disbursement for microplanning. + +This script uses Faker and Shapely, which are not installed in +production environments. + +To use this management command, do a limited-release deploy, and then +install the additional requirements in its virtualenv. e.g. :: + + $ cchq --control staging deploy commcare \ + --private \ + --limit='django_manage[0]' \ + --keep-days=7 \ + --commcare-rev=nh/test_data + ... + Your private release is located here: + /home/cchq/www/staging/releases/2024-12-11_15.07 + + $ cchq staging tmux 'django_manage[0]' + + cchq:~$ cd www/staging/releases/2024-12-11_15.07 + cchq:~$ source python_env/bin/activate + cchq:~$ pip install Faker + cchq:~$ pip install shapely + +The django_manage machine has two cores. To execute the management +command on both cores, simply run the command in two tmux windows. + +""" import random from uuid import uuid4 diff --git a/scripts/exec-background.sh b/scripts/exec-background.sh deleted file mode 100755 index 625188ca02c3..000000000000 --- a/scripts/exec-background.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -# Spawn 10 background jobs -for i in {1..10}; do - # Create 0 users and 10,000 cases in the `test` domain - ./manage.py create_test_data test 0 10000 & -done - -# Wait for all background jobs to finish -wait