From fdc1ace30dcbdaf4d25c22485a4fdcd773628696 Mon Sep 17 00:00:00 2001
From: Matthew Larson
Date: Tue, 16 Apr 2024 09:29:41 -0500
Subject: [PATCH 1/6] Add max scan duration to config

---
 .github/workflows/python-package.yml        | 32 +++++++++++++++++++--
 admin/config/config.yml                     |  3 +-
 admin/docker/docker-compose-internal-lb.yml |  6 ++++
 admin/docker/docker-compose.aws.yml         |  6 ++++
 admin/docker/docker-compose.azure.yml       |  6 ++++
 admin/docker/docker-compose.posix.yml       |  8 ++++++
 admin/docker/docker-compose.yml             |  6 ++++
 hsds/domain_sn.py                           |  9 ++++--
 8 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index f31df7e1..9b7fff5b 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -64,6 +64,10 @@ jobs:
           USER2_PASSWORD: test
           HSDS_ENDPOINT: http://127.0.0.1:5101
           BUCKET_NAME: hsdstest
+          MAX_SCAN_DURATION: 500
+          FLUSH_TIMEOUT: 300
+          FLUSH_SLEEP_INTERVAL: 5
+          LOG_LEVEL: DEBUG
         run: |
           mkdir data
           mkdir data/hsdstest
@@ -83,6 +87,10 @@ jobs:
           USER2_PASSWORD: test
           HSDS_ENDPOINT: http://127.0.0.1:5101
           BUCKET_NAME: hsdstest
+          MAX_SCAN_DURATION: 500
+          FLUSH_TIMEOUT: 300
+          FLUSH_SLEEP_INTERVAL: 5
+          LOG_LEVEL: DEBUG
         run: |
           mkdir data
           mkdir data/hsdstest
@@ -98,6 +106,7 @@ jobs:
       - name: Run HSDS tests
         if: ${{!(matrix.build-method == 'docker' && matrix.os == 'windows-latest')}}
+        id: hsds-tests
         shell: bash
         env:
           ADMIN_PASSWORD: admin
           ADMIN_USERNAME: admin
           USER_NAME: test_user1
           USER_PASSWORD: test
           USER2_NAME: test_user2
           USER2_PASSWORD: test
           BUCKET_NAME: hsdstest
+          MAX_SCAN_DURATION: 500
+          FLUSH_TIMEOUT: 300
+          FLUSH_SLEEP_INTERVAL: 5
+          LOG_LEVEL: DEBUG
         run: |
-          pytest tests/integ/setup_test.py
-          pytest tests/integ
+          python testall.py
+
+      - name: Show HSDS Logs on Fail (Docker)
+        # Only run if the whole workflow failed due to HSDS tests
+        if: ${{failure() && steps.hsds-tests.outcome == 'failure' && (matrix.build-method == 'docker' && matrix.os != 'windows-latest')}}
+        run: |
+          docker logs hsds-sn-1 >&1
+          docker logs hsds-dn-1 >&1
+          docker logs hsds-dn-2 >&1
+          docker logs hsds-dn-3 >&1
+          docker logs hsds-dn-4 >&1
+
+      - name: Show HSDS Logs on Fail (Manual)
+        # Only run if the whole workflow failed due to HSDS tests
+        if: ${{failure() && steps.hsds-tests.outcome == 'failure' && (matrix.build-method == 'manual')}}
+        run: |
+          cat hs.log

       - name: Checkout h5pyd
         if: ${{ ( matrix.os != 'windows-latest' ) }}
diff --git a/admin/config/config.yml b/admin/config/config.yml
index 18a7b1da..b5cada02 100755
--- a/admin/config/config.yml
+++ b/admin/config/config.yml
@@ -33,8 +33,9 @@ max_tcp_connections: 100  # max number of inflight tcp connections
 head_sleep_time: 10  # max sleep time between health checks for head node
 node_sleep_time: 10  # max sleep time between health checks for SN/DN nodes
 async_sleep_time: 1  # max sleep time between async task runs
-scan_sleep_time: 10  # max sleep time between scaning runs
+scan_sleep_time: 10  # max sleep time between scanning runs
 scan_wait_time: 10  # min time to wait after a domain update before starting a scan
+max_scan_duration: 180  # max time to wait for a scan to complete before raising error
 gc_sleep_time: 10  # max time between runs to delete unused objects
 s3_sync_interval: 1  # time to wait between s3_sync checks (in sec)
 s3_age_time: 1  # time to wait since last update to write an object to S3
diff --git a/admin/docker/docker-compose-internal-lb.yml b/admin/docker/docker-compose-internal-lb.yml
index 7cf10004..0656e8b9 100644
--- a/admin/docker/docker-compose-internal-lb.yml
+++ b/admin/docker/docker-compose-internal-lb.yml
@@ -27,6 +27,9 @@ services:
       - NODE_TYPE=dn
       - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
       - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     logging:
@@ -47,6 +50,9 @@ services:
       - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
       - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - HSDS_ENDPOINT=${HSDS_ENDPOINT}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
diff --git a/admin/docker/docker-compose.aws.yml b/admin/docker/docker-compose.aws.yml
index 2e037edb..b0357e3a 100644
--- a/admin/docker/docker-compose.aws.yml
+++ b/admin/docker/docker-compose.aws.yml
@@ -31,6 +31,9 @@ services:
       - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     depends_on:
@@ -54,6 +57,9 @@ services:
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
       - HSDS_ENDPOINT=${HSDS_ENDPOINT}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
     depends_on:
diff --git a/admin/docker/docker-compose.azure.yml b/admin/docker/docker-compose.azure.yml
index 993d287e..9c7045e2 100644
--- a/admin/docker/docker-compose.azure.yml
+++ b/admin/docker/docker-compose.azure.yml
@@ -25,6 +25,9 @@ services:
       - AZURE_CONNECTION_STRING=${AZURE_CONNECTION_STRING}
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     depends_on:
@@ -45,6 +48,9 @@ services:
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
       - HSDS_ENDPOINT=${HSDS_ENDPOINT}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
     depends_on:
diff --git a/admin/docker/docker-compose.posix.yml b/admin/docker/docker-compose.posix.yml
index c07d9353..930f1f49 100644
--- a/admin/docker/docker-compose.posix.yml
+++ b/admin/docker/docker-compose.posix.yml
@@ -24,6 +24,10 @@ services:
       - NODE_TYPE=dn
       - ROOT_DIR=/data
       - BUCKET_NAME=${BUCKET_NAME}
+      - LOG_LEVEL=${LOG_LEVEL}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     depends_on:
@@ -43,6 +47,10 @@ services:
       - ROOT_DIR=/data
       - BUCKET_NAME=${BUCKET_NAME}
       - HSDS_ENDPOINT=${HSDS_ENDPOINT}
+      - LOG_LEVEL=${LOG_LEVEL}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
     depends_on:
diff --git a/admin/docker/docker-compose.yml b/admin/docker/docker-compose.yml
index cc63fefd..3a00b6e0 100644
--- a/admin/docker/docker-compose.yml
+++ b/admin/docker/docker-compose.yml
@@ -31,6 +31,9 @@ services:
       - AZURE_CONNECTION_STRING=${AZURE_CONNECTION_STRING}
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     depends_on:
@@ -54,6 +57,9 @@ services:
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
       - HSDS_ENDPOINT=${HSDS_ENDPOINT}
+      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
+      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
+      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
     depends_on:
diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py
index cc35c59f..1e54a589 100755
--- a/hsds/domain_sn.py
+++ b/hsds/domain_sn.py
@@ -902,19 +902,22 @@ async def PUT_Domain(request):
         if bucket:
             post_params["bucket"] = bucket
         req_send_time = getNow(app)
+        log.debug(f"Sending rescan request at time {req_send_time}")
         await http_post(app, notify_req, data={}, params=post_params)

         # Poll until the scan_complete time is greater than
         # req_send_time or 3 minutes have elapsed
-        MAX_WAIT_TIME = 180
+        max_scan_duration = int(config.get("max_scan_duration", default=180))
         RESCAN_SLEEP_TIME = 0.1
         while True:
             scan_time = await getScanTime(app, root_id, bucket=bucket)
+            log.debug(f"Most recent scan on domain {root_id} completed at time {scan_time}")
             if scan_time > req_send_time:
                 log.info(f"scan complete for root: {root_id}")
                 break
-            if getNow(app) - req_send_time > MAX_WAIT_TIME:
-                log.warn(f"scan failed to complete in {MAX_WAIT_TIME} seconds for {root_id}")
+            if getNow(app) - req_send_time > max_scan_duration:
+                log.warn(f"scan failed to complete in {max_scan_duration}\
+                    seconds for {root_id}")
                 raise HTTPServiceUnavailable()
             log.debug(f"do_rescan sleeping for {RESCAN_SLEEP_TIME}s")
             await asyncio.sleep(RESCAN_SLEEP_TIME)  # avoid busy wait

From 2a7f57a519795a44576c735298a0750c5ce5d0b3 Mon Sep 17 00:00:00 2001
From: Matthew Larson
Date: Tue, 16 Apr 2024 14:12:19 -0500
Subject: [PATCH 2/6] Start rescan with brief wait

---
 .github/workflows/python-package.yml | 9 ---------
 hsds/config.py                       | 2 +-
 hsds/domain_sn.py                    | 8 +++++++-
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 9b7fff5b..575a5888 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -64,9 +64,6 @@ jobs:
           USER2_PASSWORD: test
           HSDS_ENDPOINT: http://127.0.0.1:5101
           BUCKET_NAME: hsdstest
-          MAX_SCAN_DURATION: 500
-          FLUSH_TIMEOUT: 300
-          FLUSH_SLEEP_INTERVAL: 5
           LOG_LEVEL: DEBUG
         run: |
           mkdir data
@@ -87,9 +84,6 @@ jobs:
           USER2_PASSWORD: test
           HSDS_ENDPOINT: http://127.0.0.1:5101
           BUCKET_NAME: hsdstest
-          MAX_SCAN_DURATION: 500
-          FLUSH_TIMEOUT: 300
-          FLUSH_SLEEP_INTERVAL: 5
           LOG_LEVEL: DEBUG
         run: |
           mkdir data
@@ -116,9 +110,6 @@ jobs:
           USER2_NAME: test_user2
           USER2_PASSWORD: test
           BUCKET_NAME: hsdstest
-          MAX_SCAN_DURATION: 500
-          FLUSH_TIMEOUT: 300
-          FLUSH_SLEEP_INTERVAL: 5
           LOG_LEVEL: DEBUG
         run: |
           python testall.py
diff --git a/hsds/config.py b/hsds/config.py
index bbbf4a95..9572ff34 100755
--- a/hsds/config.py
+++ b/hsds/config.py
@@ -147,7 +147,7 @@ def _load_cfg():
                 override = yml_override[x]
                 debug(f"got config override for {x}")

-        if override is not None:
+        if override is not None and override != "":
             if cfgval is not None:
                 try:
                     # convert to same type as yaml
diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py
index 1e54a589..28fc38ad 100755
--- a/hsds/domain_sn.py
+++ b/hsds/domain_sn.py
@@ -903,12 +903,18 @@ async def PUT_Domain(request):
             post_params["bucket"] = bucket
         req_send_time = getNow(app)
         log.debug(f"Sending rescan request at time {req_send_time}")
-        await http_post(app, notify_req, data={}, params=post_params)

         # Poll until the scan_complete time is greater than
         # req_send_time or 3 minutes have elapsed
         max_scan_duration = int(config.get("max_scan_duration", default=180))
         RESCAN_SLEEP_TIME = 0.1
+        INITIAL_SCAN_SLEEP_TIME = 0.4
+
+        # Start with brief wait to avoid time discrepancies between nodes
+        asyncio.sleep(INITIAL_SCAN_SLEEP_TIME)
+
+        await http_post(app, notify_req, data={}, params=post_params)
+
         while True:
             scan_time = await getScanTime(app, root_id, bucket=bucket)
             log.debug(f"Most recent scan on domain {root_id} completed at time {scan_time}")

From 102c25d4dd16483f37fdf6c68f27fb129a1e632f Mon Sep 17 00:00:00 2001
From: Matthew Larson
Date: Tue, 16 Apr 2024 16:38:07 -0500
Subject: [PATCH 3/6] Default to time.time() on POSIX

---
 admin/docker/docker-compose-internal-lb.yml |  6 ------
 admin/docker/docker-compose.aws.yml         |  6 ------
 admin/docker/docker-compose.azure.yml       |  6 ------
 admin/docker/docker-compose.posix.yml       |  8 --------
 admin/docker/docker-compose.yml             |  6 ------
 hsds/util/timeUtil.py                       | 15 +++++++++++++-
 tests/integ/broadcast_test.py               | 22 ---------------------
 tests/integ/value_test.py                   | 13 +++++++-----
 8 files changed, 22 insertions(+), 60 deletions(-)

diff --git a/admin/docker/docker-compose-internal-lb.yml b/admin/docker/docker-compose-internal-lb.yml
index 0656e8b9..7cf10004 100644
--- a/admin/docker/docker-compose-internal-lb.yml
+++ b/admin/docker/docker-compose-internal-lb.yml
@@ -27,9 +27,6 @@ services:
       - NODE_TYPE=dn
       - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
       - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     logging:
@@ -50,9 +47,6 @@ services:
       - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
       - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
       - HSDS_ENDPOINT=${HSDS_ENDPOINT}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
diff --git a/admin/docker/docker-compose.aws.yml b/admin/docker/docker-compose.aws.yml
index b0357e3a..2e037edb 100644
--- a/admin/docker/docker-compose.aws.yml
+++ b/admin/docker/docker-compose.aws.yml
@@ -31,9 +31,6 @@ services:
       - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     depends_on:
@@ -57,9 +54,6 @@ services:
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
       - HSDS_ENDPOINT=${HSDS_ENDPOINT}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
     depends_on:
diff --git a/admin/docker/docker-compose.azure.yml b/admin/docker/docker-compose.azure.yml
index 9c7045e2..993d287e 100644
--- a/admin/docker/docker-compose.azure.yml
+++ b/admin/docker/docker-compose.azure.yml
@@ -25,9 +25,6 @@ services:
       - AZURE_CONNECTION_STRING=${AZURE_CONNECTION_STRING}
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     depends_on:
@@ -48,9 +45,6 @@ services:
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
       - HSDS_ENDPOINT=${HSDS_ENDPOINT}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
     depends_on:
diff --git a/admin/docker/docker-compose.posix.yml b/admin/docker/docker-compose.posix.yml
index 930f1f49..c07d9353 100644
--- a/admin/docker/docker-compose.posix.yml
+++ b/admin/docker/docker-compose.posix.yml
@@ -24,10 +24,6 @@ services:
       - NODE_TYPE=dn
       - ROOT_DIR=/data
       - BUCKET_NAME=${BUCKET_NAME}
-      - LOG_LEVEL=${LOG_LEVEL}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     depends_on:
@@ -47,10 +43,6 @@ services:
       - ROOT_DIR=/data
       - BUCKET_NAME=${BUCKET_NAME}
       - HSDS_ENDPOINT=${HSDS_ENDPOINT}
-      - LOG_LEVEL=${LOG_LEVEL}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
     depends_on:
diff --git a/admin/docker/docker-compose.yml b/admin/docker/docker-compose.yml
index 3a00b6e0..cc63fefd 100644
--- a/admin/docker/docker-compose.yml
+++ b/admin/docker/docker-compose.yml
@@ -31,9 +31,6 @@ services:
       - AZURE_CONNECTION_STRING=${AZURE_CONNECTION_STRING}
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${DN_PORT}
     depends_on:
@@ -57,9 +54,6 @@ services:
       - BUCKET_NAME=${BUCKET_NAME}
       - LOG_LEVEL=${LOG_LEVEL}
       - HSDS_ENDPOINT=${HSDS_ENDPOINT}
-      - MAX_SCAN_DURATION=${MAX_SCAN_DURATION}
-      - FLUSH_TIMEOUT=${FLUSH_TIMEOUT}
-      - FLUSH_SLEEP_INTERVAL=${FLUSH_SLEEP_INTERVAL}
     ports:
       - ${SN_PORT_RANGE}:${SN_PORT}
     depends_on:
diff --git a/hsds/util/timeUtil.py b/hsds/util/timeUtil.py
index a1e904ec..e4ae9d3f 100755
--- a/hsds/util/timeUtil.py
+++ b/hsds/util/timeUtil.py
@@ -11,6 +11,7 @@
 ##############################################################################
 from datetime import datetime
 import time
+import os

 import pytz

@@ -67,4 +68,16 @@ def getNow(app):
     Returns a precise timestamp even on platforms where
     time.time() has low resolution (e.g. Windows)
     """
-    return (time.perf_counter() - app["start_time_relative"]) + app["start_time"]
+    system = os.name
+    current_time = 0
+
+    if system == "nt":
+        # Windows
+        current_time = (time.perf_counter() - app["start_time_relative"]) + app["start_time"]
+    elif system == "posix":
+        # Unix
+        current_time = time.time()
+    else:
+        raise ValueError(f"Unsupported OS: {system}")
+
+    return current_time
diff --git a/tests/integ/broadcast_test.py b/tests/integ/broadcast_test.py
index f480e637..e780a4b4 100755
--- a/tests/integ/broadcast_test.py
+++ b/tests/integ/broadcast_test.py
@@ -36,28 +36,6 @@ def getRootUUID(self, domain, username=None, password=None):
             domain, username=username, password=password, session=self.session
         )

-    def checkVerbose(self, dset_id, headers=None, expected=None):
-        # do a flush with rescan, then check the expected return values are correct
-        req = f"{self.endpoint}/"
-        params = {"flush": 1, "rescan": 1}
-        rsp = self.session.put(req, params=params, headers=headers)
-        # should get a NO_CONTENT code,
-        self.assertEqual(rsp.status_code, 204)
-
-        # do a get and verify the additional keys are
-        req = f"{self.endpoint}/datasets/{dset_id}"
-        params = {"verbose": 1}
-
-        rsp = self.session.get(req, params=params, headers=headers)
-        self.assertEqual(rsp.status_code, 200)
-        rspJson = json.loads(rsp.text)
-
-        for k in expected:
-            self.assertTrue(k in rspJson)
-            self.assertEqual(rspJson[k], expected[k])
-
-    # main
-
     def testPut1DDataset(self):
         # Test PUT value with broadcast for 1d dataset
         print("testPut1DDataset", self.base_domain)
diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py
index d1925fd2..f43517ec 100755
--- a/tests/integ/value_test.py
+++ b/tests/integ/value_test.py
@@ -40,11 +40,16 @@ def getRootUUID(self, domain, username=None, password=None):

     def checkVerbose(self, dset_id, headers=None, expected=None):
         # do a flush with rescan, then check the expected return values are correct
+        num_retries = 5
         req = f"{self.endpoint}/"
         params = {"flush": 1, "rescan": 1}
-        rsp = self.session.put(req, params=params, headers=headers)
-        # should get a NO_CONTENT code,
-        self.assertEqual(rsp.status_code, 204)
+        for i in range(num_retries):
+            rsp = self.session.put(req, params=params, headers=headers)
+            if (rsp.status_code == 503):
+                # Retry
+                continue
+            # should get a NO_CONTENT code
+            self.assertEqual(rsp.status_code, 204)

         # do a get and verify the additional keys are
         req = f"{self.endpoint}/datasets/{dset_id}"
@@ -58,8 +63,6 @@ def checkVerbose(self, dset_id, headers=None, expected=None):
             self.assertTrue(k in rspJson)
             self.assertEqual(rspJson[k], expected[k])

-    # main
-
     def testPut1DDataset(self):
         # Test PUT value for 1d dataset
         print("testPut1DDataset", self.base_domain)

From 2dce7830fd11867df33f407a51f389f48ac05099 Mon Sep 17 00:00:00 2001
From: Matthew Larson
Date: Wed, 17 Apr 2024 09:21:13 -0500
Subject: [PATCH 4/6] Remove initial sleep wait

---
 hsds/domain_sn.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py
index 28fc38ad..cf2f911f 100755
--- a/hsds/domain_sn.py
+++ b/hsds/domain_sn.py
@@ -903,17 +903,12 @@ async def PUT_Domain(request):
             post_params["bucket"] = bucket
         req_send_time = getNow(app)
         log.debug(f"Sending rescan request at time {req_send_time}")
+        await http_post(app, notify_req, data={}, params=post_params)

         # Poll until the scan_complete time is greater than
         # req_send_time or 3 minutes have elapsed
         max_scan_duration = int(config.get("max_scan_duration", default=180))
         RESCAN_SLEEP_TIME = 0.1
-        INITIAL_SCAN_SLEEP_TIME = 0.4
-
-        # Start with brief wait to avoid time discrepancies between nodes
-        asyncio.sleep(INITIAL_SCAN_SLEEP_TIME)
-
-        await http_post(app, notify_req, data={}, params=post_params)

         while True:
             scan_time = await getScanTime(app, root_id, bucket=bucket)

From 4b7d7c7af2836a1cbd22005f622aa92cc209d5e6 Mon Sep 17 00:00:00 2001
From: Matthew Larson
Date: Wed, 17 Apr 2024 10:14:39 -0500
Subject: [PATCH 5/6] Avoid infinite scan with low-granularity time

---
 hsds/domain_sn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py
index cf2f911f..808cf59a 100755
--- a/hsds/domain_sn.py
+++ b/hsds/domain_sn.py
@@ -913,7 +913,7 @@ async def PUT_Domain(request):
         while True:
             scan_time = await getScanTime(app, root_id, bucket=bucket)
             log.debug(f"Most recent scan on domain {root_id} completed at time {scan_time}")
-            if scan_time > req_send_time:
+            if scan_time >= req_send_time:
                 log.info(f"scan complete for root: {root_id}")
                 break
             if getNow(app) - req_send_time > max_scan_duration:

From ade4b9dab8d9eacf74fb5d8fe7aa0b37fee9733f Mon Sep 17 00:00:00 2001
From: Matthew Larson
Date: Wed, 17 Apr 2024 10:30:07 -0500
Subject: [PATCH 6/6] Remove unneeded repeat scan in tests

---
 tests/integ/value_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py
index f43517ec..c2bd68bf 100755
--- a/tests/integ/value_test.py
+++ b/tests/integ/value_test.py
@@ -50,6 +50,7 @@ def checkVerbose(self, dset_id, headers=None, expected=None):
                 continue
             # should get a NO_CONTENT code
             self.assertEqual(rsp.status_code, 204)
+            break

         # do a get and verify the additional keys are
         req = f"{self.endpoint}/datasets/{dset_id}"
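Note: taken together, patches 1, 4, and 5 converge on a simple "notify then poll" pattern in PUT_Domain: record the request time, send the rescan notification, then poll the domain's last-scan timestamp with a ">=" comparison (so low-resolution clocks still terminate) until max_scan_duration elapses. The following is a minimal, self-contained sketch of that pattern, not the HSDS code itself; notify_rescan and get_scan_time are hypothetical stand-ins for the http_post notification and getScanTime calls.

    import asyncio
    import time

    MAX_SCAN_DURATION = 180   # mirrors the max_scan_duration default added to config.yml
    RESCAN_SLEEP_TIME = 0.1   # seconds to sleep between polls

    async def wait_for_rescan(notify_rescan, get_scan_time):
        """Request a rescan, then poll until a scan at least as new as the request completes."""
        req_send_time = time.time()
        await notify_rescan()                       # stand-in for the http_post notification
        while True:
            scan_time = await get_scan_time()       # stand-in for getScanTime(app, root_id, ...)
            if scan_time >= req_send_time:          # ">=" avoids looping forever on coarse clocks
                return scan_time
            if time.time() - req_send_time > MAX_SCAN_DURATION:
                raise TimeoutError(f"scan did not complete in {MAX_SCAN_DURATION} seconds")
            await asyncio.sleep(RESCAN_SLEEP_TIME)  # avoid a busy wait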
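Note: the hsds/config.py change in PATCH 2/6 (the check `if override is not None and override != "":`) makes the loader ignore overrides that resolve to an empty string, for example a compose entry such as MAX_SCAN_DURATION=${MAX_SCAN_DURATION} when the host leaves that variable unset. Below is a rough sketch of that precedence rule under stated assumptions; resolve() is a hypothetical helper, not the real _load_cfg.

    import os

    def resolve(name, yaml_defaults):
        # An override wins only when it is set AND non-empty;
        # otherwise fall back to the config.yml default.
        override = os.environ.get(name.upper())
        if override is not None and override != "":
            return type(yaml_defaults[name])(override)  # convert to same type as the yaml value
        return yaml_defaults[name]

    # resolve("max_scan_duration", {"max_scan_duration": 180}) -> 180 when MAX_SCAN_DURATION
    # is unset or empty, and 500 when MAX_SCAN_DURATION=500 is exported.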