From d2959cfd2b350ce0d0f9b91b968f31178f47c5df Mon Sep 17 00:00:00 2001 From: Andrew Rowley Date: Thu, 30 Nov 2023 08:49:41 +0000 Subject: [PATCH 1/7] Attempt to avoid skipping of tests in whole board tests --- test_whole_board/test_whole_board.py | 77 +++++++++++++++++----------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/test_whole_board/test_whole_board.py b/test_whole_board/test_whole_board.py index fb84bcc09d..b163983492 100644 --- a/test_whole_board/test_whole_board.py +++ b/test_whole_board/test_whole_board.py @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pyNN.spiNNaker as sim -import spynnaker -from spalloc_client.job import Job -from spalloc_client.states import JobState import pytest import tempfile import os import traceback import sys -import time +import logging +from time import sleep +from shutil import rmtree + +import pyNN.spiNNaker as sim +from spinnman.spalloc import SpallocClient, SpallocState class WholeBoardTest(object): @@ -57,6 +58,10 @@ class WholeBoardTest(object): dl = list(ur) dl.reverse() + def __init__(self): + self.to_allocate = None + self.targets = None + def find_a_placement(self): for count in range(17, 0, -1): for (x, y), processors in self.to_allocate.items(): @@ -187,40 +192,54 @@ def do_run(self): sim.end() -boards = [(x, y, b) for x in range(20) for y in range(20) for b in range(3)] +BOARDS = [(x, y, b) for x in range(20) for y in range(20) for b in range(3)] +SPALLOC_URL = "https://spinnaker.cs.man.ac.uk/spalloc" +SPALLOC_USERNAME = "jenkins" +SPALLOC_PASSWORD = os.getenv("SPALLOC_PASSWORD") +SPALLOC_MACHINE = "SpiNNaker1M" -@pytest.mark.parametrize("x,y,b", boards) +@pytest.mark.parametrize("x,y,b", BOARDS) def test_run(x, y, b): test_dir = os.path.dirname(__file__) - job = Job(x, y, b, hostname="spinnaker.cs.man.ac.uk", - owner="Jenkins Machine Test") - # Sleep before checking for queued in case of multiple jobs running - time.sleep(2.0) - if job.state == JobState.queued: - job.destroy("Queued") - pytest.skip(f"Board {x}, {y}, {b} is in use") - elif job.state == JobState.destroyed: - pytest.skip(f"Board {x}, {y}, {b} could not be allocated") + client = SpallocClient(SPALLOC_URL, SPALLOC_USERNAME, SPALLOC_PASSWORD) + job = client.create_job_board( + triad=(x, y, b), machine_name=SPALLOC_MACHINE) with job: - with tempfile.TemporaryDirectory( - prefix=f"{x}_{y}_{b}", dir=test_dir) as tmpdir: - os.chdir(tmpdir) - with open("spynnaker.cfg", "w", encoding="utf-8") as f: - f.write("[Machine]\n") - f.write("spalloc_server = None\n") - f.write(f"machine_name = {job.hostname}\n") - f.write("version = 5\n") - test = WholeBoardTest() - test.do_run() + job.launch_keepalive_task() + # Wait for not queued for up to 30 seconds + sleep(2.0) + state = job.get_state(wait_for_change=True) + # If queued or destroyed skip test + if state == SpallocState.QUEUED: + job.destroy("Queued") + pytest.skip(f"Some boards starting at {x}, {y}, {b} is in use") + elif state == SpallocState.DESTROYED: + pytest.skip(f"Boards {x}, {y}, {b} could not be allocated") + # Actually wait for ready now (as might be powering on) + job.wait_until_ready() + tmpdir = tempfile.mkdtemp(prefix=f"{x}_{y}_{b}", dir=test_dir) + os.chdir(tmpdir) + with open("spynnaker.cfg", "w", encoding="utf-8") as f: + f.write("[Machine]\n") + f.write("spalloc_server = None\n") + f.write(f"machine_name = {job.get_root_host()}\n") + f.write("version = 5\n") + test = WholeBoardTest() + test.do_run() + # If no errors we will get here and we can remove the tree; + # then only error folders will be left + rmtree(tmpdir) if __name__ == "__main__": - for x, y, b in boards: + logging.basicConfig(level=logging.DEBUG) + main_boards = [(0, 0, 0)] + for b_x, b_y, b_b in main_boards: print("", file=sys.stderr,) - print(f"*************** Testing {x}, {y}, {b} *******************", + print(f"************** Testing {b_x}, {b_y}, {b_b} ******************", file=sys.stderr) try: - test_run(x, y, b) + test_run(b_x, b_y, b_b) except Exception: traceback.print_exc() From 636f1e987407c660d677820d8632d1fb7bb0d3ac Mon Sep 17 00:00:00 2001 From: Andrew Rowley Date: Thu, 30 Nov 2023 11:35:31 +0000 Subject: [PATCH 2/7] Try a different approach --- test_whole_board/test_whole_board.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test_whole_board/test_whole_board.py b/test_whole_board/test_whole_board.py index b163983492..988f2f245f 100644 --- a/test_whole_board/test_whole_board.py +++ b/test_whole_board/test_whole_board.py @@ -207,8 +207,7 @@ def test_run(x, y, b): triad=(x, y, b), machine_name=SPALLOC_MACHINE) with job: job.launch_keepalive_task() - # Wait for not queued for up to 30 seconds - sleep(2.0) + job.wait_until_ready(10.0, 3) state = job.get_state(wait_for_change=True) # If queued or destroyed skip test if state == SpallocState.QUEUED: From 2ff85f75a69e023e96d40453b0c7fd95d39faf1b Mon Sep 17 00:00:00 2001 From: Andrew Rowley Date: Thu, 30 Nov 2023 11:57:24 +0000 Subject: [PATCH 3/7] Don't timeout but also don't retry forever --- test_whole_board/test_whole_board.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_whole_board/test_whole_board.py b/test_whole_board/test_whole_board.py index 988f2f245f..9f2307f3e3 100644 --- a/test_whole_board/test_whole_board.py +++ b/test_whole_board/test_whole_board.py @@ -207,8 +207,8 @@ def test_run(x, y, b): triad=(x, y, b), machine_name=SPALLOC_MACHINE) with job: job.launch_keepalive_task() - job.wait_until_ready(10.0, 3) - state = job.get_state(wait_for_change=True) + job.wait_until_ready(None, 3) + state = job.get_state() # If queued or destroyed skip test if state == SpallocState.QUEUED: job.destroy("Queued") From 2f74f4c82dfc1d4a8e564b2fb9d28e8ae958d7ba Mon Sep 17 00:00:00 2001 From: Andrew Rowley Date: Thu, 30 Nov 2023 14:34:18 +0000 Subject: [PATCH 4/7] Try with a timeout and hope it works! --- test_whole_board/test_whole_board.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_whole_board/test_whole_board.py b/test_whole_board/test_whole_board.py index 9f2307f3e3..8c70d983ca 100644 --- a/test_whole_board/test_whole_board.py +++ b/test_whole_board/test_whole_board.py @@ -207,7 +207,7 @@ def test_run(x, y, b): triad=(x, y, b), machine_name=SPALLOC_MACHINE) with job: job.launch_keepalive_task() - job.wait_until_ready(None, 3) + job.wait_until_ready(31, 3) state = job.get_state() # If queued or destroyed skip test if state == SpallocState.QUEUED: From 0bce677fc33a30dd1bbdda1cffe38bcce7f410a5 Mon Sep 17 00:00:00 2001 From: Andrew Rowley Date: Thu, 30 Nov 2023 15:52:46 +0000 Subject: [PATCH 5/7] Move to use new changes that appear to work --- test_whole_board/test_whole_board.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_whole_board/test_whole_board.py b/test_whole_board/test_whole_board.py index 8c70d983ca..3ab9a453d6 100644 --- a/test_whole_board/test_whole_board.py +++ b/test_whole_board/test_whole_board.py @@ -207,7 +207,7 @@ def test_run(x, y, b): triad=(x, y, b), machine_name=SPALLOC_MACHINE) with job: job.launch_keepalive_task() - job.wait_until_ready(31, 3) + job.wait_until_ready(n_retries=3) state = job.get_state() # If queued or destroyed skip test if state == SpallocState.QUEUED: From d249378222de51a6e74ee74d8749f26dca0f11e5 Mon Sep 17 00:00:00 2001 From: Andrew Rowley Date: Thu, 30 Nov 2023 16:09:55 +0000 Subject: [PATCH 6/7] Only really need a single retry --- test_whole_board/test_whole_board.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_whole_board/test_whole_board.py b/test_whole_board/test_whole_board.py index 3ab9a453d6..27c7558902 100644 --- a/test_whole_board/test_whole_board.py +++ b/test_whole_board/test_whole_board.py @@ -207,7 +207,7 @@ def test_run(x, y, b): triad=(x, y, b), machine_name=SPALLOC_MACHINE) with job: job.launch_keepalive_task() - job.wait_until_ready(n_retries=3) + job.wait_until_ready(n_retries=1) state = job.get_state() # If queued or destroyed skip test if state == SpallocState.QUEUED: From 9e07841a96eea1b2dfad5f53e04ef3a28c2d4881 Mon Sep 17 00:00:00 2001 From: Andrew Rowley Date: Fri, 1 Dec 2023 07:23:35 +0000 Subject: [PATCH 7/7] Try 2 retries --- test_whole_board/test_whole_board.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_whole_board/test_whole_board.py b/test_whole_board/test_whole_board.py index 27c7558902..3b535d54da 100644 --- a/test_whole_board/test_whole_board.py +++ b/test_whole_board/test_whole_board.py @@ -207,7 +207,7 @@ def test_run(x, y, b): triad=(x, y, b), machine_name=SPALLOC_MACHINE) with job: job.launch_keepalive_task() - job.wait_until_ready(n_retries=1) + job.wait_until_ready(n_retries=2) state = job.get_state() # If queued or destroyed skip test if state == SpallocState.QUEUED: