Get rid of test front-loading in the duration_based_chunks algo #27
@@ -2,7 +2,7 @@
 import pytest

-from pytest_split.algorithms import Algorithms
+from src.pytest_split.algorithms import Algorithms, duration_based_chunks

 item = namedtuple("item", "nodeid")
@@ -11,7 +11,7 @@ class TestAlgorithms:
     @pytest.mark.parametrize("algo_name", Algorithms.names())
     def test__split_test(self, algo_name):
         durations = {"a": 1, "b": 1, "c": 1}
-        items = [item(x) for x in durations.keys()]
+        items = [item(x) for x in durations]
         algo = Algorithms[algo_name].value
         first, second, third = algo(splits=3, items=items, durations=durations)
@@ -83,3 +83,27 @@ def test__split_tests_calculates_avg_test_duration_only_on_present_tests(self, a
         expected_first, expected_second = expected
         assert first.selected == expected_first
         assert second.selected == expected_second
+
+    def test_each_group_is_assigned_a_test_front_loaded(self):
+        item = namedtuple("item", "nodeid")
+
+        durations = {"a": 100, "b": 1, "c": 1, "d": 1, "e": 1, "f": 1, "g": 1, "h": 1}
+
+        items = [item(x) for x in ["a", "b", "c", "d", "e", "f", "g", "h"]]
+
+        groups = duration_based_chunks(8, items, durations)
+
+        for i in range(7):
+            assert groups[i].selected != []

Comment on lines +94 to +96:

Why 8 above and range(7) here in the loop? Could we somehow get rid of the magic numbers and use e.g. variables whose names would help the reader understand what's going on?

Maybe use …
+
+    def test_each_group_is_assigned_a_test_back_loaded(self):
+        item = namedtuple("item", "nodeid")
+
+        durations = {"a": 1, "b": 1, "c": 1, "d": 1, "e": 1, "f": 1, "g": 1, "h": 100}
+
+        items = [item(x) for x in ["a", "b", "c", "d", "e", "f", "g", "h"]]
+
+        groups = duration_based_chunks(8, items, durations)
+
+        for i in range(7):
+            assert groups[i].selected != []

Comment on lines +108 to +109:

Is this the same as …?

That looks like it would probably work; maybe just add a comment to indicate that we're making sure there aren't any empty groups. Or maybe explicitly check that the size of each group is nonzero.

IMO …
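
For context, here is a rough sketch of how one of these new tests could pick up both suggestions from the threads above: a named split count instead of the literals 8 and range(7), and an explicit check that no group comes back empty. The names and assertion style are illustrative only, not code from this PR, and the `all(...)` check is slightly stricter than the original, which only looped over the first seven groups:

```python
from collections import namedtuple

from src.pytest_split.algorithms import duration_based_chunks

item = namedtuple("item", "nodeid")


def test_each_group_is_assigned_a_test_front_loaded():
    # "a" dominates the total duration; every other test is cheap.
    durations = {"a": 100, "b": 1, "c": 1, "d": 1, "e": 1, "f": 1, "g": 1, "h": 1}
    items = [item(x) for x in durations]
    splits = len(items)  # one split per test, instead of the magic 8 / range(7)

    groups = duration_based_chunks(splits, items, durations)

    # Make sure no group ends up empty (the PR's version only checked the first seven).
    assert all(group.selected for group in groups)
```

The back-loaded variant would be identical apart from the durations putting the 100-second test last.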
@@ -61,20 +61,20 @@ class TestSplitToSuites:
             "least_duration",
             ["test_1", "test_2", "test_3", "test_4", "test_5", "test_6", "test_7", "test_8", "test_9", "test_10"],
         ),
-        (2, 1, "duration_based_chunks", ["test_1", "test_2", "test_3", "test_4", "test_5", "test_6", "test_7"]),
-        (2, 2, "duration_based_chunks", ["test_8", "test_9", "test_10"]),
+        (2, 1, "duration_based_chunks", ["test_1", "test_2", "test_3", "test_4", "test_5", "test_6"]),
+        (2, 2, "duration_based_chunks", ["test_7", "test_8", "test_9", "test_10"]),
         (2, 1, "least_duration", ["test_1", "test_3", "test_5", "test_7", "test_9"]),
         (2, 2, "least_duration", ["test_2", "test_4", "test_6", "test_8", "test_10"]),
         (3, 1, "duration_based_chunks", ["test_1", "test_2", "test_3", "test_4", "test_5"]),
-        (3, 2, "duration_based_chunks", ["test_6", "test_7", "test_8"]),
-        (3, 3, "duration_based_chunks", ["test_9", "test_10"]),
+        (3, 2, "duration_based_chunks", ["test_6", "test_7"]),
+        (3, 3, "duration_based_chunks", ["test_8", "test_9", "test_10"]),
         (3, 1, "least_duration", ["test_1", "test_4", "test_7", "test_10"]),
         (3, 2, "least_duration", ["test_2", "test_5", "test_8"]),
         (3, 3, "least_duration", ["test_3", "test_6", "test_9"]),
-        (4, 1, "duration_based_chunks", ["test_1", "test_2", "test_3", "test_4"]),
-        (4, 2, "duration_based_chunks", ["test_5", "test_6", "test_7"]),
-        (4, 3, "duration_based_chunks", ["test_8", "test_9"]),
-        (4, 4, "duration_based_chunks", ["test_10"]),
+        (4, 1, "duration_based_chunks", ["test_1", "test_2", "test_3"]),
+        (4, 2, "duration_based_chunks", ["test_4", "test_5"]),
+        (4, 3, "duration_based_chunks", ["test_6"]),
+        (4, 4, "duration_based_chunks", ["test_7", "test_8", "test_9", "test_10"]),
Comment on lines +74 to +77:

Doesn't this mean that the expected durations for each group will be: …? If so, it doesn't feel right 🤔

1-5 are 1 second tests, while 6-10 are 2 second tests, so I believe the timings are … And yes, this is weird, but it's also what you should expect if you want to stop adding tests before going over the threshold of 15/4 = 3.75 seconds while keeping absolute order 🤷

It's a very simple greedy algorithm; it's pretty much unavoidable that you're going to get strange, far-from-optimal results like this. The least_duration algorithm is much better and IMO should be the default, and this algorithm should simply be removed. But this algorithm does have the benefit of maintaining the sequence of tests, which I suppose could potentially be beneficial in some cases if there are ordering dependencies between tests.

Actually, here's something to consider potentially: exceed the threshold, but keep track of a running total overage and use that when determining whether you're over the threshold. (If you never exceed the threshold, then you're basically guaranteed to end up with the last group being larger than the rest.) There are probably a few other heuristics that you could apply here to try to improve things a little bit. For instance, something like this: ignore overage if the slot is empty and unconditionally assign; if it isn't empty, check whether the resulting overage is less than half of the time required for that test; if so, pack it, otherwise bump it to the next group. (See the sketch after this hunk.)

I have a use case which requires this plugin to support maintaining the order of the tests, which is why we need to keep …

I'll close this for now then 👍

Well, without these changes, is duration_based_chunks still producing empty groups? Or did that get fixed in a separate PR?

Yes, probably. I noticed there are a few conflicts, so perhaps there's been another fix merged, but that's what this was meant to fix.

OK, well my point was that even if the splitting isn't as perfect as it could be, at least this fix doesn't result in empty groups, so it should be merged and then possibly improved, instead of being closed.

Sure thing. I've reopened it - feel free to make any additional changes you wish (cc @jerry-git) and merge. I'm on holiday now myself so likely won't be able to apply any code changes for a while.
(4, 1, "least_duration", ["test_1", "test_5", "test_9"]), | ||
(4, 2, "least_duration", ["test_2", "test_6", "test_10"]), | ||
(4, 3, "least_duration", ["test_3", "test_7"]), | ||
|
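
As a side note on the numbers questioned above: per the thread, tests 1-5 take 1 second and tests 6-10 take 2 seconds, so the 4-way threshold is 15 / 4 = 3.75 seconds, and a greedy chunker that refuses to cross that threshold ends up with groups of roughly 3 s, 2 s, 2 s, and 8 s, which is what the new expected values reflect. The overage-tracking heuristic floated in the thread might look something like the sketch below. This is only one reading of that suggestion, not code from the plugin: the function name is made up, it works on plain node ids rather than the plugin's item and group objects, and the packing rule follows the reviewer's description (assign unconditionally to an empty group; otherwise pack a test only if the overage it causes is less than half of its own duration).

```python
from typing import Dict, List


def chunks_with_overage(splits: int, test_ids: List[str], durations: Dict[str, float]) -> List[List[str]]:
    """Greedy, order-preserving chunking that may exceed the per-group
    threshold, tracking the running overage as suggested in the review thread."""
    threshold = sum(durations.values()) / splits
    groups: List[List[str]] = [[] for _ in range(splits)]
    assigned = 0.0  # total duration handed out so far
    current = 0

    for test_id in test_ids:
        duration = durations[test_id]
        if groups[current] and current < splits - 1:
            # Cumulative budget for groups 0..current; overshoot in earlier
            # groups is carried forward automatically (the "running overage").
            overage = (assigned + duration) - (current + 1) * threshold
            # Pack the test only if the overage it causes is less than half of
            # its own duration; otherwise bump it to the next group.
            if overage >= duration / 2:
                current += 1
        groups[current].append(test_id)
        assigned += duration
    return groups
```

With the example suite above (tests 1-5 at 1 s, tests 6-10 at 2 s, splits=4), this packs the groups to roughly 4 s, 3 s, 4 s, and 4 s instead of 3 s, 2 s, 2 s, and 8 s, while keeping the original test order; it is only a starting point and would still need the empty-group safeguards discussed earlier.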
@@ -107,6 +107,7 @@ def test_it_splits(self, test_idx, splits, group, algo, expected, legacy_flag, e
             "--splitting-algorithm",
             algo,
         )

         result.assertoutcome(passed=len(expected))
         assert _passed_test_names(result) == expected
@@ -128,16 +129,16 @@ def test_it_adapts_splits_based_on_new_and_deleted_tests(self, example_suite, du
             json.dump(durations, f)

         result = example_suite.inline_run("--splits", "3", "--group", "1", "--durations-path", durations_path)
-        result.assertoutcome(passed=4)
-        assert _passed_test_names(result) == ["test_1", "test_2", "test_3", "test_4"]
+        result.assertoutcome(passed=3)
+        assert _passed_test_names(result) == ["test_1", "test_2", "test_3"]  # 3 sec

         result = example_suite.inline_run("--splits", "3", "--group", "2", "--durations-path", durations_path)
-        result.assertoutcome(passed=3)
-        assert _passed_test_names(result) == ["test_5", "test_6", "test_7"]
+        result.assertoutcome(passed=1)
+        assert _passed_test_names(result) == ["test_4"]  # 1 sec

         result = example_suite.inline_run("--splits", "3", "--group", "3", "--durations-path", durations_path)
-        result.assertoutcome(passed=3)
-        assert _passed_test_names(result) == ["test_8", "test_9", "test_10"]
+        result.assertoutcome(passed=6)
+        assert _passed_test_names(result) == ["test_5", "test_6", "test_7", "test_8", "test_9", "test_10"]  # 3 sec

     def test_handles_case_of_no_durations_for_group(self, example_suite, durations_path):
         with open(durations_path, "w") as f:
@@ -149,8 +150,8 @@ def test_handles_case_of_no_durations_for_group(self, example_suite, durations_p

     def test_it_splits_with_other_collect_hooks(self, testdir, durations_path):
         expected_tests_per_group = [
-            ["test_1", "test_2", "test_3"],
-            ["test_4", "test_5"],
+            ["test_1", "test_2"],
+            ["test_3", "test_4", "test_5"],
         ]

         tests_to_run = "".join(f"@pytest.mark.mark_one\ndef test_{num}(): pass\n" for num in range(1, 6))
Would it be the same if the loop were over `sorted(durations.keys())`?

What do you mean, sorry?

I think he's saying you can do `items = sorted(durations.keys())` instead of duplicating all of the items.

Yeah, I'm basically asking if this would be the same: … IMO it would be better for readability, as then it's explicit that there's one item for each entry in `durations`.

I just copied in the example test from here and made sure we had one example that was extremely front-loaded and one that was extremely back-loaded. I can make it less explicit if you prefer.
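
For reference, the suggestion amounts to deriving the items from the durations mapping instead of repeating the list of ids. A minimal sketch follows; whether to rely on the dict's insertion order or to call `sorted()` explicitly is exactly the readability question raised above, and both give the same order for these alphabetical keys:

```python
from collections import namedtuple

item = namedtuple("item", "nodeid")

durations = {"a": 100, "b": 1, "c": 1, "d": 1, "e": 1, "f": 1, "g": 1, "h": 1}

# Instead of repeating the ids:
#   items = [item(x) for x in ["a", "b", "c", "d", "e", "f", "g", "h"]]
# derive them from the durations mapping, one item per key:
items = [item(x) for x in sorted(durations)]  # same as sorted(durations.keys())
# ...or simply iterate the dict, relying on insertion order:
items = [item(x) for x in durations]
```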