Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test suite optimization: Migrate tests to pytest #917

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ install:
- python -m pip install --user ".[dev]"

test_script:
- python -m nose --tests benchexec.tablegenerator
- python -m pytest benchexec/tablegenerator
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ stages:
script:
- sudo -u $PRIMARY_USER
COVERAGE_PROCESS_START=.coveragerc
coverage run -m nose
coverage run -m pytest
after_script:
- sudo -u $PRIMARY_USER coverage combine
- sudo -u $PRIMARY_USER coverage report
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ before_script:
# and this is difficult to fix, but occurs only in high-load environments.
- sed -i benchexec/test_integration/__init__.py -e '/test_simple_parallel/ i \ @unittest.skip("Fails nondeterministically on Travis, probably issue 656")'
script:
- python -m nose
- python -m pytest
# Revert local modification before checking source format
- git checkout .
- if which black; then black . --check --diff; fi
Expand Down
3 changes: 1 addition & 2 deletions benchexec/tablegenerator/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@
# This sets the rounding mode for all Decimal operations in the process.
# It is actually used only as default context for new contexts, but because we set this
# at import time and before any threads are started, it should work according to its
# documentation. We double check with the context of the current thread.
# documentation.
decimal.DefaultContext.rounding = decimal.ROUND_HALF_UP
assert decimal.getcontext().rounding == decimal.ROUND_HALF_UP
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought this can be kept now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have tried multiple ways to keep the double-checking assertion, but the tests only work when it is removed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that whenever this assertion fails, table-generator would do wrong computations! Actually, it would be good to have a test that shows that; can you do that? This would then make it clear that we need a way of setting the correct rounding mode.


DEFAULT_TIME_PRECISION = 3
DEFAULT_TOOLTIP_PRECISION = 2
Expand Down
21 changes: 21 additions & 0 deletions benchexec/tablegenerator/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# This file is part of BenchExec, a framework for reliable benchmarking:
# https://github.com/sosy-lab/benchexec
#
# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer <https://www.sosy-lab.org>
#
# SPDX-License-Identifier: Apache-2.0

# The following code is intended for use in pytest sessions and ensures consistent rounding behavior during tests.
# It sets the rounding mode to ROUND_HALF_UP for both the DefaultContext and the local context at the start of the session.
# This helps maintain reproducibility in test results by avoiding discrepancies in rounding behavior across different environments or configurations.
# The use of pytest_sessionstart hook from `conftest.py` ensures that this setup is applied globally at the beginning of each test session.

import decimal

# Rounding modes in effect when this conftest is imported. They are captured
# before any hook runs so that pytest_sessionfinish can put them back.
original_default_rounding = decimal.DefaultContext.rounding
original_local_rounding = decimal.getcontext().rounding


def pytest_sessionstart(session):
    """Force ROUND_HALF_UP for the duration of the pytest session.

    Both the DefaultContext and the context of the current thread are
    changed, so Decimal operations in the tests round the same way
    regardless of what was configured before the session started.

    @param session: the pytest session object (unused)
    """
    decimal.DefaultContext.rounding = decimal.ROUND_HALF_UP
    decimal.getcontext().rounding = decimal.ROUND_HALF_UP


def pytest_sessionfinish(session, exitstatus):
    """Restore the rounding modes that were captured at import time.

    Without this hook the saved values above would never be used and the
    rounding mode set by pytest_sessionstart would leak into whatever runs
    in the same process after the test session.

    @param session: the pytest session object (unused)
    @param exitstatus: the exit status of the session (unused)
    """
    decimal.DefaultContext.rounding = original_default_rounding
    decimal.getcontext().rounding = original_local_rounding
188 changes: 92 additions & 96 deletions benchexec/tablegenerator/test_statvalue.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,172 +7,168 @@

from decimal import Decimal
import sys
import unittest
import pytest

from benchexec.tablegenerator.statistics import StatValue

sys.dont_write_bytecode = True # prevent creation of .pyc files


class TestStatValue(unittest.TestCase):
class TestStatValue:
@classmethod
def setUpClass(cls):
cls.longMessage = True
cls.maxDiff = None
def setup_class(cls):
cls.long_message = True
cls.max_diff = None

def test_empty(self):
self.assertIsNone(StatValue.from_list([]))
assert StatValue.from_list([]) is None

def test_single_value(self):
v = Decimal("1.23")
s = StatValue.from_list([v])
self.assertEqual(s.sum, v)
self.assertEqual(s.avg, v)
self.assertEqual(s.max, v)
self.assertEqual(s.min, v)
self.assertEqual(s.median, v)
self.assertEqual(s.stdev, Decimal(0))
assert s.sum == v
assert s.avg == v
assert s.max == v
assert s.min == v
assert s.median == v
assert s.stdev == Decimal(0)

def test_two_values(self):
v1 = Decimal("1.23")
v2 = Decimal("4.56")
for t in [[v1, v2], [v2, v1]]:
s = StatValue.from_list(t)
self.assertEqual(s.sum, v1 + v2)
self.assertEqual(s.avg, (v1 + v2) / Decimal(2))
self.assertEqual(s.max, v2)
self.assertEqual(s.min, v1)
self.assertEqual(s.median, (v1 + v2) / Decimal(2))
self.assertEqual(s.stdev, Decimal("1.665"))
assert s.sum == v1 + v2
assert s.avg == (v1 + v2) / Decimal(2)
assert s.max == v2
assert s.min == v1
assert s.median == (v1 + v2) / Decimal(2)
assert s.stdev == Decimal("1.665")

def test_three_values(self):
v1 = Decimal("0.123")
v2 = Decimal("4.56")
v3 = Decimal("789")
for t in [[v1, v2, v3], [v3, v2, v1], [v2, v1, v3]]:
s = StatValue.from_list(t)
self.assertEqual(s.sum, v1 + v2 + v3)
self.assertEqual(s.avg, (v1 + v2 + v3) / Decimal(3))
self.assertEqual(s.max, v3)
self.assertEqual(s.min, v1)
self.assertEqual(s.median, v2)
self.assertAlmostEqual(s.stdev, Decimal("370.83879721"))
assert s.sum == v1 + v2 + v3
assert s.avg == (v1 + v2 + v3) / Decimal(3)
assert s.max == v3
assert s.min == v1
assert s.median == v2
assert pytest.approx(s.stdev, abs=1e-8) == Decimal("370.83879721")

def test_nan(self):
nan = Decimal("nan")
v = Decimal("0.123")

s = StatValue.from_list([nan])
self.assertTrue(s.sum.is_nan(), f"Not NaN, but {s.sum}")
self.assertTrue(s.avg.is_nan(), f"Not NaN, but {s.avg}")
self.assertTrue(s.max.is_nan(), f"Not NaN, but {s.max}")
self.assertTrue(s.min.is_nan(), f"Not NaN, but {s.min}")
self.assertTrue(s.median.is_nan(), f"Not NaN, but {s.median}")
self.assertTrue(s.stdev.is_nan(), f"Not NaN, but {s.stdev}")
assert s.sum.is_nan()
assert s.avg.is_nan()
assert s.max.is_nan()
assert s.min.is_nan()
assert s.median.is_nan()
assert s.stdev.is_nan()

s = StatValue.from_list([nan, v])
self.assertTrue(s.sum.is_nan(), f"Not NaN, but {s.sum}")
self.assertTrue(s.avg.is_nan(), f"Not NaN, but {s.avg}")
self.assertTrue(s.max.is_nan(), f"Not NaN, but {s.max}")
self.assertTrue(s.min.is_nan(), f"Not NaN, but {s.min}")
self.assertTrue(s.median.is_nan(), f"Not NaN, but {s.median}")
self.assertTrue(s.stdev.is_nan(), f"Not NaN, but {s.stdev}")
assert s.sum.is_nan()
assert s.avg.is_nan()
assert s.max.is_nan()
assert s.min.is_nan()
assert s.median.is_nan()
assert s.stdev.is_nan()

def test_one_inf(self):
inf = Decimal("inf")
v = Decimal("0.123")

s = StatValue.from_list([inf])
self.assertEqual(s.sum, inf, f"Not Inf, but {s.sum}")
self.assertEqual(s.avg, inf, f"Not Inf, but {s.avg}")
self.assertEqual(s.max, inf, f"Not Inf, but {s.max}")
self.assertEqual(s.min, inf, f"Not Inf, but {s.min}")
self.assertEqual(s.median, inf, f"Not Inf, but {s.median}")
self.assertEqual(s.stdev, inf, f"Not Inf, but {s.stdev}")
assert s.sum == inf
assert s.avg == inf
assert s.max == inf
assert s.min == inf
assert s.median == inf
assert s.stdev == inf

s = StatValue.from_list([inf, v])
self.assertEqual(s.sum, inf, f"Not Inf, but {s.sum}")
self.assertEqual(s.avg, inf, f"Not NaN, but {s.avg}")
self.assertEqual(s.max, inf, f"Not NaN, but {s.max}")
self.assertEqual(s.min, v, f"Not NaN, but {s.min}")
self.assertEqual(s.median, inf, f"Not NaN, but {s.median}")
self.assertEqual(s.stdev, inf, f"Not NaN, but {s.stdev}")
assert s.sum == inf
assert s.avg == inf
assert s.max == inf
assert s.min == v
assert s.median == inf
assert s.stdev == inf

def test_one_negative_inf(self):
ninf = Decimal("-inf")
inf = Decimal("inf")
v = Decimal("0.123")

s = StatValue.from_list([ninf])
self.assertEqual(s.sum, ninf, f"Not -Inf, but {s.sum}")
self.assertEqual(s.avg, ninf, f"Not -Inf, but {s.avg}")
self.assertEqual(s.max, ninf, f"Not -Inf, but {s.max}")
self.assertEqual(s.min, ninf, f"Not -Inf, but {s.min}")
self.assertEqual(s.median, ninf, f"Not -Inf, but {s.median}")
self.assertEqual(s.stdev, inf, f"Not Inf, but {s.stdev}")
assert s.sum == ninf
assert s.avg == ninf
assert s.max == ninf
assert s.min == ninf
assert s.median == ninf
assert s.stdev == inf

s = StatValue.from_list([ninf, v])
self.assertEqual(s.sum, ninf, f"Not -Inf, but {s.sum}")
self.assertEqual(s.avg, ninf, f"Not -Inf, but {s.avg}")
self.assertEqual(s.max, v, f"Not 0.123, but {s.max}")
self.assertEqual(s.min, ninf, f"Not -Inf, but {s.min}")
self.assertEqual(s.median, ninf, f"Not -Inf, but {s.median}")
self.assertEqual(s.stdev, inf, f"Not Inf, but {s.stdev}")
assert s.sum == ninf
assert s.avg == ninf
assert s.max == v
assert s.min == ninf
assert s.median == ninf
assert s.stdev == inf

def test_multiple_positive_inf(self):
inf = Decimal("inf")
v = Decimal("0.123")

# Equal number of infs
s = StatValue.from_list([inf, inf, v])
self.assertEqual(s.sum, inf, f"Not Inf, but {s.sum}")
self.assertEqual(s.avg, inf, f"Not Inf, but {s.avg}")
self.assertEqual(s.max, inf, f"Not Inf, but {s.max}")
self.assertEqual(s.min, v, f"Not 0.123, but {s.min}")
self.assertEqual(s.median, inf, f"Not Inf, but {s.median}")
self.assertEqual(s.stdev, inf, f"Not Inf, but {s.stdev}")

# Unequal number of infs
assert s.sum == inf
assert s.avg == inf
assert s.max == inf
assert s.min == v
assert s.median == inf
assert s.stdev == inf

s = StatValue.from_list([inf, inf, inf, v])
self.assertEqual(s.sum, inf, f"Not Inf, but {s.sum}")
self.assertEqual(s.avg, inf, f"Not Inf, but {s.avg}")
self.assertEqual(s.max, inf, f"Not Inf, but {s.max}")
self.assertEqual(s.min, v, f"Not 0.123, but {s.min}")
self.assertEqual(s.median, inf, f"Not Inf, but {s.median}")
self.assertEqual(s.stdev, inf, f"Not Inf, but {s.stdev}")
assert s.sum == inf
assert s.avg == inf
assert s.max == inf
assert s.min == v
assert s.median == inf
assert s.stdev == inf

def test_multiple_negative_inf(self):
ninf = Decimal("-inf")
inf = Decimal("inf")
v = Decimal("0.123")

# Equal number of negative infs
s = StatValue.from_list([ninf, ninf, v])
self.assertEqual(s.sum, ninf, f"Not -Inf, but {s.sum}")
self.assertEqual(s.avg, ninf, f"Not -Inf, but {s.avg}")
self.assertEqual(s.max, v, f"Not 0.123, but {s.max}")
self.assertEqual(s.min, ninf, f"Not -Inf, but {s.min}")
self.assertEqual(s.median, ninf, f"Not -Inf, but {s.median}")
self.assertEqual(s.stdev, inf, f"Not Inf, but {s.stdev}")

# Unequal number of negative infs
assert s.sum == ninf
assert s.avg == ninf
assert s.max == v
assert s.min == ninf
assert s.median == ninf
assert s.stdev == inf

s = StatValue.from_list([ninf, ninf, ninf, v])
self.assertEqual(s.sum, ninf, f"Not -Inf, but {s.sum}")
self.assertEqual(s.avg, ninf, f"Not -Inf, but {s.avg}")
self.assertEqual(s.max, v, f"Not 0.123, but {s.max}")
self.assertEqual(s.min, ninf, f"Not -Inf, but {s.min}")
self.assertEqual(s.median, ninf, f"Not -Inf, but {s.median}")
self.assertEqual(s.stdev, inf, f"Not Inf, but {s.stdev}")
assert s.sum == ninf
assert s.avg == ninf
assert s.max == v
assert s.min == ninf
assert s.median == ninf
assert s.stdev == inf

def test_multiple_positive_and_negative_inf(self):
inf = Decimal("inf")
ninf = Decimal("-inf")
v = Decimal("0.123")

s = StatValue.from_list([inf, ninf, v])
self.assertTrue(s.sum.is_nan(), f"Not NaN, but {s.sum}")
self.assertTrue(s.avg.is_nan(), f"Not NaN, but {s.avg}")
self.assertEqual(s.max, inf, f"Not Inf, but {s.max}")
self.assertEqual(s.min, ninf, f"Not -Inf, but {s.min}")
self.assertEqual(s.median, v, f"Not 0.123, but {s.median}")
self.assertTrue(s.stdev.is_nan(), f"Not NaN, but {s.stdev}")
assert s.sum.is_nan()
assert s.avg.is_nan()
assert s.max == inf
assert s.min == ninf
assert s.median == v
assert s.stdev.is_nan()
Loading