From b6fc38d9dfe6ae33e48ac1e756928737aa908f9f Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 10 May 2023 17:39:23 +0200 Subject: [PATCH 001/224] Added skeleton for PatchProvider --- varats-core/varats/provider/patch/__init__.py | 0 varats-core/varats/provider/patch/patch.py | 11 ++++ .../varats/provider/patch/patch_provider.py | 66 +++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100644 varats-core/varats/provider/patch/__init__.py create mode 100644 varats-core/varats/provider/patch/patch.py create mode 100644 varats-core/varats/provider/patch/patch_provider.py diff --git a/varats-core/varats/provider/patch/__init__.py b/varats-core/varats/provider/patch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/varats-core/varats/provider/patch/patch.py b/varats-core/varats/provider/patch/patch.py new file mode 100644 index 000000000..5c6f1048c --- /dev/null +++ b/varats-core/varats/provider/patch/patch.py @@ -0,0 +1,11 @@ +from pathlib import Path +import typing as tp + + +class Patch: + """A class for storing a project-specific Patch""" + project: str + shortname: str + description: str + path: Path + revisions: tp.List[str] diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py new file mode 100644 index 000000000..4ca76e4d4 --- /dev/null +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -0,0 +1,66 @@ +import typing as tp +from pathlib import Path + +import benchbuild as bb +from benchbuild.project import Project +from benchbuild.source.base import target_prefix + +from varats.provider.patch.patch import Patch +from varats.provider.provider import Provider, ProviderType + + +class PatchesNotFoundError(FileNotFoundError): + # TODO: Implement me + pass + + +class PatchProvider(Provider): + """A provider for getting patch files for a certain project""" + + patches_repository = "https://github.com/se-sic/vara-project-patches.git" + + def __init__(self, project: tp.Type[Project]): + patches_project_dir = Path(self._get_patches_repository_path() / self.project.NAME) + + if not patches_project_dir.is_dir(): + # TODO: Add proper error message + raise PatchesNotFoundError() + + patches_config_file = Path(patches_project_dir / ".patches.xml") + + if not patches_config_file.exists(): + # TODO: Add proper error handling + # This should probably be a different error since it is related to the patches config + # not the patches itself + raise PatchesNotFoundError() + + self.project_patches = self._parse_patches_config(patches_config_file) + + super().__init__(project) + + @classmethod + def create_provider_for_project(cls: tp.Type[ProviderType], project: tp.Type[Project]) -> tp.Optional[ProviderType]: + pass + + @classmethod + def create_default_provider(cls: tp.Type[ProviderType], project: tp.Type[Project]) -> ProviderType: + pass + + @staticmethod + def _get_patches_repository_path() -> Path: + patches_source = bb.source.Git( + remote=PatchProvider.patches_repository, + local="ConfigurableSystems", + refspec="origin/HEAD", + limit=1, + ) + + patches_source.fetch() + + return Path(Path(target_prefix()) / patches_source.local) + + @staticmethod + def _parse_patches_config(config_file: Path) -> tp.List[Patch]: + # TODO: Implement XML parsing for patches config + pass + From ddd0915563f1ff3fd9539f64ae63d1ee56db1858 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Thu, 1 Jun 2023 09:26:41 +0200 Subject: [PATCH 002/224] First setup for PatchProvider and tests --- 
.../test-patch-configuration.xml | 68 +++++++++++++++ tests/provider/test_patch_provider.py | 43 ++++++++++ varats-core/varats/provider/patch/patch.py | 84 +++++++++++++++++-- .../varats/provider/patch/patch_provider.py | 2 +- 4 files changed, 190 insertions(+), 7 deletions(-) create mode 100644 tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml create mode 100644 tests/provider/test_patch_provider.py diff --git a/tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml b/tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml new file mode 100644 index 000000000..8e4ff4ca8 --- /dev/null +++ b/tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml @@ -0,0 +1,68 @@ + + github.com/se-sic/FeaturePerfCSCollection.git + + + unrestricted-range + + Patch describing an unrestricted range of commits, meaning it should include all commits + + bug.patch + + + include-single-revision + + Patch that is valid for a single revision + + bug.patch + + + include-revision-range + + Patch that is valid for a range of commits + + bug.patch + + + include-single-and-revision-range + + Patch that is valid for a revision range AND another single commit + + bug.patch + + + exclude-single-revision + + Patch that is valid for all commits except a single one + + bug.patch + + + exclude-revision-range + + Patch that excludes a range of commits (Otherwise includes all) + + bug.patch + + + exclude-single-and-revision-range + + Patch that excludes a certain range and individual commits (Otherwise includes all) + + bug.patch + + + include-range-exclude-single + + Patch valid for a range of commits where a single one is excluded + + bug.patch + + + include-range-exclude-range + + Patch valid for a range of commits where a subrange is explicitly excluded + + bug.patch + + + diff --git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py new file mode 100644 index 000000000..4fe6f645a --- /dev/null +++ b/tests/provider/test_patch_provider.py @@ -0,0 +1,43 @@ +import unittest +from pathlib import Path + +from tests.helper_utils import TEST_INPUTS_DIR +from varats.provider.patch.patch import ProjectPatches + + +class TestPatchProvider(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +class TestPatchConfiguration(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.patch_config = ProjectPatches.from_xml( Path(TEST_INPUTS_DIR/'patch-configs/test-patch-configuration.xml') ) + + def test_unrestricted_range(self): + pass + + def test_included_single_revision(self): + pass + + def test_included_revision_range(self): + pass + + def test_included_single_and_revision_range(self): + pass + + def test_exclude_single_revision(self): + pass + + def test_exclude_revision_range(self): + pass + + def test_exclude_single_and_revision_range(self): + pass + + def test_include_range_exclude_single(self): + pass + + def test_include_range_exclude_range(self): + pass diff --git a/varats-core/varats/provider/patch/patch.py b/varats-core/varats/provider/patch/patch.py index 5c6f1048c..722777f93 100644 --- a/varats-core/varats/provider/patch/patch.py +++ b/varats-core/varats/provider/patch/patch.py @@ -1,11 +1,83 @@ +import os from pathlib import Path import typing as tp +import xml.etree.ElementTree as ET + +from benchbuild.utils.revision_ranges import _get_all_revisions_between, _get_git_for_path, RevisionRange, SingleRevision + +from varats.utils.git_util import CommitHash, ShortCommitHash class Patch: - """A class for storing a project-specific 
Patch""" - project: str - shortname: str - description: str - path: Path - revisions: tp.List[str] + """A class for storing a single project-specific Patch""" + + def __init__(self, project: str, shortname: str, description: str, path: Path, valid_revisions: tp.Optional[tp.Set[CommitHash]] = None + , invalid_revisions: tp.Optional[tp.Set[CommitHash]] = None): + self.project: str = project + self.shortname: str = shortname + self.description: str = description + self.path: Path = path + self.valid_revisions: tp.Set[CommitHash] = valid_revisions + self.invalid_revisions: tp.Set[CommitHash] = invalid_revisions + + +class ProjectPatches: + """A class storing a set of patches specific to a project""" + + def __init__(self, project_name: str, repository: str, patches: tp.List[Patch]): + self.project_name: str = project_name + self.repository: str = repository + self.patches: tp.List[Patch] = patches + + def get_patches_for_revision(self, revision: CommitHash): + # This could be more concise with some nested list comprehensions + # But it would make it harder to understand + valid_patches: tp.Set[Patch] = set() + + for patch in self.patches: + add_patch = True + if patch.valid_revisions and revision not in patch.valid_revisions: + add_patch = False + + if patch.invalid_revisions and revision in patch.invalid_revisions: + add_patch = False + + if add_patch: + valid_patches.add(patch) + + @staticmethod + def from_xml(xml_path: Path): + project_name: str = Path(os.path.abspath(xml_path)).parts[-2] + tree = ET.parse(xml_path) + root = tree.getroot() + + if len(root.findall("repository")) != 1: + # TODO: Proper error handling + raise RuntimeError("Only one repository allowed") + + repository = root.findtext("repository") + + repo_git = _get_git_for_path(repository) + patch_list: tp.List[Patch] = [] + + # We explicitly ignore further validity checking of the XML at that point + # As for now, this is already done by a CI Job in the vara-project-patches + # repository + for patch in root.findall("patch"): + shortname = patch.findtext("shortname") + description = patch.findtext("description") + path = Path(patch.findtext("path")) + + include_revisions: tp.Set[CommitHash] = set() + + include_revs_tag = patch.find("include_revisions") + + if include_revs_tag: + pass + else: + revs_list = repo_git('log', '--pretty="%H"', '--first-parent').strip().split() + + exclude_revs_tag = patch.find("exclude_revisions") + pass + + diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 4ca76e4d4..37a68f1fe 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -26,7 +26,7 @@ def __init__(self, project: tp.Type[Project]): # TODO: Add proper error message raise PatchesNotFoundError() - patches_config_file = Path(patches_project_dir / ".patches.xml") + patches_config_file = Path(patches_project_dir / "test-patch-configuration.xml") if not patches_config_file.exists(): # TODO: Add proper error handling From d15a64809899433a9aa828a49a506af615e25c51 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Mon, 26 Jun 2023 14:42:53 +0200 Subject: [PATCH 003/224] * Finished parsing of patch configuration --- varats-core/varats/provider/patch/patch.py | 33 ++++++++++++++++--- .../varats/provider/patch/patch_provider.py | 4 +-- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/varats-core/varats/provider/patch/patch.py b/varats-core/varats/provider/patch/patch.py index 
722777f93..651d83194 100644 --- a/varats-core/varats/provider/patch/patch.py +++ b/varats-core/varats/provider/patch/patch.py @@ -3,7 +3,8 @@ import typing as tp import xml.etree.ElementTree as ET -from benchbuild.utils.revision_ranges import _get_all_revisions_between, _get_git_for_path, RevisionRange, SingleRevision +from benchbuild.utils.revision_ranges import _get_all_revisions_between, _get_git_for_path, RevisionRange, \ + SingleRevision from varats.utils.git_util import CommitHash, ShortCommitHash @@ -11,7 +12,8 @@ class Patch: """A class for storing a single project-specific Patch""" - def __init__(self, project: str, shortname: str, description: str, path: Path, valid_revisions: tp.Optional[tp.Set[CommitHash]] = None + def __init__(self, project: str, shortname: str, description: str, path: Path, + valid_revisions: tp.Optional[tp.Set[CommitHash]] = None , invalid_revisions: tp.Optional[tp.Set[CommitHash]] = None): self.project: str = project self.shortname: str = shortname @@ -21,7 +23,7 @@ def __init__(self, project: str, shortname: str, description: str, path: Path, v self.invalid_revisions: tp.Set[CommitHash] = invalid_revisions -class ProjectPatches: +class ProjectPatchesConfiguration: """A class storing a set of patches specific to a project""" def __init__(self, project_name: str, repository: str, patches: tp.List[Patch]): @@ -60,6 +62,21 @@ def from_xml(xml_path: Path): repo_git = _get_git_for_path(repository) patch_list: tp.List[Patch] = [] + def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: + res: tp.Set[CommitHash] = set() + + for revision_tag in revisions_tag.findall("single_revision"): + res.add(ShortCommitHash(revision_tag.text)) + + for revision_range_tag in revisions_tag.findall("revision_range"): + start_tag = revision_range_tag.find("start") + end_tag = revision_range_tag.find("end") + + res.update( + {ShortCommitHash(h) for h in _get_all_revisions_between(start_tag.text, end_tag.text, repo_git)}) + + return res + # We explicitly ignore further validity checking of the XML at that point # As for now, this is already done by a CI Job in the vara-project-patches # repository @@ -73,11 +90,17 @@ def from_xml(xml_path: Path): include_revs_tag = patch.find("include_revisions") if include_revs_tag: - pass + include_revisions = parse_revisions(include_revs_tag) else: revs_list = repo_git('log', '--pretty="%H"', '--first-parent').strip().split() + include_revisions.update([ShortCommitHash(rev) for rev in revs_list]) + exclude_revs_tag = patch.find("exclude_revisions") - pass + if exclude_revs_tag: + include_revisions.difference_update(parse_revisions(exclude_revs_tag)) + + patch_list.append(Patch(project_name, shortname, description, path, include_revisions)) + return ProjectPatchesConfiguration(project_name, repository, patch_list) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 37a68f1fe..300e22c75 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -5,7 +5,7 @@ from benchbuild.project import Project from benchbuild.source.base import target_prefix -from varats.provider.patch.patch import Patch +from varats.provider.patch.patch import Patch, ProjectPatchesConfiguration from varats.provider.provider import Provider, ProviderType @@ -60,7 +60,7 @@ def _get_patches_repository_path() -> Path: return Path(Path(target_prefix()) / patches_source.local) @staticmethod - def _parse_patches_config(config_file: 
Path) -> tp.List[Patch]: + def _parse_patches_config(config_file: Path) -> ProjectPatchesConfiguration: # TODO: Implement XML parsing for patches config pass From 45dafff5974b6a3dceb745bf26a3f4cb13088bc7 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Mon, 26 Jun 2023 14:53:48 +0200 Subject: [PATCH 004/224] * Updated test patch config with proper revisions/ranges --- .../test-patch-configuration.xml | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml b/tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml index 8e4ff4ca8..8a291f364 100644 --- a/tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml +++ b/tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml @@ -14,6 +14,9 @@ Patch that is valid for a single revision bug.patch + + 8ca5cc28e6746eef7340064b5d843631841bf31e + include-revision-range @@ -21,6 +24,12 @@ Patch that is valid for a range of commits bug.patch + + + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e + + include-single-and-revision-range @@ -28,6 +37,15 @@ Patch that is valid for a revision range AND another single commit bug.patch + + + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e + + + 27f17080376e409860405c40744887d81d6b3f34 + + exclude-single-revision @@ -35,6 +53,9 @@ Patch that is valid for all commits except a single one bug.patch + + 8ca5cc28e6746eef7340064b5d843631841bf31e + exclude-revision-range @@ -42,6 +63,12 @@ Patch that excludes a range of commits (Otherwise includes all) bug.patch + + + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e + + exclude-single-and-revision-range @@ -49,6 +76,15 @@ Patch that excludes a certain range and individual commits (Otherwise includes all) bug.patch + + + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e + + + 27f17080376e409860405c40744887d81d6b3f34 + + include-range-exclude-single @@ -56,6 +92,15 @@ Patch valid for a range of commits where a single one is excluded bug.patch + + + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e + + + + 162db88346b06be20faac6976f1ff9bad986accf + include-range-exclude-range @@ -63,6 +108,18 @@ Patch valid for a range of commits where a subrange is explicitly excluded bug.patch + + + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 4300ea495e7f013f68e785fdde5c4ead81297999 + + + + + c051e44a973ee31b3baa571407694467a513ba68 + a94fb35ca49719028a1c50bdbc2fb82122043f46 + + From b971ced6e59476ff2d8fe29c03c14939761b72a9 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 27 Jun 2023 11:07:37 +0200 Subject: [PATCH 005/224] * Fixed patch config parsing for tests --- .../test-patch-configuration.xml | 30 ++--- tests/provider/test_patch_provider.py | 29 ++++- varats-core/varats/provider/patch/patch.py | 106 ----------------- .../varats/provider/patch/patch_provider.py | 111 +++++++++++++++++- 4 files changed, 149 insertions(+), 127 deletions(-) rename tests/TEST_INPUTS/patch-configs/{ => FeaturePerfCSCollection}/test-patch-configuration.xml (72%) delete mode 100644 varats-core/varats/provider/patch/patch.py diff --git a/tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml b/tests/TEST_INPUTS/patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml similarity index 72% rename from tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml rename to 
tests/TEST_INPUTS/patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml index 8a291f364..5bda2cbec 100644 --- a/tests/TEST_INPUTS/patch-configs/test-patch-configuration.xml +++ b/tests/TEST_INPUTS/patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml @@ -1,5 +1,5 @@ - github.com/se-sic/FeaturePerfCSCollection.git + git@github.com:se-sic/FeaturePerfCSCollection.git unrestricted-range @@ -26,8 +26,8 @@ bug.patch - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e @@ -39,8 +39,8 @@ bug.patch - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e 27f17080376e409860405c40744887d81d6b3f34 @@ -65,8 +65,8 @@ bug.patch - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e @@ -78,8 +78,8 @@ bug.patch - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e 27f17080376e409860405c40744887d81d6b3f34 @@ -94,8 +94,8 @@ bug.patch - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 8ca5cc28e6746eef7340064b5d843631841bf31e @@ -110,14 +110,14 @@ bug.patch - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 4300ea495e7f013f68e785fdde5c4ead81297999 + 01f9f1f07bef22d4248e8349aba4f0c1f204607e + 4300ea495e7f013f68e785fdde5c4ead81297999 - c051e44a973ee31b3baa571407694467a513ba68 - a94fb35ca49719028a1c50bdbc2fb82122043f46 + c051e44a973ee31b3baa571407694467a513ba68 + a94fb35ca49719028a1c50bdbc2fb82122043f46 diff --git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py index 4fe6f645a..a90394f6c 100644 --- a/tests/provider/test_patch_provider.py +++ b/tests/provider/test_patch_provider.py @@ -1,8 +1,15 @@ import unittest from pathlib import Path +import benchbuild as bb +from benchbuild.source.base import target_prefix +from benchbuild.utils.revision_ranges import _get_git_for_path + from tests.helper_utils import TEST_INPUTS_DIR -from varats.provider.patch.patch import ProjectPatches +from varats.provider.patch.patch_provider import ProjectPatchesConfiguration + +from varats.project.project_util import get_local_project_git_path +from varats.utils.git_util import ShortCommitHash class TestPatchProvider(unittest.TestCase): @@ -13,10 +20,26 @@ def test_something(self): class TestPatchConfiguration(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - cls.patch_config = ProjectPatches.from_xml( Path(TEST_INPUTS_DIR/'patch-configs/test-patch-configuration.xml') ) + patch_config = ProjectPatchesConfiguration.from_xml(Path(TEST_INPUTS_DIR / 'patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml')) + cls.patch_config = patch_config + + project_git_source = bb.source.Git( + remote="git@github.com:se-sic/FeaturePerfCSCollection.git", + local="FeaturePerfCSCollection", + refspec="origin/HEAD", + shallow=False, + ) + + project_git_source.fetch() + + repo_git = _get_git_for_path(target_prefix() + "/FeaturePerfCSCollection") + + cls.all_revisions = { ShortCommitHash(h) for h in repo_git('log', '--pretty=%H', '--first-parent').strip().split() } def test_unrestricted_range(self): - pass + patch = 
self.patch_config.get_by_shortname('unrestricted-range') + + self.assertEqual(patch.valid_revisions, set(self.all_revisions)) def test_included_single_revision(self): pass diff --git a/varats-core/varats/provider/patch/patch.py b/varats-core/varats/provider/patch/patch.py deleted file mode 100644 index 651d83194..000000000 --- a/varats-core/varats/provider/patch/patch.py +++ /dev/null @@ -1,106 +0,0 @@ -import os -from pathlib import Path -import typing as tp -import xml.etree.ElementTree as ET - -from benchbuild.utils.revision_ranges import _get_all_revisions_between, _get_git_for_path, RevisionRange, \ - SingleRevision - -from varats.utils.git_util import CommitHash, ShortCommitHash - - -class Patch: - """A class for storing a single project-specific Patch""" - - def __init__(self, project: str, shortname: str, description: str, path: Path, - valid_revisions: tp.Optional[tp.Set[CommitHash]] = None - , invalid_revisions: tp.Optional[tp.Set[CommitHash]] = None): - self.project: str = project - self.shortname: str = shortname - self.description: str = description - self.path: Path = path - self.valid_revisions: tp.Set[CommitHash] = valid_revisions - self.invalid_revisions: tp.Set[CommitHash] = invalid_revisions - - -class ProjectPatchesConfiguration: - """A class storing a set of patches specific to a project""" - - def __init__(self, project_name: str, repository: str, patches: tp.List[Patch]): - self.project_name: str = project_name - self.repository: str = repository - self.patches: tp.List[Patch] = patches - - def get_patches_for_revision(self, revision: CommitHash): - # This could be more concise with some nested list comprehensions - # But it would make it harder to understand - valid_patches: tp.Set[Patch] = set() - - for patch in self.patches: - add_patch = True - if patch.valid_revisions and revision not in patch.valid_revisions: - add_patch = False - - if patch.invalid_revisions and revision in patch.invalid_revisions: - add_patch = False - - if add_patch: - valid_patches.add(patch) - - @staticmethod - def from_xml(xml_path: Path): - project_name: str = Path(os.path.abspath(xml_path)).parts[-2] - tree = ET.parse(xml_path) - root = tree.getroot() - - if len(root.findall("repository")) != 1: - # TODO: Proper error handling - raise RuntimeError("Only one repository allowed") - - repository = root.findtext("repository") - - repo_git = _get_git_for_path(repository) - patch_list: tp.List[Patch] = [] - - def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: - res: tp.Set[CommitHash] = set() - - for revision_tag in revisions_tag.findall("single_revision"): - res.add(ShortCommitHash(revision_tag.text)) - - for revision_range_tag in revisions_tag.findall("revision_range"): - start_tag = revision_range_tag.find("start") - end_tag = revision_range_tag.find("end") - - res.update( - {ShortCommitHash(h) for h in _get_all_revisions_between(start_tag.text, end_tag.text, repo_git)}) - - return res - - # We explicitly ignore further validity checking of the XML at that point - # As for now, this is already done by a CI Job in the vara-project-patches - # repository - for patch in root.findall("patch"): - shortname = patch.findtext("shortname") - description = patch.findtext("description") - path = Path(patch.findtext("path")) - - include_revisions: tp.Set[CommitHash] = set() - - include_revs_tag = patch.find("include_revisions") - - if include_revs_tag: - include_revisions = parse_revisions(include_revs_tag) - else: - revs_list = repo_git('log', '--pretty="%H"', 
'--first-parent').strip().split() - - include_revisions.update([ShortCommitHash(rev) for rev in revs_list]) - - exclude_revs_tag = patch.find("exclude_revisions") - - if exclude_revs_tag: - include_revisions.difference_update(parse_revisions(exclude_revs_tag)) - - patch_list.append(Patch(project_name, shortname, description, path, include_revisions)) - - return ProjectPatchesConfiguration(project_name, repository, patch_list) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 300e22c75..0f2cab518 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -1,12 +1,118 @@ +import os +import xml.etree.ElementTree as ET import typing as tp from pathlib import Path import benchbuild as bb from benchbuild.project import Project from benchbuild.source.base import target_prefix +from benchbuild.utils.revision_ranges import _get_all_revisions_between, _get_git_for_path -from varats.provider.patch.patch import Patch, ProjectPatchesConfiguration +from varats.project.project_util import get_local_project_git_path from varats.provider.provider import Provider, ProviderType +from varats.utils.git_util import CommitHash, ShortCommitHash + + +class Patch: + """A class for storing a single project-specific Patch""" + + def __init__(self, project: str, shortname: str, description: str, path: Path, + valid_revisions: tp.Optional[tp.Set[CommitHash]] = None + , invalid_revisions: tp.Optional[tp.Set[CommitHash]] = None): + self.project: str = project + self.shortname: str = shortname + self.description: str = description + self.path: Path = path + self.valid_revisions: tp.Set[CommitHash] = valid_revisions + self.invalid_revisions: tp.Set[CommitHash] = invalid_revisions + + +class ProjectPatchesConfiguration: + """A class storing a set of patches specific to a project""" + + def __init__(self, project_name: str, repository: str, patches: tp.List[Patch]): + self.project_name: str = project_name + self.repository: str = repository + self.patches: tp.List[Patch] = patches + + def get_patches_for_revision(self, revision: CommitHash) -> tp.Set[Patch]: + """Returns all patches that are valid for the given revision""" + + return {patch for patch in self.patches if revision in patch.valid_revisions} + + def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: + """Returns the patch with the given shortname""" + + for patch in self.patches: + if patch.shortname == shortname: + return patch + + return None + + @staticmethod + def from_xml(xml_path: Path): + project_name: str = Path(os.path.abspath(xml_path)).parts[-2] + tree = ET.parse(xml_path) + root = tree.getroot() + + if len(root.findall("repository")) != 1: + # TODO: Proper error handling + raise RuntimeError("Only one repository allowed") + + repository = root.findtext("repository") + + project_git_source = bb.source.Git( + remote=repository, + local=project_name, + refspec="origin/HEAD", + shallow=False, + ) + + project_git_source.fetch() + + repo_git = _get_git_for_path(target_prefix() + "/" + project_name) + patch_list: tp.List[Patch] = [] + + def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: + res: tp.Set[CommitHash] = set() + + for revision_tag in revisions_tag.findall("single_revision"): + res.add(ShortCommitHash(revision_tag.text)) + + for revision_range_tag in revisions_tag.findall("revision_range"): + start_tag = revision_range_tag.find("start") + end_tag = revision_range_tag.find("end") + 
+ res.update( + {ShortCommitHash(h) for h in _get_all_revisions_between(start_tag.text, end_tag.text, repo_git)}) + + return res + + # We explicitly ignore further validity checking of the XML at that point + # As for now, this is already done by a CI Job in the vara-project-patches + # repository + for patch in root.find("patches").findall("patch"): + shortname = patch.findtext("shortname") + description = patch.findtext("description") + path = Path(patch.findtext("path")) + + include_revisions: tp.Set[CommitHash] = set() + + include_revs_tag = patch.find("include_revisions") + + if include_revs_tag: + include_revisions = parse_revisions(include_revs_tag) + else: + include_revisions = { ShortCommitHash(h) for h in repo_git('log', '--pretty=%H', '--first-parent').strip().split() } + + exclude_revs_tag = patch.find("exclude_revisions") + + if exclude_revs_tag: + include_revisions.difference_update(parse_revisions(exclude_revs_tag)) + + patch_list.append(Patch(project_name, shortname, description, path, include_revisions)) + + return ProjectPatchesConfiguration(project_name, repository, patch_list) class PatchesNotFoundError(FileNotFoundError): @@ -50,7 +156,7 @@ def create_default_provider(cls: tp.Type[ProviderType], project: tp.Type[Project def _get_patches_repository_path() -> Path: patches_source = bb.source.Git( remote=PatchProvider.patches_repository, - local="ConfigurableSystems", + local="patch-configurations", refspec="origin/HEAD", limit=1, ) @@ -63,4 +169,3 @@ def _get_patches_repository_path() -> Path: @staticmethod def _parse_patches_config(config_file: Path) -> ProjectPatchesConfiguration: # TODO: Implement XML parsing for patches config pass - From 61cfa543a0fa21ec41fbc8be17a3bac3e5a1a17c Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 27 Jun 2023 12:38:09 +0200 Subject: [PATCH 006/224] * Finished first tests for revision ranges for patches --- tests/provider/test_patch_provider.py | 89 ++++++++++++++----- .../varats/provider/patch/patch_provider.py | 2 +- 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py index a90394f6c..26399f5dc 100644 --- a/tests/provider/test_patch_provider.py +++ b/tests/provider/test_patch_provider.py @@ -8,19 +8,14 @@ from tests.helper_utils import TEST_INPUTS_DIR from varats.provider.patch.patch_provider import ProjectPatchesConfiguration -from varats.project.project_util import get_local_project_git_path from varats.utils.git_util import ShortCommitHash -class TestPatchProvider(unittest.TestCase): - def test_something(self): - self.assertEqual(True, True) - - -class TestPatchConfiguration(unittest.TestCase): +class TestPatchRevisionRanges(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - patch_config = ProjectPatchesConfiguration.from_xml(Path(TEST_INPUTS_DIR / 'patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml')) + patch_config = ProjectPatchesConfiguration.from_xml( + Path(TEST_INPUTS_DIR / 'patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml')) cls.patch_config = patch_config project_git_source = bb.source.Git( @@ -34,33 +29,85 @@ def setUpClass(cls) -> None: repo_git = _get_git_for_path(target_prefix() + "/FeaturePerfCSCollection") - cls.all_revisions = {ShortCommitHash(h) for h in - repo_git('log', '--pretty=%H', '--first-parent').strip().split()} + cls.all_revisions = {ShortCommitHash(h) for h in + repo_git('log', '--pretty=%H', '--first-parent').strip().split()} + + def __test_patch_revisions(self, shortname: str, 
expected_revisions: set[ShortCommitHash]): + patch = self.patch_config.get_by_shortname(shortname) + + self.assertSetEqual(expected_revisions, patch.valid_revisions) def test_unrestricted_range(self): - patch = self.patch_config.get_by_shortname('unrestricted-range') + self.__test_patch_revisions("unrestricted-range", self.all_revisions) + + def test_include_single_revision(self): + expected_revisions = {ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e")} - self.assertEqual(patch.valid_revisions, set(self.all_revisions)) + self.__test_patch_revisions("include-single-revision", expected_revisions) - def test_included_single_revision(self): - pass + def test_include_revision_range(self): + expected_revisions = {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9")} - def test_included_revision_range(self): - pass + self.__test_patch_revisions("include-revision-range", expected_revisions) def test_included_single_and_revision_range(self): - pass + expected_revisions = {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9"), + ShortCommitHash("27f17080376e409860405c40744887d81d6b3f34")} + + self.__test_patch_revisions("include-single-and-revision-range", expected_revisions) def test_exclude_single_revision(self): - pass + expected_revisions = self.all_revisions + expected_revisions.remove(ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e")) + + self.__test_patch_revisions("exclude-single-revision", expected_revisions) def test_exclude_revision_range(self): - pass + expected_revisions = self.all_revisions + expected_revisions.difference_update( + {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9")} + ) + + self.__test_patch_revisions("exclude-revision-range", expected_revisions) def test_exclude_single_and_revision_range(self): - pass + expected_revisions = self.all_revisions + expected_revisions.difference_update( + {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9"), + ShortCommitHash("27f17080376e409860405c40744887d81d6b3f34")}) + + self.__test_patch_revisions("exclude-single-and-revision-range", expected_revisions) def test_include_range_exclude_single(self): - pass + expected_revisions = {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9")} + + self.__test_patch_revisions("include-range-exclude-single", expected_revisions) def 
test_include_range_exclude_range(self): - pass + expected_revisions = {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("4300ea495e7f013f68e785fdde5c4ead81297999"), + ShortCommitHash("27f17080376e409860405c40744887d81d6b3f34"), + ShortCommitHash("32b28ee90e2475cf44d7a616101bcaba2396168d"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9")} + + self.__test_patch_revisions("include-range-exclude-range", expected_revisions) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 0f2cab518..cd37f9880 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -77,7 +77,7 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: res: tp.Set[CommitHash] = set() for revision_tag in revisions_tag.findall("single_revision"): - res.add(ShortCommitHash(revision_tag.text)) + res.add(ShortCommitHash(revision_tag.text.strip())) for revision_range_tag in revisions_tag.findall("revision_range"): start_tag = revision_range_tag.find("start") From bba044a3754df576887abb795599bdaef0eeee4b Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 27 Jun 2023 13:15:44 +0200 Subject: [PATCH 007/224] * Fixed tests to explicitly copy all revisions --- tests/provider/test_patch_provider.py | 7 ++++--- varats-core/varats/provider/patch/patch_provider.py | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py index 26399f5dc..e089a56ee 100644 --- a/tests/provider/test_patch_provider.py +++ b/tests/provider/test_patch_provider.py @@ -1,4 +1,5 @@ import unittest +from copy import deepcopy from pathlib import Path import benchbuild as bb @@ -65,13 +66,13 @@ def test_included_single_and_revision_range(self): self.__test_patch_revisions("include-single-and-revision-range", expected_revisions) def test_exclude_single_revision(self): - expected_revisions = self.all_revisions + expected_revisions = deepcopy(self.all_revisions) expected_revisions.remove(ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e")) self.__test_patch_revisions("exclude-single-revision", expected_revisions) def test_exclude_revision_range(self): - expected_revisions = self.all_revisions + expected_revisions = deepcopy(self.all_revisions) expected_revisions.difference_update( {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), @@ -83,7 +84,7 @@ def test_exclude_revision_range(self): self.__test_patch_revisions("exclude-revision-range", expected_revisions) def test_exclude_single_and_revision_range(self): - expected_revisions = self.all_revisions + expected_revisions = deepcopy(self.all_revisions) expected_revisions.difference_update( {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index cd37f9880..9e22de59f 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -84,7 +84,9 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: end_tag = revision_range_tag.find("end") res.update( - {ShortCommitHash(h) for h in _get_all_revisions_between(start_tag.text, 
end_tag.text, repo_git)}) + {ShortCommitHash(h) for h in _get_all_revisions_between(start_tag.text.strip(), + end_tag.text.strip(), + repo_git)}) return res From 12168fa88a44ee87f96b6f57e15f335baa45094d Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 28 Jun 2023 09:47:21 +0200 Subject: [PATCH 008/224] * Added handling for creating project specific providers --- .../varats/provider/patch/patch_provider.py | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 9e22de59f..4e9560ee4 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -17,14 +17,12 @@ class Patch: """A class for storing a single project-specific Patch""" def __init__(self, project: str, shortname: str, description: str, path: Path, - valid_revisions: tp.Optional[tp.Set[CommitHash]] = None - , invalid_revisions: tp.Optional[tp.Set[CommitHash]] = None): + valid_revisions: tp.Optional[tp.Set[CommitHash]] = None): self.project: str = project self.shortname: str = shortname self.description: str = description self.path: Path = path self.valid_revisions: tp.Set[CommitHash] = valid_revisions - self.invalid_revisions: tp.Set[CommitHash] = invalid_revisions class ProjectPatchesConfiguration: @@ -134,7 +132,7 @@ def __init__(self, project: tp.Type[Project]): # TODO: Add proper error message raise PatchesNotFoundError() - patches_config_file = Path(patches_project_dir / "test-patch-configuration.xml") + patches_config_file = Path(patches_project_dir / ".patches.xml") if not patches_config_file.exists(): # TODO: Add proper error handling @@ -142,17 +140,34 @@ def __init__(self, project: tp.Type[Project]): # not the patches itself raise PatchesNotFoundError() - self.project_patches = self._parse_patches_config(patches_config_file) + self.patches_config = ProjectPatchesConfiguration.from_xml(patches_config_file) super().__init__(project) @classmethod def create_provider_for_project(cls: tp.Type[ProviderType], project: tp.Type[Project]) -> tp.Optional[ProviderType]: - pass + """ + Creates a provider instance for the given project if possible. + + Returns: + a provider instance for the given project if possible, + otherwise, ``None`` + """ + try: + return PatchProvider(project) + except PatchesNotFoundError: + # TODO: Warnings? + return None @classmethod def create_default_provider(cls: tp.Type[ProviderType], project: tp.Type[Project]) -> ProviderType: - pass + """ + Creates a default provider instance that can be used with any project. 
+ + Returns: + a default provider instance + """ + raise AssertionError("All usages should be covered by the project specific provider.") @staticmethod def _get_patches_repository_path() -> Path: @@ -165,9 +180,4 @@ def _get_patches_repository_path() -> Path: patches_source.fetch() - return Path(Path(target_prefix()) / patches_source.local) - - @staticmethod - def _parse_patches_config(config_file: Path) -> ProjectPatchesConfiguration: - # TODO: Implement XML parsing for patches config - pass + return Path(Path(target_prefix()) / patches_source.local) \ No newline at end of file From 4fae498040609115dacc4d6c5dd3a2c7ac8ceefa Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 28 Jun 2023 12:38:52 +0200 Subject: [PATCH 009/224] * Added test for getting project specific patch provider --- tests/provider/test_patch_provider.py | 10 +++++++++- varats-core/varats/provider/patch/patch_provider.py | 9 ++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py index e089a56ee..65be6b84d 100644 --- a/tests/provider/test_patch_provider.py +++ b/tests/provider/test_patch_provider.py @@ -7,11 +7,19 @@ from benchbuild.utils.revision_ranges import _get_git_for_path from tests.helper_utils import TEST_INPUTS_DIR -from varats.provider.patch.patch_provider import ProjectPatchesConfiguration +from varats.projects.perf_tests.feature_perf_cs_collection import FeaturePerfCSCollection +from varats.provider.patch.patch_provider import ProjectPatchesConfiguration, PatchProvider from varats.utils.git_util import ShortCommitHash +class TestPatchProvider(unittest.TestCase): + def test_correct_patch_config_access(self): + """Checks if we get a correct path for accessing the PatchConfig.""" + provider = PatchProvider.create_provider_for_project(FeaturePerfCSCollection) + self.assertIsNotNone(provider) + + class TestPatchRevisionRanges(unittest.TestCase): @classmethod def setUpClass(cls) -> None: diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 4e9560ee4..cab0374f0 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -123,11 +123,16 @@ class PatchesNotFoundError(FileNotFoundError): class PatchProvider(Provider): """A provider for getting patch files for a certain project""" - patches_repository = "https://github.com/se-sic/vara-project-patches.git" + patches_repository = "git@github.com:se-sic/vara-project-patches.git" def __init__(self, project: tp.Type[Project]): + super().__init__(project) + patches_project_dir = Path(self._get_patches_repository_path() / self.project.NAME) + # BB only performs a fetch so our repo might be out of date + _get_git_for_path(patches_project_dir)("pull") + if not patches_project_dir.is_dir(): # TODO: Add proper error message raise PatchesNotFoundError() @@ -142,8 +147,6 @@ def __init__(self, project: tp.Type[Project]): self.patches_config = ProjectPatchesConfiguration.from_xml(patches_config_file) - super().__init__(project) - @classmethod def create_provider_for_project(cls: tp.Type[ProviderType], project: tp.Type[Project]) -> tp.Optional[ProviderType]: """ From 8e74338491237f3726aa585075985909d840fa33 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 28 Jun 2023 12:43:00 +0200 Subject: [PATCH 010/224] * Added test for getting a patch by short name --- tests/provider/test_patch_provider.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff 
--git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py index 65be6b84d..9f9d51f9c 100644 --- a/tests/provider/test_patch_provider.py +++ b/tests/provider/test_patch_provider.py @@ -19,6 +19,16 @@ def test_correct_patch_config_access(self): provider = PatchProvider.create_provider_for_project(FeaturePerfCSCollection) self.assertIsNotNone(provider) + def test_get_patch_by_shortname(self): + provider = PatchProvider.create_provider_for_project(FeaturePerfCSCollection) + self.assertIsNotNone(provider) + + patch = provider.patches_config.get_by_shortname("patch-10") + self.assertIsNotNone(patch) + + patch = provider.patches_config.get_by_shortname("dummy-patch") + self.assertIsNone(patch) + class TestPatchRevisionRanges(unittest.TestCase): @classmethod From 8a8ae4b899bb774c1fb43f74ba499434e859a4ca Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 28 Jun 2023 16:13:32 +0200 Subject: [PATCH 011/224] * Added general conceptual outline for running experiments with patches --- .../experiments/base/patch_experiment.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 varats/varats/experiments/base/patch_experiment.py diff --git a/varats/varats/experiments/base/patch_experiment.py b/varats/varats/experiments/base/patch_experiment.py new file mode 100644 index 000000000..91ffa9398 --- /dev/null +++ b/varats/varats/experiments/base/patch_experiment.py @@ -0,0 +1,55 @@ +import typing as tp + +from benchbuild import Project +from benchbuild.utils import actions + +from varats.data.reports.empty_report import EmptyReport +from varats.experiment.experiment_util import VersionExperiment +from varats.experiments.base.just_compile import JustCompileReport +from varats.provider.patch.patch_provider import PatchProvider +from varats.report.report import ReportSpecification +from varats.utils.git_util import ShortCommitHash + +# Placeholder until we figure out how to pass experiment parameters to this +EXPERIMENTS = [JustCompileReport] + + +class ApplyPatch(actions.ProjectStep): + pass + + +class RevertPatch(actions.ProjectStep): + pass + + +class PatchExperiment(VersionExperiment, shorthand="PE"): + """Generates empty report file.""" + + NAME = "PatchExperiment" + REPORT_SPEC = ReportSpecification(EmptyReport) + + def actions_for_project( + self, project: Project + ) -> tp.MutableSequence[actions.Step]: + """Returns the aggregated steps for all experiments and the various patches.""" + analysis_actions = [] + + for experiment in EXPERIMENTS: + # In any case we always want to run the experiment without any patches + analysis_actions.append(actions.RequireAll(experiment.actions_for_project(project))) + + patch_provider = PatchProvider.get_provider_for_project(project.cls) + + # This seems brittle but I don't know how to get the current revision + commit_hash = ShortCommitHash(str(project.revision)) + + patches = patch_provider.patches_config.get_patches_for_revision(commit_hash) + + for patch in patches: + patch_actions = [ApplyPatch(project, patch), + experiment.actions_for_project(project) , + RevertPatch(project, patch)] + + analysis_actions.append(actions.RequireAll(patch_actions)) + + return analysis_actions From 25eda17f7ded9d8557805b9c87c75a598fdfcd74 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Thu, 29 Jun 2023 15:58:44 +0200 Subject: [PATCH 012/224] * Added actions for patch apply/revert --- .../varats/provider/patch/patch_provider.py | 2 + .../experiments/base/patch_experiment.py | 37 +++++++++++++++++-- 2 files changed, 36 insertions(+), 
3 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index cab0374f0..de1878bb8 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -94,6 +94,8 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: for patch in root.find("patches").findall("patch"): shortname = patch.findtext("shortname") description = patch.findtext("description") + + # TODO: Proper path handling (Absolute/relative) path = Path(patch.findtext("path")) include_revisions: tp.Set[CommitHash] = set() diff --git a/varats/varats/experiments/base/patch_experiment.py b/varats/varats/experiments/base/patch_experiment.py index 91ffa9398..c67780664 100644 --- a/varats/varats/experiments/base/patch_experiment.py +++ b/varats/varats/experiments/base/patch_experiment.py @@ -1,7 +1,10 @@ +import os import typing as tp from benchbuild import Project from benchbuild.utils import actions +from benchbuild.utils.revision_ranges import _get_git_for_path +from benchbuild.utils.actions import StepResult from varats.data.reports.empty_report import EmptyReport from varats.experiment.experiment_util import VersionExperiment @@ -15,11 +18,39 @@ class ApplyPatch(actions.ProjectStep): - pass + NAME = "ApplyPatch" + DESCRIPTION = "Apply a Git patch to a project." + + def __init__(self, project, patch): + super().__init__(project) + self.__patch = patch + + def __call__(self) -> StepResult: + repo_git = _get_git_for_path(os.path.abspath(self.project.builddir)) + + patch_path = self.__patch.path + + repo_git("apply", patch_path) + + return StepResult.OK class RevertPatch(actions.ProjectStep): - pass + NAME = "RevertPatch" + DESCRIPTION = "Revert a Git patch from a project." 
+ + def __init__(self, project, patch): + super().__init__(project) + self.__patch = patch + + def __call__(self) -> StepResult: + repo_git = _get_git_for_path(os.path.abspath(self.project.builddir)) + + patch_path = self.__patch.path + + repo_git("apply", "-R", patch_path) + + return StepResult.OK class PatchExperiment(VersionExperiment, shorthand="PE"): @@ -38,7 +69,7 @@ def actions_for_project( # In any case we always want to run the experiment without any patches analysis_actions.append(actions.RequireAll(experiment.actions_for_project(project))) - patch_provider = PatchProvider.get_provider_for_project(project.cls) + patch_provider = PatchProvider.get_provider_for_project(project) # This seems brittle but I don't know how to get the current revision commit_hash = ShortCommitHash(str(project.revision)) From ae36e8200db4b6e76878b46614e129b02a48740f Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Mon, 3 Jul 2023 13:13:33 +0200 Subject: [PATCH 013/224] * Restructured patch infrastructure as separate experiment --- .../varats/provider/patch/patch_provider.py | 53 +++++++++++- .../experiments/base/patch_experiment.py | 86 ------------------- 2 files changed, 50 insertions(+), 89 deletions(-) delete mode 100644 varats/varats/experiments/base/patch_experiment.py diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index de1878bb8..84133d43c 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -1,17 +1,52 @@ import os import xml.etree.ElementTree as ET import typing as tp +from copy import deepcopy from pathlib import Path import benchbuild as bb from benchbuild.project import Project +from benchbuild.utils import actions from benchbuild.source.base import target_prefix +from benchbuild.utils.actions import StepResult from benchbuild.utils.revision_ranges import _get_all_revisions_between, _get_git_for_path -from varats.project.project_util import get_local_project_git_path from varats.provider.provider import Provider, ProviderType from varats.utils.git_util import CommitHash, ShortCommitHash +class ApplyPatch(actions.ProjectStep): + NAME = "ApplyPatch" + DESCRIPTION = "Apply a Git patch to a project." + + def __init__(self, project, patch): + super().__init__(project) + self.__patch = patch + + def __call__(self) -> StepResult: + repo_git = _get_git_for_path(os.path.abspath(self.project.builddir)) + + patch_path = self.__patch.path + + repo_git("apply", patch_path) + + return StepResult.OK + +class RevertPatch(actions.ProjectStep): + NAME = "RevertPatch" + DESCRIPTION = "Revert a Git patch from a project." 
+ + def __init__(self, project, patch): + super().__init__(project) + self.__patch = patch + + def __call__(self) -> StepResult: + repo_git = _get_git_for_path(os.path.abspath(self.project.builddir)) + + patch_path = self.__patch.path + + repo_git("apply", "-R", patch_path) + + return StepResult.OK class Patch: """A class for storing a single project-specific Patch""" @@ -24,7 +59,6 @@ def __init__(self, project: str, shortname: str, description: str, path: Path, self.path: Path = path self.valid_revisions: tp.Set[CommitHash] = valid_revisions - class ProjectPatchesConfiguration: """A class storing a set of patches specific to a project""" @@ -185,4 +219,17 @@ def _get_patches_repository_path() -> Path: patches_source.fetch() - return Path(Path(target_prefix()) / patches_source.local) \ No newline at end of file + return Path(Path(target_prefix()) / patches_source.local) + + +def create_patch_action_list(project: Project, standard_actions: tp.MutableSequence[actions.Step], hash: CommitHash) -> tp.Mapping[str, tp.MutableSequence[actions.Step]]: + """ Creates a map of actions for applying and reverting all patches that are valid for the given revision """ + result_actions = {} + + patch_provider = PatchProvider.create_provider_for_project(project) + patches = patch_provider.patches_config.get_patches_for_revision(hash) + + for patch in patches: + result_actions[patch.shortname] = [ApplyPatch(project,patch), *standard_actions, RevertPatch(project,patch)] + + return result_actions diff --git a/varats/varats/experiments/base/patch_experiment.py b/varats/varats/experiments/base/patch_experiment.py deleted file mode 100644 index c67780664..000000000 --- a/varats/varats/experiments/base/patch_experiment.py +++ /dev/null @@ -1,86 +0,0 @@ -import os -import typing as tp - -from benchbuild import Project -from benchbuild.utils import actions -from benchbuild.utils.revision_ranges import _get_git_for_path -from benchbuild.utils.actions import StepResult - -from varats.data.reports.empty_report import EmptyReport -from varats.experiment.experiment_util import VersionExperiment -from varats.experiments.base.just_compile import JustCompileReport -from varats.provider.patch.patch_provider import PatchProvider -from varats.report.report import ReportSpecification -from varats.utils.git_util import ShortCommitHash - -# Placeholder until we figure out how to pass experiment parameters to this -EXPERIMENTS = [JustCompileReport] - - -class ApplyPatch(actions.ProjectStep): - NAME = "ApplyPatch" - DESCRIPTION = "Apply a Git patch to a project." - - def __init__(self, project, patch): - super().__init__(project) - self.__patch = patch - - def __call__(self) -> StepResult: - repo_git = _get_git_for_path(os.path.abspath(self.project.builddir)) - - patch_path = self.__patch.path - - repo_git("apply", patch_path) - - return StepResult.OK - - -class RevertPatch(actions.ProjectStep): - NAME = "RevertPatch" - DESCRIPTION = "Revert a Git patch from a project." 
- - def __init__(self, project, patch): - super().__init__(project) - self.__patch = patch - - def __call__(self) -> StepResult: - repo_git = _get_git_for_path(os.path.abspath(self.project.builddir)) - - patch_path = self.__patch.path - - repo_git("apply", "-R", patch_path) - - return StepResult.OK - - -class PatchExperiment(VersionExperiment, shorthand="PE"): - """Generates empty report file.""" - - NAME = "PatchExperiment" - REPORT_SPEC = ReportSpecification(EmptyReport) - - def actions_for_project( - self, project: Project - ) -> tp.MutableSequence[actions.Step]: - """Returns the aggregated steps for all experiments and the various patches.""" - analysis_actions = [] - - for experiment in EXPERIMENTS: - # In any case we always want to run the experiment without any patches - analysis_actions.append(actions.RequireAll(experiment.actions_for_project(project))) - - patch_provider = PatchProvider.get_provider_for_project(project) - - # This seems brittle but I don't know how to get the current revision - commit_hash = ShortCommitHash(str(project.revision)) - - patches = patch_provider.patches_config.get_patches_for_revision(commit_hash) - - for patch in patches: - patch_actions = [ApplyPatch(project, patch), - experiment.actions_for_project(project) , - RevertPatch(project, patch)] - - analysis_actions.append(actions.RequireAll(patch_actions)) - - return analysis_actions From 074f4f2ce807fc79029aa0656af472b44bc36e2e Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Mon, 3 Jul 2023 14:16:21 +0200 Subject: [PATCH 014/224] * Added patch support to JustCompile Project * Added patch_name parameter to report file creation utility functions --- .../varats/experiment/experiment_util.py | 23 +++++++++----- .../varats/provider/patch/patch_provider.py | 18 +++++++++-- varats-core/varats/report/report.py | 30 +++++++++++++------ .../varats/experiments/base/just_compile.py | 29 ++++++++++++++---- 4 files changed, 74 insertions(+), 26 deletions(-) diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index d70af5353..6f7fe2c08 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -306,7 +306,8 @@ def get_file_name( project_revision: ShortCommitHash, project_uuid: str, extension_type: FileStatusExtension, - config_id: tp.Optional[int] = None + config_id: tp.Optional[int] = None, + patch_name: tp.Optional[str] = None ) -> ReportFilename: """ Generates a filename for a report file that is generated by the @@ -327,7 +328,8 @@ def get_file_name( return self.__experiment.report_spec( ).get_report_type(report_shorthand).get_file_name( self.__experiment.shorthand(), project_name, binary_name, - project_revision, project_uuid, extension_type, config_id + project_revision, project_uuid, extension_type, config_id, + patch_name ) def report_spec(self) -> ReportSpecification: @@ -575,7 +577,8 @@ def __create_new_result_filepath_impl( project: VProject, binary: ProjectBinaryWrapper, extension_type: FileStatusExtension, - config_id: tp.Optional[int] = None + config_id: tp.Optional[int] = None, + patch_name: tp.Optional[str] = None ) -> ReportFilepath: """ Create a result filepath for the specified file extension and report of the @@ -602,7 +605,8 @@ def __create_new_result_filepath_impl( project_revision=ShortCommitHash(project.version_of_primary), project_uuid=str(project.run_uuid), extension_type=extension_type, - config_id=config_id + config_id=config_id, + patch_name=patch_name ) ) @@ -620,7 
+624,8 @@ def create_new_success_result_filepath(
     report_type: tp.Type[BaseReport],
     project: VProject,
     binary: ProjectBinaryWrapper,
-    config_id: tp.Optional[int] = None
+    config_id: tp.Optional[int] = None,
+    patch_name: tp.Optional[str] = None
 ) -> ReportFilepath:
     """
     Create a result filepath for a successful report of the executed
@@ -632,12 +637,13 @@ def create_new_success_result_filepath(
         project: current project
         binary: current binary
         config_id: optional id to specify the used configuration
+        patch_name: optional name of the patch that was applied
 
     Returns: formatted success filepath
     """
     return __create_new_result_filepath_impl(
         exp_handle, report_type, project, binary, FileStatusExtension.SUCCESS,
-        config_id
+        config_id, patch_name
     )
 
 
@@ -646,7 +652,8 @@ def create_new_failed_result_filepath(
     report_type: tp.Type[BaseReport],
     project: VProject,
     binary: ProjectBinaryWrapper,
-    config_id: tp.Optional[int] = None
+    config_id: tp.Optional[int] = None,
+    patch_name: tp.Optional[str] = None
 ) -> ReportFilepath:
     """
     Create a result filepath for a failed report of the executed
@@ -663,7 +670,7 @@ def create_new_failed_result_filepath(
     """
     return __create_new_result_filepath_impl(
         exp_handle, report_type, project, binary, FileStatusExtension.FAILED,
-        config_id
+        config_id, patch_name
     )
 
 
diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py
index 84133d43c..1efc97d05 100644
--- a/varats-core/varats/provider/patch/patch_provider.py
+++ b/varats-core/varats/provider/patch/patch_provider.py
@@ -14,6 +14,7 @@
 from varats.provider.provider import Provider, ProviderType
 from varats.utils.git_util import CommitHash, ShortCommitHash
 
+
 class ApplyPatch(actions.ProjectStep):
     NAME = "ApplyPatch"
     DESCRIPTION = "Apply a Git patch to a project."
@@ -31,6 +32,7 @@ def __call__(self) -> StepResult:
 
         return StepResult.OK
 
+
 class RevertPatch(actions.ProjectStep):
     NAME = "RevertPatch"
     DESCRIPTION = "Revert a Git patch from a project."
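
The ApplyPatch and RevertPatch steps above are thin wrappers around `git apply`. The following standalone sketch shows the underlying behavior; all names are illustrative and it assumes a checked-out repository at `repo_path` and an existing patch file at `patch_file`:

    from plumbum import local

    def apply_git_patch(repo_path: str, patch_file: str, revert: bool = False) -> None:
        # Bind git to the repository, similar to what _get_git_for_path returns.
        repo_git = local["git"]["-C", repo_path]
        if revert:
            # `git apply -R` undoes a previously applied patch (RevertPatch).
            repo_git("apply", "-R", patch_file)
        else:
            # `git apply` applies the patch on top of the checkout (ApplyPatch).
            repo_git("apply", patch_file)
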
@@ -48,6 +50,7 @@ def __call__(self) -> StepResult:
 
         return StepResult.OK
 
+
 class Patch:
     """A class for storing a single project-specific Patch"""
 
@@ -59,6 +62,7 @@ def __init__(self, project: str, shortname: str, description: str, path: Path,
         self.path: Path = path
         self.valid_revisions: tp.Set[CommitHash] = valid_revisions
 
+
 class ProjectPatchesConfiguration:
     """A class storing a set of patches specific to a project"""
 
@@ -139,7 +143,8 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]:
         if include_revs_tag:
             include_revisions = parse_revisions(include_revs_tag)
         else:
-            include_revisions = { ShortCommitHash(h) for h in repo_git('log', '--pretty=%H', '--first-parent').strip().split() }
+            include_revisions = {ShortCommitHash(h) for h in
+                                 repo_git('log', '--pretty=%H', '--first-parent').strip().split()}
 
         exclude_revs_tag = patch.find("exclude_revisions")
 
@@ -222,7 +227,8 @@ def _get_patches_repository_path() -> Path:
 
     return Path(Path(target_prefix()) / patches_source.local)
 
-def create_patch_action_list(project: Project, standard_actions: tp.MutableSequence[actions.Step], hash: CommitHash) -> tp.Mapping[str, tp.MutableSequence[actions.Step]]:
+def create_patch_action_list(project: Project, standard_actions: tp.MutableSequence[actions.Step], hash: CommitHash) -> \
+tp.Mapping[str, tp.MutableSequence[actions.Step]]:
     """ Creates a map of actions for applying and reverting all patches that are valid for the given revision """
     result_actions = {}
 
@@ -230,6 +236,12 @@ def create_patch_action_list(project: Project, standard_actions: tp.MutableSeque
     patches = patch_provider.patches_config.get_patches_for_revision(hash)
 
     for patch in patches:
-        result_actions[patch.shortname] = [ApplyPatch(project,patch), *standard_actions, RevertPatch(project,patch)]
+        result_actions[patch.shortname] = [ApplyPatch(project, patch), *standard_actions, RevertPatch(project, patch)]
 
     return result_actions
+
+
+def wrap_action_list_with_patch(action_list: tp.MutableSequence[actions.Step], patch: Patch) -> tp.MutableSequence[
+    actions.Step]:
+    """ Wraps the given action list with the given patch """
+    return [ApplyPatch(patch.project, patch), *action_list, RevertPatch(patch.project, patch)]
diff --git a/varats-core/varats/report/report.py b/varats-core/varats/report/report.py
index d454b1ee6..5d63efd74 100644
--- a/varats-core/varats/report/report.py
+++ b/varats-core/varats/report/report.py
@@ -147,21 +147,21 @@ class ReportFilename():
         r"(?P<experiment_shorthand>.*)-" + r"(?P<report_shorthand>.*)-" +
         r"(?P<project_name>.*)-(?P<binary_name>.*)-" +
         r"(?P<project_revision>.*)[_\/](?P<project_uuid>[0-9a-fA-F\-]*)"
-        r"(_config-(?P<config_id>\d+))?" + "_" +
+        r"(_config-(?P<config_id>\d+))?" + r"(_patch-(?P<patch_name>.+))?" + "_" +
         FileStatusExtension.get_regex_grp() + r"?" + r"(?P<file_ext>\..*)?" +
         "$"
     )
 
     __RESULT_FILE_TEMPLATE = (
         "{experiment_shorthand}-" + "{report_shorthand}-" + "{project_name}-" +
-        "{binary_name}-" + "{project_revision}_" + "{project_uuid}_" +
+        "{binary_name}-" + "{project_revision}_" + "{project_uuid}_{patch_name}" +
         "{status_ext}" + "{file_ext}"
     )
 
     __CONFIG_SPECIFIC_RESULT_FILE_TEMPLATE = (
         "{experiment_shorthand}-" + "{report_shorthand}-" + "{project_name}-" +
         "{binary_name}-" + "{project_revision}/" + "{project_uuid}" +
-        "_config-{config_id}_" + "{status_ext}" + "{file_ext}"
+        "_{patch_name}config-{config_id}_" + "{status_ext}" + "{file_ext}"
     )
 
     def __init__(self, file_name: tp.Union[str, Path]) -> None:
@@ -396,7 +396,8 @@ def get_file_name(
         project_uuid: str,
         extension_type: FileStatusExtension,
         file_ext: str = ".txt",
-        config_id: tp.Optional[int] = None
+        config_id: tp.Optional[int] = None,
+        patch_name: tp.Optional[str] = None
     ) -> 'ReportFilename':
         """
         Generates a filename for a report file out the different parts.
@@ -412,7 +413,7 @@ def get_file_name(
             file_ext: file extension of the report file
 
         Returns:
-            name for the report file that can later be uniquly identified
+            name for the report file that can later be uniquely identified
         """
         status_ext = FileStatusExtension.get_status_extension(extension_type)
 
@@ -420,6 +421,14 @@ def get_file_name(
         if file_ext and not file_ext.startswith("."):
             file_ext = "." + file_ext
 
+        if patch_name is not None:
+            patch_name = f"patch-{patch_name}"
+
+            if not patch_name.endswith("_"):
+                patch_name = patch_name + "_"
+        else:
+            patch_name = ""
+
         if config_id is not None:
             return ReportFilename(
                 ReportFilename.__CONFIG_SPECIFIC_RESULT_FILE_TEMPLATE.format(
@@ -431,7 +440,8 @@ def get_file_name(
                     project_uuid=project_uuid,
                     status_ext=status_ext,
                     config_id=config_id,
-                    file_ext=file_ext
+                    file_ext=file_ext,
+                    patch_name=patch_name
                 )
             )
 
@@ -444,7 +454,8 @@ def get_file_name(
                 project_revision=project_revision,
                 project_uuid=project_uuid,
                 status_ext=status_ext,
-                file_ext=file_ext
+                file_ext=file_ext,
+                patch_name=patch_name
             )
         )
 
@@ -596,7 +607,8 @@ def get_file_name(
         project_revision: ShortCommitHash,
         project_uuid: str,
         extension_type: FileStatusExtension,
-        config_id: tp.Optional[int] = None
+        config_id: tp.Optional[int] = None,
+        patch_name: tp.Optional[str] = None
     ) -> ReportFilename:
         """
         Generates a filename for a report file.
@@ -615,7 +627,7 @@ def get_file_name( return ReportFilename.get_file_name( experiment_shorthand, cls.SHORTHAND, project_name, binary_name, project_revision, project_uuid, extension_type, cls.FILE_TYPE, - config_id + config_id, patch_name ) @property diff --git a/varats/varats/experiments/base/just_compile.py b/varats/varats/experiments/base/just_compile.py index 0531bd878..50cc026a1 100644 --- a/varats/varats/experiments/base/just_compile.py +++ b/varats/varats/experiments/base/just_compile.py @@ -19,6 +19,7 @@ ) from varats.experiment.wllvm import RunWLLVM from varats.project.varats_project import VProject +from varats.provider.patch.patch_provider import Patch, PatchProvider, wrap_action_list_with_patch from varats.report.report import ReportSpecification @@ -31,9 +32,10 @@ class EmptyAnalysis(actions.ProjectStep): # type: ignore project: VProject - def __init__(self, project: Project, experiment_handle: ExperimentHandle): + def __init__(self, project: Project, experiment_handle: ExperimentHandle, patch: Patch = None): super().__init__(project=project) self.__experiment_handle = experiment_handle + self.__patch = patch def __call__(self) -> actions.StepResult: return self.analyze() @@ -46,7 +48,7 @@ def analyze(self) -> actions.StepResult: for binary in self.project.binaries: result_file = create_new_success_result_filepath( self.__experiment_handle, EmptyReport, self.project, binary, - config_id + config_id, self.__patch.shortname if self.__patch else None ) run_cmd = touch[f"{result_file}"] @@ -69,20 +71,23 @@ class JustCompileReport(VersionExperiment, shorthand="JC"): REPORT_SPEC = ReportSpecification(EmptyReport) + # WIP Patch Support + __USE_PATCHES = True + def actions_for_project( - self, project: Project + self, project: Project ) -> tp.MutableSequence[actions.Step]: """Returns the specified steps to run the project(s) specified in the call in a fixed order.""" # Add the required runtime extensions to the project(s). project.runtime_extension = run.RuntimeExtension(project, self) \ - << time.RunWithTime() + << time.RunWithTime() # Add the required compiler extensions to the project(s). 
project.compiler_extension = compiler.RunCompiler(project, self) \ - << RunWLLVM() \ - << run.WithTimeout() + << RunWLLVM() \ + << run.WithTimeout() project.compile = get_default_compile_error_wrapped( self.get_handle(), project, self.REPORT_SPEC.main_report @@ -93,4 +98,16 @@ def actions_for_project( analysis_actions.append(EmptyAnalysis(project, self.get_handle())) analysis_actions.append(actions.Clean(project)) + if self.__USE_PATCHES: + patch_provider = PatchProvider.create_provider_for_project(project) + patches = patch_provider.patches_config.get_patches_for_revision(hash) + + for patch in patches: + patch_actions = [actions.Compile(project), + EmptyAnalysis(project, self.get_handle(), patch=patch), + actions.Clean(project)] + + analysis_actions.append(actions.RequireAll(wrap_action_list_with_patch(patch_actions, patch))) + pass + return analysis_actions From e5d6e5f6e7d439217a2c384e138607ede3fe2d87 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Mon, 3 Jul 2023 15:19:10 +0200 Subject: [PATCH 015/224] * Added proper path handling to PatchProvider --- varats-core/varats/provider/patch/patch_provider.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 1efc97d05..781777794 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -87,6 +87,8 @@ def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: @staticmethod def from_xml(xml_path: Path): + base_dir = xml_path.parent + project_name: str = Path(os.path.abspath(xml_path)).parts[-2] tree = ET.parse(xml_path) root = tree.getroot() @@ -133,9 +135,11 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: shortname = patch.findtext("shortname") description = patch.findtext("description") - # TODO: Proper path handling (Absolute/relative) path = Path(patch.findtext("path")) + if not path.is_absolute(): + path = base_dir / path + include_revisions: tp.Set[CommitHash] = set() include_revs_tag = patch.find("include_revisions") @@ -200,7 +204,7 @@ def create_provider_for_project(cls: tp.Type[ProviderType], project: tp.Type[Pro try: return PatchProvider(project) except PatchesNotFoundError: - # TODO: Warnings? 
+ # TODO: Warnings return None @classmethod From 7ac995d2b9eac0c2b9562579f0949a77703d0b0e Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Mon, 3 Jul 2023 16:55:40 +0200 Subject: [PATCH 016/224] * Small prototype for just compile project --- varats-core/varats/provider/patch/patch_provider.py | 13 +++++++------ varats/varats/experiments/base/just_compile.py | 13 +++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 781777794..5086be4e2 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -10,6 +10,7 @@ from benchbuild.source.base import target_prefix from benchbuild.utils.actions import StepResult from benchbuild.utils.revision_ranges import _get_all_revisions_between, _get_git_for_path +from plumbum import local from varats.provider.provider import Provider, ProviderType from varats.utils.git_util import CommitHash, ShortCommitHash @@ -24,7 +25,7 @@ def __init__(self, project, patch): self.__patch = patch def __call__(self) -> StepResult: - repo_git = _get_git_for_path(os.path.abspath(self.project.builddir)) + repo_git = _get_git_for_path(local.path(self.project.source_of(self.project.primary_source))) patch_path = self.__patch.path @@ -42,7 +43,7 @@ def __init__(self, project, patch): self.__patch = patch def __call__(self) -> StepResult: - repo_git = _get_git_for_path(os.path.abspath(self.project.builddir)) + repo_git = _get_git_for_path(local.path(self.project.source_of(self.project.primary_source))) patch_path = self.__patch.path @@ -54,9 +55,9 @@ def __call__(self) -> StepResult: class Patch: """A class for storing a single project-specific Patch""" - def __init__(self, project: str, shortname: str, description: str, path: Path, + def __init__(self, project_name: str, shortname: str, description: str, path: Path, valid_revisions: tp.Optional[tp.Set[CommitHash]] = None): - self.project: str = project + self.project_name: str = project_name self.shortname: str = shortname self.description: str = description self.path: Path = path @@ -245,7 +246,7 @@ def create_patch_action_list(project: Project, standard_actions: tp.MutableSeque return result_actions -def wrap_action_list_with_patch(action_list: tp.MutableSequence[actions.Step], patch: Patch) -> tp.MutableSequence[ +def wrap_action_list_with_patch(action_list: tp.MutableSequence[actions.Step], project: Project, patch: Patch) -> tp.MutableSequence[ actions.Step]: """ Wraps the given action list with the given patch """ - return [ApplyPatch(patch.project, patch), *action_list, RevertPatch(patch.project, patch)] + return [ApplyPatch(project, patch), *action_list, RevertPatch(project, patch)] diff --git a/varats/varats/experiments/base/just_compile.py b/varats/varats/experiments/base/just_compile.py index 50cc026a1..c6ad80a3f 100644 --- a/varats/varats/experiments/base/just_compile.py +++ b/varats/varats/experiments/base/just_compile.py @@ -21,6 +21,7 @@ from varats.project.varats_project import VProject from varats.provider.patch.patch_provider import Patch, PatchProvider, wrap_action_list_with_patch from varats.report.report import ReportSpecification +from varats.utils.git_util import ShortCommitHash # Please take care when changing this file, see docs experiments/just_compile @@ -96,18 +97,18 @@ def actions_for_project( analysis_actions = [] analysis_actions.append(actions.Compile(project)) 
analysis_actions.append(EmptyAnalysis(project, self.get_handle())) - analysis_actions.append(actions.Clean(project)) if self.__USE_PATCHES: + patch_provider = PatchProvider.create_provider_for_project(project) - patches = patch_provider.patches_config.get_patches_for_revision(hash) + patches = patch_provider.patches_config.get_patches_for_revision(ShortCommitHash(str(project.revision))) for patch in patches: patch_actions = [actions.Compile(project), - EmptyAnalysis(project, self.get_handle(), patch=patch), - actions.Clean(project)] + EmptyAnalysis(project, self.get_handle(), patch=patch)] - analysis_actions.append(actions.RequireAll(wrap_action_list_with_patch(patch_actions, patch))) - pass + analysis_actions.append(actions.RequireAll(wrap_action_list_with_patch(patch_actions, project, patch))) + + analysis_actions.append(actions.Clean(project)) return analysis_actions From 34141bd4b0a4a5295d0a4b014ffd78e1a71338ce Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 4 Jul 2023 10:59:50 +0200 Subject: [PATCH 017/224] * Added proper descriptions to analysis actions --- varats-core/varats/provider/patch/patch_provider.py | 9 +++++++++ varats/varats/experiments/base/just_compile.py | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 5086be4e2..1b4852d19 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -1,4 +1,5 @@ import os +import textwrap import xml.etree.ElementTree as ET import typing as tp from copy import deepcopy @@ -33,6 +34,10 @@ def __call__(self) -> StepResult: return StepResult.OK + def __str__(self, indent: int = 0)->str: + return textwrap.indent(f"* {self.project.name}: Apply the patch '{self.__patch.shortname}' to the project.", + " " * indent) + class RevertPatch(actions.ProjectStep): NAME = "RevertPatch" @@ -51,6 +56,10 @@ def __call__(self) -> StepResult: return StepResult.OK + def __str__(self, indent: int = 0) -> str: + return textwrap.indent(f"* {self.project.name}: Revert the patch '{self.__patch.shortname}' from the project.", + " " * indent) + class Patch: """A class for storing a single project-specific Patch""" diff --git a/varats/varats/experiments/base/just_compile.py b/varats/varats/experiments/base/just_compile.py index c6ad80a3f..d42a0743b 100644 --- a/varats/varats/experiments/base/just_compile.py +++ b/varats/varats/experiments/base/just_compile.py @@ -1,5 +1,5 @@ """Implements an empty experiment that just compiles the project.""" - +import textwrap import typing as tp from benchbuild import Project @@ -41,6 +41,9 @@ def __init__(self, project: Project, experiment_handle: ExperimentHandle, patch: def __call__(self) -> actions.StepResult: return self.analyze() + def __str__(self, indent: int=0) -> str : + return textwrap.indent(f"* {self.project.name}: EmptyAnalysis", " " * indent) + def analyze(self) -> actions.StepResult: """Only create a report file.""" From 0cf1e90b4892b7cb6f2a9a493ac7af2a2dfaa8fa Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 4 Jul 2023 11:00:27 +0200 Subject: [PATCH 018/224] * Updated patch integration --- varats-core/varats/provider/patch/patch_provider.py | 2 +- varats/varats/experiments/base/just_compile.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 1b4852d19..63c777edc 100644 --- 
a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -258,4 +258,4 @@ def create_patch_action_list(project: Project, standard_actions: tp.MutableSeque def wrap_action_list_with_patch(action_list: tp.MutableSequence[actions.Step], project: Project, patch: Patch) -> tp.MutableSequence[ actions.Step]: """ Wraps the given action list with the given patch """ - return [ApplyPatch(project, patch), *action_list, RevertPatch(project, patch)] + return [actions.MakeBuildDir(project), actions.ProjectEnvironment(project), ApplyPatch(project, patch), *action_list] diff --git a/varats/varats/experiments/base/just_compile.py b/varats/varats/experiments/base/just_compile.py index d42a0743b..f0b6a6bac 100644 --- a/varats/varats/experiments/base/just_compile.py +++ b/varats/varats/experiments/base/just_compile.py @@ -100,6 +100,7 @@ def actions_for_project( analysis_actions = [] analysis_actions.append(actions.Compile(project)) analysis_actions.append(EmptyAnalysis(project, self.get_handle())) + analysis_actions.append(actions.Clean(project)) if self.__USE_PATCHES: @@ -108,10 +109,10 @@ def actions_for_project( for patch in patches: patch_actions = [actions.Compile(project), - EmptyAnalysis(project, self.get_handle(), patch=patch)] + EmptyAnalysis(project, self.get_handle(), patch=patch), + actions.Clean(project)] analysis_actions.append(actions.RequireAll(wrap_action_list_with_patch(patch_actions, project, patch))) - analysis_actions.append(actions.Clean(project)) return analysis_actions From b8655a66b64a13e25f4282dee7e66d4dad9b3e25 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 4 Jul 2023 12:31:22 +0200 Subject: [PATCH 019/224] * Formatting and linting --- .../varats/provider/patch/patch_provider.py | 116 ++++++++++++------ .../varats/experiments/base/just_compile.py | 28 ++++- 2 files changed, 100 insertions(+), 44 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 63c777edc..590af7081 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -2,7 +2,6 @@ import textwrap import xml.etree.ElementTree as ET import typing as tp -from copy import deepcopy from pathlib import Path import benchbuild as bb @@ -10,14 +9,24 @@ from benchbuild.utils import actions from benchbuild.source.base import target_prefix from benchbuild.utils.actions import StepResult -from benchbuild.utils.revision_ranges import _get_all_revisions_between, _get_git_for_path +from benchbuild.utils.revision_ranges import ( + _get_all_revisions_between, + _get_git_for_path +) from plumbum import local from varats.provider.provider import Provider, ProviderType from varats.utils.git_util import CommitHash, ShortCommitHash +def __get_project_git(project: Project) -> tp.Optional[local.cmd]: + return _get_git_for_path( + local.path(project.source_of(project.primary_source))) + + class ApplyPatch(actions.ProjectStep): + """Apply a patch to a project.""" + NAME = "ApplyPatch" DESCRIPTION = "Apply a Git patch to a project." 
@@ -26,7 +35,7 @@ def __init__(self, project, patch): self.__patch = patch def __call__(self) -> StepResult: - repo_git = _get_git_for_path(local.path(self.project.source_of(self.project.primary_source))) + repo_git = __get_project_git(self.project) patch_path = self.__patch.path @@ -34,12 +43,16 @@ def __call__(self) -> StepResult: return StepResult.OK - def __str__(self, indent: int = 0)->str: - return textwrap.indent(f"* {self.project.name}: Apply the patch '{self.__patch.shortname}' to the project.", + def __str__(self, indent: int = 0) -> str: + return textwrap.indent(f"* {self.project.name}: " + f"Apply the patch " + f"'{self.__patch.shortname}' to the project.", " " * indent) class RevertPatch(actions.ProjectStep): + """Revert a patch from a project.""" + NAME = "RevertPatch" DESCRIPTION = "Revert a Git patch from a project." @@ -48,7 +61,7 @@ def __init__(self, project, patch): self.__patch = patch def __call__(self) -> StepResult: - repo_git = _get_git_for_path(local.path(self.project.source_of(self.project.primary_source))) + repo_git = __get_project_git(self.project) patch_path = self.__patch.path @@ -57,26 +70,33 @@ def __call__(self) -> StepResult: return StepResult.OK def __str__(self, indent: int = 0) -> str: - return textwrap.indent(f"* {self.project.name}: Revert the patch '{self.__patch.shortname}' from the project.", + return textwrap.indent(f"* {self.project.name}: " + f"Revert the patch '{self.__patch.shortname}' " + f"from the project.", " " * indent) class Patch: """A class for storing a single project-specific Patch""" - def __init__(self, project_name: str, shortname: str, description: str, path: Path, + def __init__(self, project_name: str, + shortname: str, + description: str, + path: Path, valid_revisions: tp.Optional[tp.Set[CommitHash]] = None): self.project_name: str = project_name self.shortname: str = shortname self.description: str = description self.path: Path = path - self.valid_revisions: tp.Set[CommitHash] = valid_revisions + self.valid_revisions: tp.Optional[tp.Set[CommitHash]] = valid_revisions class ProjectPatchesConfiguration: """A class storing a set of patches specific to a project""" - def __init__(self, project_name: str, repository: str, patches: tp.List[Patch]): + def __init__(self, project_name: str, + repository: str, + patches: tp.List[Patch]): self.project_name: str = project_name self.repository: str = repository self.patches: tp.List[Patch] = patches @@ -84,7 +104,7 @@ def __init__(self, project_name: str, repository: str, patches: tp.List[Patch]): def get_patches_for_revision(self, revision: CommitHash) -> tp.Set[Patch]: """Returns all patches that are valid for the given revision""" - return {patch for patch in self.patches if revision in patch.valid_revisions} + return {p for p in self.patches if revision in p.valid_revisions} def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: """Returns the patch with the given shortname""" @@ -97,6 +117,8 @@ def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: @staticmethod def from_xml(xml_path: Path): + """Creates a ProjectPatchesConfiguration from an XML file""" + base_dir = xml_path.parent project_name: str = Path(os.path.abspath(xml_path)).parts[-2] @@ -132,15 +154,15 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: end_tag = revision_range_tag.find("end") res.update( - {ShortCommitHash(h) for h in _get_all_revisions_between(start_tag.text.strip(), - end_tag.text.strip(), - repo_git)}) + {ShortCommitHash(h) for h in + 
_get_all_revisions_between(start_tag.text.strip(), + end_tag.text.strip(), + repo_git)}) return res - # We explicitly ignore further validity checking of the XML at that point - # As for now, this is already done by a CI Job in the vara-project-patches - # repository + # We explicitly ignore further validity checking of the XML + # As for now, this is already done by a CI Job for patch in root.find("patches").findall("patch"): shortname = patch.findtext("shortname") description = patch.findtext("description") @@ -158,14 +180,21 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: include_revisions = parse_revisions(include_revs_tag) else: include_revisions = {ShortCommitHash(h) for h in - repo_git('log', '--pretty=%H', '--first-parent').strip().split()} + repo_git('log', + '--pretty=%H', + '--first-parent').strip().split()} exclude_revs_tag = patch.find("exclude_revisions") if exclude_revs_tag: - include_revisions.difference_update(parse_revisions(exclude_revs_tag)) + revs = parse_revisions(exclude_revs_tag) + include_revisions.difference_update(revs) - patch_list.append(Patch(project_name, shortname, description, path, include_revisions)) + patch_list.append(Patch(project_name, + shortname, + description, + path, + include_revisions)) return ProjectPatchesConfiguration(project_name, repository, patch_list) @@ -183,27 +212,27 @@ class PatchProvider(Provider): def __init__(self, project: tp.Type[Project]): super().__init__(project) - patches_project_dir = Path(self._get_patches_repository_path() / self.project.NAME) + patches_project_dir = Path(self._get_patches_repository_path() + / self.project.NAME) # BB only performs a fetch so our repo might be out of date _get_git_for_path(patches_project_dir)("pull") if not patches_project_dir.is_dir(): - # TODO: Add proper error message + # TODO: Error handling/warning and None raise PatchesNotFoundError() - patches_config_file = Path(patches_project_dir / ".patches.xml") + conf_file = Path(patches_project_dir / ".patches.xml") - if not patches_config_file.exists(): - # TODO: Add proper error handling - # This should probably be a different error since it is related to the patches config - # not the patches itself + if not conf_file.exists(): + # TODO: Error handling/warning and None raise PatchesNotFoundError() - self.patches_config = ProjectPatchesConfiguration.from_xml(patches_config_file) + self.patches_config = ProjectPatchesConfiguration.from_xml(conf_file) @classmethod - def create_provider_for_project(cls: tp.Type[ProviderType], project: tp.Type[Project]) -> tp.Optional[ProviderType]: + def create_provider_for_project(cls: tp.Type[ProviderType], + project: tp.Type[Project]): """ Creates a provider instance for the given project if possible. @@ -218,7 +247,8 @@ def create_provider_for_project(cls: tp.Type[ProviderType], project: tp.Type[Pro return None @classmethod - def create_default_provider(cls: tp.Type[ProviderType], project: tp.Type[Project]) -> ProviderType: + def create_default_provider(cls: tp.Type[ProviderType], + project: tp.Type[Project]): """ Creates a default provider instance that can be used with any project. 
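
On the consumer side, the provider is then typically reached like this (a minimal sketch; `MyProject` stands in for any project class known to VaRA, and the hash value is illustrative):

    provider = PatchProvider.create_provider_for_project(MyProject)

    if provider is not None:
        patches = provider.patches_config.get_patches_for_revision(
            ShortCommitHash("0123456789")
        )
        for patch in patches:
            print(f"{patch.shortname}: {patch.description} ({patch.path})")
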
@@ -241,21 +271,31 @@ def _get_patches_repository_path() -> Path: return Path(Path(target_prefix()) / patches_source.local) -def create_patch_action_list(project: Project, standard_actions: tp.MutableSequence[actions.Step], hash: CommitHash) -> \ -tp.Mapping[str, tp.MutableSequence[actions.Step]]: - """ Creates a map of actions for applying and reverting all patches that are valid for the given revision """ +def create_patch_action_list(project: Project, + standard_actions: tp.MutableSequence[actions.Step], + commit: CommitHash) \ + -> tp.Mapping[str, tp.MutableSequence[actions.Step]]: + """ Creates a map of actions for applying + all patches that are valid for the given revision """ result_actions = {} patch_provider = PatchProvider.create_provider_for_project(project) - patches = patch_provider.patches_config.get_patches_for_revision(hash) + patches = patch_provider.patches_config.get_patches_for_revision(commit) for patch in patches: - result_actions[patch.shortname] = [ApplyPatch(project, patch), *standard_actions, RevertPatch(project, patch)] + result_actions[patch.shortname] = [actions.MakeBuildDir(project), + actions.ProjectEnvironment(project), + ApplyPatch(project, patch), + *standard_actions] return result_actions -def wrap_action_list_with_patch(action_list: tp.MutableSequence[actions.Step], project: Project, patch: Patch) -> tp.MutableSequence[ - actions.Step]: +def wrap_action_list_with_patch(action_list: tp.MutableSequence[actions.Step], + project: Project, patch: Patch) \ + -> tp.MutableSequence[actions.Step]: """ Wraps the given action list with the given patch """ - return [actions.MakeBuildDir(project), actions.ProjectEnvironment(project), ApplyPatch(project, patch), *action_list] + return [actions.MakeBuildDir(project), + actions.ProjectEnvironment(project), + ApplyPatch(project, patch), + *action_list] diff --git a/varats/varats/experiments/base/just_compile.py b/varats/varats/experiments/base/just_compile.py index f0b6a6bac..4f8b1cc84 100644 --- a/varats/varats/experiments/base/just_compile.py +++ b/varats/varats/experiments/base/just_compile.py @@ -19,7 +19,11 @@ ) from varats.experiment.wllvm import RunWLLVM from varats.project.varats_project import VProject -from varats.provider.patch.patch_provider import Patch, PatchProvider, wrap_action_list_with_patch +from varats.provider.patch.patch_provider import ( + Patch, + PatchProvider, + wrap_action_list_with_patch +) from varats.report.report import ReportSpecification from varats.utils.git_util import ShortCommitHash @@ -33,7 +37,9 @@ class EmptyAnalysis(actions.ProjectStep): # type: ignore project: VProject - def __init__(self, project: Project, experiment_handle: ExperimentHandle, patch: Patch = None): + def __init__(self, project: Project, + experiment_handle: ExperimentHandle, + patch: tp.Optional[Patch] = None): super().__init__(project=project) self.__experiment_handle = experiment_handle self.__patch = patch @@ -42,7 +48,8 @@ def __call__(self) -> actions.StepResult: return self.analyze() def __str__(self, indent: int=0) -> str : - return textwrap.indent(f"* {self.project.name}: EmptyAnalysis", " " * indent) + return textwrap.indent(f"* {self.project.name}: EmptyAnalysis", + " " * indent) def analyze(self) -> actions.StepResult: """Only create a report file.""" @@ -105,14 +112,23 @@ def actions_for_project( if self.__USE_PATCHES: patch_provider = PatchProvider.create_provider_for_project(project) - patches = patch_provider.patches_config.get_patches_for_revision(ShortCommitHash(str(project.revision))) + + 
patches = [] + if patch_provider: + config = patch_provider.patches_config + patches = config.get_patches_for_revision( + ShortCommitHash(str(project.revision))) for patch in patches: patch_actions = [actions.Compile(project), - EmptyAnalysis(project, self.get_handle(), patch=patch), + EmptyAnalysis(project, + self.get_handle(), + patch=patch), actions.Clean(project)] - analysis_actions.append(actions.RequireAll(wrap_action_list_with_patch(patch_actions, project, patch))) + analysis_actions.append(actions.RequireAll( + wrap_action_list_with_patch(patch_actions, project, patch)) + ) return analysis_actions From 8999d3b0c5820901b1259cedd157248559611aa1 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 4 Jul 2023 16:04:22 +0200 Subject: [PATCH 020/224] Formatting --- tests/provider/test_patch_provider.py | 169 +++++++++++------- .../varats/provider/patch/patch_provider.py | 147 ++++++++------- varats-core/varats/report/report.py | 10 +- .../varats/experiments/base/just_compile.py | 41 +++-- 4 files changed, 220 insertions(+), 147 deletions(-) diff --git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py index 9f9d51f9c..bf832bff0 100644 --- a/tests/provider/test_patch_provider.py +++ b/tests/provider/test_patch_provider.py @@ -7,20 +7,29 @@ from benchbuild.utils.revision_ranges import _get_git_for_path from tests.helper_utils import TEST_INPUTS_DIR -from varats.projects.perf_tests.feature_perf_cs_collection import FeaturePerfCSCollection -from varats.provider.patch.patch_provider import ProjectPatchesConfiguration, PatchProvider - +from varats.projects.perf_tests.feature_perf_cs_collection import ( + FeaturePerfCSCollection, +) +from varats.provider.patch.patch_provider import ( + ProjectPatchesConfiguration, + PatchProvider, +) from varats.utils.git_util import ShortCommitHash class TestPatchProvider(unittest.TestCase): + def test_correct_patch_config_access(self): """Checks if we get a correct path for accessing the PatchConfig.""" - provider = PatchProvider.create_provider_for_project(FeaturePerfCSCollection) + provider = PatchProvider.create_provider_for_project( + FeaturePerfCSCollection + ) self.assertIsNotNone(provider) def test_get_patch_by_shortname(self): - provider = PatchProvider.create_provider_for_project(FeaturePerfCSCollection) + provider = PatchProvider.create_provider_for_project( + FeaturePerfCSCollection + ) self.assertIsNotNone(provider) patch = provider.patches_config.get_by_shortname("patch-10") @@ -31,10 +40,15 @@ def test_get_patch_by_shortname(self): class TestPatchRevisionRanges(unittest.TestCase): + @classmethod def setUpClass(cls) -> None: patch_config = ProjectPatchesConfiguration.from_xml( - Path(TEST_INPUTS_DIR / 'patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml')) + Path( + TEST_INPUTS_DIR / + 'patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml' + ) + ) cls.patch_config = patch_config project_git_source = bb.source.Git( @@ -46,12 +60,18 @@ def setUpClass(cls) -> None: project_git_source.fetch() - repo_git = _get_git_for_path(target_prefix() + "/FeaturePerfCSCollection") + repo_git = _get_git_for_path( + target_prefix() + "/FeaturePerfCSCollection" + ) - cls.all_revisions = {ShortCommitHash(h) for h in - repo_git('log', '--pretty=%H', '--first-parent').strip().split()} + cls.all_revisions = { + ShortCommitHash(h) for h in + repo_git('log', '--pretty=%H', '--first-parent').strip().split() + } - def __test_patch_revisions(self, shortname: str, expected_revisions: set[ShortCommitHash]): + def 
__test_patch_revisions( + self, shortname: str, expected_revisions: set[ShortCommitHash] + ): patch = self.patch_config.get_by_shortname(shortname) self.assertSetEqual(expected_revisions, patch.valid_revisions) @@ -60,73 +80,102 @@ def test_unrestricted_range(self): self.__test_patch_revisions("unrestricted-range", self.all_revisions) def test_include_single_revision(self): - expected_revisions = {ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e")} + expected_revisions = { + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e") + } - self.__test_patch_revisions("include-single-revision", expected_revisions) + self.__test_patch_revisions( + "include-single-revision", expected_revisions + ) def test_include_revision_range(self): - expected_revisions = {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), - ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), - ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), - ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), - ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9")} - - self.__test_patch_revisions("include-revision-range", expected_revisions) + expected_revisions = { + ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9") + } + + self.__test_patch_revisions( + "include-revision-range", expected_revisions + ) def test_included_single_and_revision_range(self): - expected_revisions = {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), - ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), - ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), - ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), - ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9"), - ShortCommitHash("27f17080376e409860405c40744887d81d6b3f34")} - - self.__test_patch_revisions("include-single-and-revision-range", expected_revisions) + expected_revisions = { + ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9"), + ShortCommitHash("27f17080376e409860405c40744887d81d6b3f34") + } + + self.__test_patch_revisions( + "include-single-and-revision-range", expected_revisions + ) def test_exclude_single_revision(self): expected_revisions = deepcopy(self.all_revisions) - expected_revisions.remove(ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e")) + expected_revisions.remove( + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e") + ) - self.__test_patch_revisions("exclude-single-revision", expected_revisions) + self.__test_patch_revisions( + "exclude-single-revision", expected_revisions + ) def test_exclude_revision_range(self): expected_revisions = deepcopy(self.all_revisions) - expected_revisions.difference_update( - {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), - ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), - ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), - ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), - ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9")} + expected_revisions.difference_update({ 
+ ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9") + }) + + self.__test_patch_revisions( + "exclude-revision-range", expected_revisions ) - self.__test_patch_revisions("exclude-revision-range", expected_revisions) - def test_exclude_single_and_revision_range(self): expected_revisions = deepcopy(self.all_revisions) - expected_revisions.difference_update( - {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), - ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), - ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), - ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), - ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9"), - ShortCommitHash("27f17080376e409860405c40744887d81d6b3f34")}) - - self.__test_patch_revisions("exclude-single-and-revision-range", expected_revisions) + expected_revisions.difference_update({ + ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9"), + ShortCommitHash("27f17080376e409860405c40744887d81d6b3f34") + }) + + self.__test_patch_revisions( + "exclude-single-and-revision-range", expected_revisions + ) def test_include_range_exclude_single(self): - expected_revisions = {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), - ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), - ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), - ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9")} - - self.__test_patch_revisions("include-range-exclude-single", expected_revisions) + expected_revisions = { + ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("8ca5cc28e6746eef7340064b5d843631841bf31e"), + ShortCommitHash("c051e44a973ee31b3baa571407694467a513ba68"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9") + } + + self.__test_patch_revisions( + "include-range-exclude-single", expected_revisions + ) def test_include_range_exclude_range(self): - expected_revisions = {ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), - ShortCommitHash("4300ea495e7f013f68e785fdde5c4ead81297999"), - ShortCommitHash("27f17080376e409860405c40744887d81d6b3f34"), - ShortCommitHash("32b28ee90e2475cf44d7a616101bcaba2396168d"), - ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), - ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9")} - - self.__test_patch_revisions("include-range-exclude-range", expected_revisions) + expected_revisions = { + ShortCommitHash("01f9f1f07bef22d4248e8349aba4f0c1f204607e"), + ShortCommitHash("4300ea495e7f013f68e785fdde5c4ead81297999"), + ShortCommitHash("27f17080376e409860405c40744887d81d6b3f34"), + ShortCommitHash("32b28ee90e2475cf44d7a616101bcaba2396168d"), + ShortCommitHash("162db88346b06be20faac6976f1ff9bad986accf"), + ShortCommitHash("745424e3ae1d521ae42e7486df126075d9c37be9") + } + + self.__test_patch_revisions( + "include-range-exclude-range", expected_revisions + ) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 590af7081..2b194a612 100644 
--- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -1,17 +1,17 @@ import os import textwrap -import xml.etree.ElementTree as ET import typing as tp +import xml.etree.ElementTree as ET from pathlib import Path import benchbuild as bb from benchbuild.project import Project -from benchbuild.utils import actions from benchbuild.source.base import target_prefix +from benchbuild.utils import actions from benchbuild.utils.actions import StepResult from benchbuild.utils.revision_ranges import ( _get_all_revisions_between, - _get_git_for_path + _get_git_for_path, ) from plumbum import local @@ -21,7 +21,8 @@ def __get_project_git(project: Project) -> tp.Optional[local.cmd]: return _get_git_for_path( - local.path(project.source_of(project.primary_source))) + local.path(project.source_of(project.primary_source)) + ) class ApplyPatch(actions.ProjectStep): @@ -44,10 +45,11 @@ def __call__(self) -> StepResult: return StepResult.OK def __str__(self, indent: int = 0) -> str: - return textwrap.indent(f"* {self.project.name}: " - f"Apply the patch " - f"'{self.__patch.shortname}' to the project.", - " " * indent) + return textwrap.indent( + f"* {self.project.name}: " + f"Apply the patch " + f"'{self.__patch.shortname}' to the project.", " " * indent + ) class RevertPatch(actions.ProjectStep): @@ -70,20 +72,24 @@ def __call__(self) -> StepResult: return StepResult.OK def __str__(self, indent: int = 0) -> str: - return textwrap.indent(f"* {self.project.name}: " - f"Revert the patch '{self.__patch.shortname}' " - f"from the project.", - " " * indent) + return textwrap.indent( + f"* {self.project.name}: " + f"Revert the patch '{self.__patch.shortname}' " + f"from the project.", " " * indent + ) class Patch: - """A class for storing a single project-specific Patch""" - - def __init__(self, project_name: str, - shortname: str, - description: str, - path: Path, - valid_revisions: tp.Optional[tp.Set[CommitHash]] = None): + """A class for storing a single project-specific Patch.""" + + def __init__( + self, + project_name: str, + shortname: str, + description: str, + path: Path, + valid_revisions: tp.Optional[tp.Set[CommitHash]] = None + ): self.project_name: str = project_name self.shortname: str = shortname self.description: str = description @@ -92,22 +98,22 @@ def __init__(self, project_name: str, class ProjectPatchesConfiguration: - """A class storing a set of patches specific to a project""" + """A class storing a set of patches specific to a project.""" - def __init__(self, project_name: str, - repository: str, - patches: tp.List[Patch]): + def __init__( + self, project_name: str, repository: str, patches: tp.List[Patch] + ): self.project_name: str = project_name self.repository: str = repository self.patches: tp.List[Patch] = patches def get_patches_for_revision(self, revision: CommitHash) -> tp.Set[Patch]: - """Returns all patches that are valid for the given revision""" + """Returns all patches that are valid for the given revision.""" return {p for p in self.patches if revision in p.valid_revisions} def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: - """Returns the patch with the given shortname""" + """Returns the patch with the given shortname.""" for patch in self.patches: if patch.shortname == shortname: @@ -117,7 +123,7 @@ def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: @staticmethod def from_xml(xml_path: Path): - """Creates a ProjectPatchesConfiguration from an XML file""" + """Creates a 
ProjectPatchesConfiguration from an XML file.""" base_dir = xml_path.parent @@ -153,11 +159,11 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: start_tag = revision_range_tag.find("start") end_tag = revision_range_tag.find("end") - res.update( - {ShortCommitHash(h) for h in - _get_all_revisions_between(start_tag.text.strip(), - end_tag.text.strip(), - repo_git)}) + res.update({ + ShortCommitHash(h) for h in _get_all_revisions_between( + start_tag.text.strip(), end_tag.text.strip(), repo_git + ) + }) return res @@ -179,10 +185,11 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: if include_revs_tag: include_revisions = parse_revisions(include_revs_tag) else: - include_revisions = {ShortCommitHash(h) for h in - repo_git('log', - '--pretty=%H', - '--first-parent').strip().split()} + include_revisions = { + ShortCommitHash(h) + for h in repo_git('log', '--pretty=%H', '--first-parent' + ).strip().split() + } exclude_revs_tag = patch.find("exclude_revisions") @@ -190,11 +197,12 @@ def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: revs = parse_revisions(exclude_revs_tag) include_revisions.difference_update(revs) - patch_list.append(Patch(project_name, - shortname, - description, - path, - include_revisions)) + patch_list.append( + Patch( + project_name, shortname, description, path, + include_revisions + ) + ) return ProjectPatchesConfiguration(project_name, repository, patch_list) @@ -205,15 +213,16 @@ class PatchesNotFoundError(FileNotFoundError): class PatchProvider(Provider): - """A provider for getting patch files for a certain project""" + """A provider for getting patch files for a certain project.""" patches_repository = "git@github.com:se-sic/vara-project-patches.git" def __init__(self, project: tp.Type[Project]): super().__init__(project) - patches_project_dir = Path(self._get_patches_repository_path() - / self.project.NAME) + patches_project_dir = Path( + self._get_patches_repository_path() / self.project.NAME + ) # BB only performs a fetch so our repo might be out of date _get_git_for_path(patches_project_dir)("pull") @@ -231,14 +240,15 @@ def __init__(self, project: tp.Type[Project]): self.patches_config = ProjectPatchesConfiguration.from_xml(conf_file) @classmethod - def create_provider_for_project(cls: tp.Type[ProviderType], - project: tp.Type[Project]): + def create_provider_for_project( + cls: tp.Type[ProviderType], project: tp.Type[Project] + ): """ - Creates a provider instance for the given project if possible. + Creates a provider instance for the given project if possible. - Returns: - a provider instance for the given project if possible, - otherwise, ``None`` + Returns: + a provider instance for the given project if possible, + otherwise, ``None`` """ try: return PatchProvider(project) @@ -247,15 +257,18 @@ def create_provider_for_project(cls: tp.Type[ProviderType], return None @classmethod - def create_default_provider(cls: tp.Type[ProviderType], - project: tp.Type[Project]): + def create_default_provider( + cls: tp.Type[ProviderType], project: tp.Type[Project] + ): """ - Creates a default provider instance that can be used with any project. + Creates a default provider instance that can be used with any project. - Returns: - a default provider instance + Returns: + a default provider instance """ - raise AssertionError("All usages should be covered by the project specific provider.") + raise AssertionError( + "All usages should be covered by the project specific provider." 
+        )
 
     @staticmethod
     def _get_patches_repository_path() -> Path:
@@ -275,18 +288,19 @@ def create_patch_action_list(project: Project,
 
     return Path(Path(target_prefix()) / patches_source.local)
 
 
 def create_patch_action_list(project: Project,
                              standard_actions: tp.MutableSequence[actions.Step],
                              commit: CommitHash) \
         -> tp.Mapping[str, tp.MutableSequence[actions.Step]]:
-    """ Creates a map of actions for applying
-    all patches that are valid for the given revision """
+    """Creates a map of actions for applying all patches that are valid for the
+    given revision."""
     result_actions = {}
 
     patch_provider = PatchProvider.create_provider_for_project(project)
     patches = patch_provider.patches_config.get_patches_for_revision(commit)
 
     for patch in patches:
-        result_actions[patch.shortname] = [actions.MakeBuildDir(project),
-                                           actions.ProjectEnvironment(project),
-                                           ApplyPatch(project, patch), *standard_actions]
+        result_actions[patch.shortname] = [
+            actions.MakeBuildDir(project),
+            actions.ProjectEnvironment(project),
+            ApplyPatch(project, patch), *standard_actions
+        ]
 
     return result_actions
 
 
 def wrap_action_list_with_patch(action_list: tp.MutableSequence[actions.Step],
                                 project: Project, patch: Patch) \
         -> tp.MutableSequence[actions.Step]:
-    """ Wraps the given action list with the given patch """
-    return [actions.MakeBuildDir(project),
-            actions.ProjectEnvironment(project),
-            ApplyPatch(project, patch),
-            *action_list]
+    """Wraps the given action list with the given patch."""
+    return [
+        actions.MakeBuildDir(project),
+        actions.ProjectEnvironment(project),
+        ApplyPatch(project, patch), *action_list
+    ]
diff --git a/varats-core/varats/report/report.py b/varats-core/varats/report/report.py
index 5d63efd74..24e8c6988 100644
--- a/varats-core/varats/report/report.py
+++ b/varats-core/varats/report/report.py
@@ -147,15 +147,15 @@ class ReportFilename():
         r"(?P<experiment_shorthand>.*)-" + r"(?P<report_shorthand>.*)-" +
         r"(?P<project_name>.*)-(?P<binary_name>.*)-" +
         r"(?P<project_revision>.*)[_\/](?P<project_uuid>[0-9a-fA-F\-]*)"
-        r"(_config-(?P<config_id>\d+))?" + r"(_patch-(?P<patch_name>.+))?" + "_" +
-        FileStatusExtension.get_regex_grp() + r"?" + r"(?P<file_ext>\..*)?" +
-        "$"
+        r"(_config-(?P<config_id>\d+))?" + r"(_patch-(?P<patch_name>.+))?" +
+        "_" + FileStatusExtension.get_regex_grp() + r"?" +
+        r"(?P<file_ext>\..*)?"
+ "$" ) __RESULT_FILE_TEMPLATE = ( "{experiment_shorthand}-" + "{report_shorthand}-" + "{project_name}-" + - "{binary_name}-" + "{project_revision}_" + "{project_uuid}_{patch_name}" + - "{status_ext}" + "{file_ext}" + "{binary_name}-" + "{project_revision}_" + + "{project_uuid}_{patch_name}" + "{status_ext}" + "{file_ext}" ) __CONFIG_SPECIFIC_RESULT_FILE_TEMPLATE = ( diff --git a/varats/varats/experiments/base/just_compile.py b/varats/varats/experiments/base/just_compile.py index 2a3e38a69..24197a3e8 100644 --- a/varats/varats/experiments/base/just_compile.py +++ b/varats/varats/experiments/base/just_compile.py @@ -22,7 +22,7 @@ from varats.provider.patch.patch_provider import ( Patch, PatchProvider, - wrap_action_list_with_patch + wrap_action_list_with_patch, ) from varats.report.report import ReportSpecification from varats.utils.git_util import ShortCommitHash @@ -37,9 +37,12 @@ class EmptyAnalysis(actions.ProjectStep): # type: ignore project: VProject - def __init__(self, project: Project, - experiment_handle: ExperimentHandle, - patch: tp.Optional[Patch] = None): + def __init__( + self, + project: Project, + experiment_handle: ExperimentHandle, + patch: tp.Optional[Patch] = None + ): super().__init__(project=project) self.__experiment_handle = experiment_handle self.__patch = patch @@ -48,8 +51,9 @@ def __call__(self) -> actions.StepResult: return self.analyze() def __str__(self, indent: int = 0) -> str: - return textwrap.indent(f"* {self.project.name}: EmptyAnalysis", - " " * indent) + return textwrap.indent( + f"* {self.project.name}: EmptyAnalysis", " " * indent + ) def analyze(self) -> actions.StepResult: """Only create a report file.""" @@ -86,7 +90,7 @@ class JustCompileReport(VersionExperiment, shorthand="JC"): __USE_PATCHES = True def actions_for_project( - self, project: Project + self, project: Project ) -> tp.MutableSequence[actions.Step]: """Returns the specified steps to run the project(s) specified in the call in a fixed order.""" @@ -117,17 +121,22 @@ def actions_for_project( if patch_provider: config = patch_provider.patches_config patches = config.get_patches_for_revision( - ShortCommitHash(str(project.revision))) + ShortCommitHash(str(project.revision)) + ) for patch in patches: - patch_actions = [actions.Compile(project), - EmptyAnalysis(project, - self.get_handle(), - patch=patch), - actions.Clean(project)] - - analysis_actions.append(actions.RequireAll( - wrap_action_list_with_patch(patch_actions, project, patch)) + patch_actions = [ + actions.Compile(project), + EmptyAnalysis(project, self.get_handle(), patch=patch), + actions.Clean(project) + ] + + analysis_actions.append( + actions.RequireAll( + wrap_action_list_with_patch( + patch_actions, project, patch + ) + ) ) return analysis_actions From 4854e794c00f54137d40412f4419d7b8d1abadfa Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 6 Jul 2023 21:55:24 +0200 Subject: [PATCH 021/224] Adapts FeaturePerfCSCollection to new setup process --- .../projects/perf_tests/feature_perf_cs_collection.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 1f6251ea5..861d3483e 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -1,5 +1,6 @@ """Project file for the feature performance case study collection.""" import typing as tp +from pathlib import Path import 
benchbuild as bb from benchbuild.command import Command, SourceRoot, WorkloadSet @@ -19,6 +20,7 @@ ) from varats.project.sources import FeatureSource from varats.project.varats_project import VProject +from varats.utils.git_commands import init_all_submodules, update_all_submodules from varats.utils.git_util import RevisionBinaryMap, ShortCommitHash from varats.utils.settings import bb_cfg @@ -127,9 +129,14 @@ def compile(self) -> None: mkdir("-p", feature_perf_source / "build") + init_all_submodules(Path(feature_perf_source)) + update_all_submodules(Path(feature_perf_source)) + with local.cwd(feature_perf_source / "build"): with local.env(CC=str(cc_compiler), CXX=str(cxx_compiler)): - bb.watch(cmake)("-G", "Unix Makefiles", "..") + bb.watch(cmake)( + "..", "-G", "Unix Makefiles", "-DFPCSC_ENABLE_SRC=ON" + ) bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) From a6cf7d7d37226fa6731958127d2c4326c0fb3011 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 6 Jul 2023 22:41:19 +0200 Subject: [PATCH 022/224] Adds Synthetic case studies as projects --- .../perf_tests/feature_perf_cs_collection.py | 266 ++++++++++++++++++ 1 file changed, 266 insertions(+) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 861d3483e..c6e8f2a1f 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -142,3 +142,269 @@ def compile(self) -> None: with local.cwd(feature_perf_source): verify_binaries(self) + + +class FPCSProjectBase(): + """Base class that implements common project functionality.""" + # TODO: make function if not other shared state exists + + @staticmethod + def do_compile(project: VProject, cmake_flag: str) -> None: + """Compile the project.""" + feature_perf_source = local.path( + project.source_of(project.primary_source) + ) + + cc_compiler = bb.compiler.cc(project) + cxx_compiler = bb.compiler.cxx(project) + + mkdir("-p", feature_perf_source / "build") + + init_all_submodules(Path(feature_perf_source)) + update_all_submodules(Path(feature_perf_source)) + + with local.cwd(feature_perf_source / "build"): + with local.env(CC=str(cc_compiler), CXX=str(cxx_compiler)): + bb.watch(cmake + )("..", "-G", "Unix Makefiles", f"-D{cmake_flag}=ON") + + bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) + + with local.cwd(feature_perf_source): + verify_binaries(project) + + +class SynthSAFieldSensitivity(VProject, FPCSProjectBase): + """Synthetic case-study project for testing field sensitivity.""" + + NAME = 'SynthSAFieldSensitivity' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthSAFieldSensitivity", + refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarks", + limit=None, + shallow=False, + version_filter=project_filter_generator("SynthSAFieldSensitivity") + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("SynthSAFieldSensitivity") / RSBinary("FieldSense"), + label="FieldSense-no-input" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthSAFieldSensitivity.NAME) + ) + + binary_map.specify_binary( + "build/bin/FieldSense", + BinaryType.EXECUTABLE, + # TODO: fix with commit 
after merge + # only_valid_in=RevisionRange("162db88346", "master") + only_valid_in=RevisionRange( + "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarks" + ) + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + FPCSProjectBase.do_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHSAFIELDSENSITIVITY" + ) + + +class SynthSAFlowSensitivity(VProject, FPCSProjectBase): + """Synthetic case-study project for testing flow sensitivity.""" + + NAME = 'SynthSAFlowSensitivity' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthSAFlowSensitivity", + refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarks", + limit=None, + shallow=False, + version_filter=project_filter_generator("SynthSAFlowSensitivity") + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("SynthSAFlowSensitivity") / RSBinary("FlowSense"), + label="FlowSense-no-input" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthSAFlowSensitivity.NAME) + ) + + binary_map.specify_binary( + "build/bin/FlowSense", + BinaryType.EXECUTABLE, + # TODO: fix with commit after merge + # only_valid_in=RevisionRange("162db88346", "master") + only_valid_in=RevisionRange( + "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarks" + ) + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + FPCSProjectBase.do_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHSAFLOWSENSITIVITY" + ) + + +class SynthSAContextSensitivity(VProject, FPCSProjectBase): + """Synthetic case-study project for testing flow sensitivity.""" + + NAME = 'SynthSAContextSensitivity' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthSAContextSensitivity", + refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarks", + limit=None, + shallow=False, + version_filter=project_filter_generator( + "SynthSAContextSensitivity" + ) + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("SynthSAContextSensitivity") / + RSBinary("ContextSense"), + label="ContextSense-no-input" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthSAContextSensitivity.NAME) + ) + + binary_map.specify_binary( + "build/bin/ContextSense", + BinaryType.EXECUTABLE, + # TODO: fix with commit after merge + # only_valid_in=RevisionRange("162db88346", "master") + only_valid_in=RevisionRange( + "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarks" + ) + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + FPCSProjectBase.do_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHSACONTEXTSENSITIVITY" + ) + + +class SynthSAInterProcedural(VProject, FPCSProjectBase): + """Synthetic case-study project for testing flow sensitivity.""" + + NAME = 'SynthSAInterProcedural' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + 
bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthSAInterProcedural", + refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarks", + limit=None, + shallow=False, + version_filter=project_filter_generator("SynthSAInterProcedural") + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("SynthSAInterProcedural") / + RSBinary("InterProcedural"), + label="ContextSense-no-input" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthSAInterProcedural.NAME) + ) + + binary_map.specify_binary( + "build/bin/InterProcedural", + BinaryType.EXECUTABLE, + # TODO: fix with commit after merge + # only_valid_in=RevisionRange("162db88346", "master") + only_valid_in=RevisionRange( + "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarks" + ) + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + FPCSProjectBase.do_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHSAINTERPROCEDURAL" + ) From 2e1f7200b611fd1ecffeee2604f8e9622522eabf Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 6 Jul 2023 22:50:41 +0200 Subject: [PATCH 023/224] Refactor helper method into function --- .../perf_tests/feature_perf_cs_collection.py | 91 +++++++------------ 1 file changed, 33 insertions(+), 58 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index c6e8f2a1f..c9633885b 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -25,6 +25,30 @@ from varats.utils.settings import bb_cfg +def _do_feature_perf_cs_collection_compile( + project: VProject, cmake_flag: str +) -> None: + """Base class that implements common project functionality.""" + feature_perf_source = local.path(project.source_of(project.primary_source)) + + cc_compiler = bb.compiler.cc(project) + cxx_compiler = bb.compiler.cxx(project) + + mkdir("-p", feature_perf_source / "build") + + init_all_submodules(Path(feature_perf_source)) + update_all_submodules(Path(feature_perf_source)) + + with local.cwd(feature_perf_source / "build"): + with local.env(CC=str(cc_compiler), CXX=str(cxx_compiler)): + bb.watch(cmake)("..", "-G", "Unix Makefiles", f"-D{cmake_flag}=ON") + + bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) + + with local.cwd(feature_perf_source): + verify_binaries(project) + + class FeaturePerfCSCollection(VProject): """Test project for feature performance case studies.""" @@ -122,59 +146,10 @@ def run_tests(self) -> None: def compile(self) -> None: """Compile the project.""" - feature_perf_source = local.path(self.source_of(self.primary_source)) - - cc_compiler = bb.compiler.cc(self) - cxx_compiler = bb.compiler.cxx(self) - - mkdir("-p", feature_perf_source / "build") - - init_all_submodules(Path(feature_perf_source)) - update_all_submodules(Path(feature_perf_source)) - - with local.cwd(feature_perf_source / "build"): - with local.env(CC=str(cc_compiler), CXX=str(cxx_compiler)): - bb.watch(cmake)( - "..", "-G", "Unix Makefiles", "-DFPCSC_ENABLE_SRC=ON" - ) - - bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) - - with local.cwd(feature_perf_source): - verify_binaries(self) - - -class FPCSProjectBase(): - """Base class 
that implements common project functionality.""" - # TODO: make function if not other shared state exists - - @staticmethod - def do_compile(project: VProject, cmake_flag: str) -> None: - """Compile the project.""" - feature_perf_source = local.path( - project.source_of(project.primary_source) - ) - - cc_compiler = bb.compiler.cc(project) - cxx_compiler = bb.compiler.cxx(project) - - mkdir("-p", feature_perf_source / "build") - - init_all_submodules(Path(feature_perf_source)) - update_all_submodules(Path(feature_perf_source)) - - with local.cwd(feature_perf_source / "build"): - with local.env(CC=str(cc_compiler), CXX=str(cxx_compiler)): - bb.watch(cmake - )("..", "-G", "Unix Makefiles", f"-D{cmake_flag}=ON") - - bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) - - with local.cwd(feature_perf_source): - verify_binaries(project) + _do_feature_perf_cs_collection_compile(self, "FPCSC_ENABLE_SRC") -class SynthSAFieldSensitivity(VProject, FPCSProjectBase): +class SynthSAFieldSensitivity(VProject): """Synthetic case-study project for testing field sensitivity.""" NAME = 'SynthSAFieldSensitivity' @@ -227,12 +202,12 @@ def run_tests(self) -> None: def compile(self) -> None: """Compile the project.""" - FPCSProjectBase.do_compile( + _do_feature_perf_cs_collection_compile( self, "FPCSC_ENABLE_PROJECT_SYNTHSAFIELDSENSITIVITY" ) -class SynthSAFlowSensitivity(VProject, FPCSProjectBase): +class SynthSAFlowSensitivity(VProject): """Synthetic case-study project for testing flow sensitivity.""" NAME = 'SynthSAFlowSensitivity' @@ -285,12 +260,12 @@ def run_tests(self) -> None: def compile(self) -> None: """Compile the project.""" - FPCSProjectBase.do_compile( + _do_feature_perf_cs_collection_compile( self, "FPCSC_ENABLE_PROJECT_SYNTHSAFLOWSENSITIVITY" ) -class SynthSAContextSensitivity(VProject, FPCSProjectBase): +class SynthSAContextSensitivity(VProject): """Synthetic case-study project for testing flow sensitivity.""" NAME = 'SynthSAContextSensitivity' @@ -346,12 +321,12 @@ def run_tests(self) -> None: def compile(self) -> None: """Compile the project.""" - FPCSProjectBase.do_compile( + _do_feature_perf_cs_collection_compile( self, "FPCSC_ENABLE_PROJECT_SYNTHSACONTEXTSENSITIVITY" ) -class SynthSAInterProcedural(VProject, FPCSProjectBase): +class SynthSAInterProcedural(VProject): """Synthetic case-study project for testing flow sensitivity.""" NAME = 'SynthSAInterProcedural' @@ -405,6 +380,6 @@ def run_tests(self) -> None: def compile(self) -> None: """Compile the project.""" - FPCSProjectBase.do_compile( + _do_feature_perf_cs_collection_compile( self, "FPCSC_ENABLE_PROJECT_SYNTHSAINTERPROCEDURAL" ) From 208d30a1a418d4e14c2488fb52d04ea59fc7060b Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 7 Jul 2023 13:49:10 +0200 Subject: [PATCH 024/224] Inital work on implementing eval table --- .../experiments/vara/feature_perf_runner.py | 2 +- .../perf_tests/feature_perf_cs_collection.py | 4 +- .../varats/tables/feature_perf_precision.py | 212 ++++++++++++++++++ varats/varats/tables/time_workloads.py | 6 +- 4 files changed, 220 insertions(+), 4 deletions(-) create mode 100644 varats/varats/tables/feature_perf_precision.py diff --git a/varats/varats/experiments/vara/feature_perf_runner.py b/varats/varats/experiments/vara/feature_perf_runner.py index ad6c3b424..7e69134f1 100644 --- a/varats/varats/experiments/vara/feature_perf_runner.py +++ b/varats/varats/experiments/vara/feature_perf_runner.py @@ -42,7 +42,7 @@ def actions_for_project( project.cflags += self.get_vara_feature_cflags(project) project.cflags 
+= self.get_vara_tracing_cflags( - instr_type, project=project + instr_type, project=project, instruction_threshold=0 ) project.ldflags += self.get_vara_tracing_ldflags() diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index c9633885b..1a39a6cc2 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -276,7 +276,7 @@ class SynthSAContextSensitivity(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthSAContextSensitivity", - refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarks", + refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", limit=None, shallow=False, version_filter=project_filter_generator( @@ -310,7 +310,7 @@ def binaries_for_revision( # TODO: fix with commit after merge # only_valid_in=RevisionRange("162db88346", "master") only_valid_in=RevisionRange( - "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarks" + "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarksImpl" ) ) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py new file mode 100644 index 000000000..63cb34b74 --- /dev/null +++ b/varats/varats/tables/feature_perf_precision.py @@ -0,0 +1,212 @@ +"""Module for the FeaturePerfPrecision table.""" +import abc +import typing as tp + +import pandas as pd + +from varats.experiments.vara.feature_experiment import FeatureExperiment +from varats.paper.case_study import CaseStudy +from varats.paper.paper_config import get_loaded_paper_config +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.report.report import BaseReport +from varats.revision.revisions import get_processed_revisions_files +from varats.table.table import Table +from varats.table.table_utils import dataframe_to_table +from varats.table.tables import TableFormat, TableGenerator + + +def get_regressing_config_ids_GT(case_study: CaseStudy) -> tp.Dict[int, bool]: + if case_study.project_name == "SynthSAContextSensitivity": + return { + 0: True, + 1: True, + 2: True, + 3: True, + 4: False, + 5: False, + 6: False, + 7: False + } + + raise NotImplementedError() + return {} + + +def map_to_positive_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: + return [config_id for config_id, value in reg_dict.items() if value is True] + + +def map_to_negative_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: + return [ + config_id for config_id, value in reg_dict.items() if value is False + ] + + +class ClassificationResults: + """Helper class to automatically calculate classification results.""" + + def __init__( + self, actual_positive_values: tp.List[tp.Any], + actual_negative_values: tp.List[tp.Any], + predicted_positive_values: tp.List[tp.Any], + predicted_negative_values: tp.List[tp.Any] + ) -> None: + self.__actual_positive_values = actual_positive_values + self.__actual_negative_values = actual_negative_values + self.__predicted_positive_values = predicted_positive_values + self.__predicted_negative_values = predicted_negative_values + + @property + def P(self) -> int: # pylint: disable=C0103 + return len(self.__actual_positive_values) + + @property + def N(self) -> int: # pylint: disable=C0103 + return len(self.__actual_negative_values) + + @property + def PP(self) -> int: # pylint: disable=C0103 + return len(self.__predicted_positive_values) + + @property + def 
PN(self) -> int: # pylint: disable=C0103 + return len(self.__predicted_negative_values) + + @property + def TP(self) -> int: # pylint: disable=C0103 + return len( + set(self.__actual_positive_values + ).intersection(self.__predicted_positive_values) + ) + + @property + def TN(self) -> int: # pylint: disable=C0103 + return len( + set(self.__actual_negative_values + ).intersection(self.__predicted_negative_values) + ) + + @property + def FP(self) -> int: # pylint: disable=C0103 + return self.PP - self.TP + + @property + def FN(self) -> int: # pylint: disable=C0103 + return self.PN - self.TN + + def precision(self) -> float: + return self.TP / self.PP + + def recall(self) -> float: + return self.TP / self.P + + def specificity(self) -> float: + return self.TN / self.N + + def accuracy(self) -> float: + return (self.TP + self.TN) / (self.P + self.N) + + def balanced_accuracy(self) -> float: + return (self.recall() + self.specificity()) / 2 + + +class Profiler(): + + @property + @abc.abstractmethod + def name(self) -> tp.Type[FeatureExperiment]: + """Hame of the profiler used.""" + + @property + @abc.abstractmethod + def experiment(self) -> tp.Type[FeatureExperiment]: + """Experiment used to produce this profilers information.""" + + @property + @abc.abstractmethod + def report_type(self) -> tp.Type[BaseReport]: + """Report type used to load this profilers information.""" + + +def compute_profiler_predictions( + profiler: Profiler, project_name: str, case_study: CaseStudy +) -> tp.Dict[int, bool]: + report_files = get_processed_revisions_files( + project_name, profiler.experiment, profiler.report_type, + get_case_study_file_name_filter(case_study) + ) + + return {} + + +class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): + """Table that compares the precision of different feature performance + measurement approaches.""" + + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [] + + # Data aggregation + df = pd.DataFrame() + table_rows = [] + + for case_study in case_studies: + rev = case_study.revisions[0] + project_name = case_study.project_name + + new_row = { + 'CaseStudy': project_name, + 'Configs': len(case_study.get_config_ids_for_revision(rev)) + } + + ground_truth = get_regressing_config_ids_GT(case_study) + + for profiler in profilers: + predicted = compute_profiler_predictions( + profiler, project_name, case_study + ) + + results = ClassificationResults( + map_to_positive_config_ids(ground_truth), + map_to_negative_config_ids(ground_truth), + map_to_positive_config_ids(predicted), + map_to_negative_config_ids(predicted) + ) + + table_rows.append(new_row) + # df.append(new_row, ignore_index=True) + + df = pd.concat([df, pd.DataFrame(table_rows)]) + df.sort_values(["CaseStudy"], inplace=True) + df.set_index( + ["CaseStudy"], + inplace=True, + ) + + # Table config + + print(f"{df=}") + + kwargs: tp.Dict[str, tp.Any] = {} + # if table_format.is_latex(): + # kwargs["column_format"] = "llr|rr|r|r" + + return dataframe_to_table( + df, + table_format, + wrap_table=wrap_table, + wrap_landscape=True, + **kwargs + ) + + +class FeaturePerfPrecisionTableGenerator( + TableGenerator, generator_name="fperf-precision", options=[] +): + """Generator for `FeaturePerfPrecisionTable`.""" + + def generate(self) -> tp.List[Table]: + return [ + FeaturePerfPrecisionTable(self.table_config, **self.table_kwargs) + ] diff --git a/varats/varats/tables/time_workloads.py 
b/varats/varats/tables/time_workloads.py index 4120c5210..234e8254a 100644 --- a/varats/varats/tables/time_workloads.py +++ b/varats/varats/tables/time_workloads.py @@ -95,7 +95,11 @@ def wall_clock_time_in_msecs( kwargs["column_format"] = "llr|rr|r|r" return dataframe_to_table( - df, table_format, wrap_table, wrap_landscape=True, **kwargs + df, + table_format, + wrap_table=wrap_table, + wrap_landscape=True, + **kwargs ) From 4947187f5b7c396ccafe5a48d656a74a8f2846ee Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 7 Jul 2023 14:52:02 +0200 Subject: [PATCH 025/224] Implements profiler split --- .../varats/tables/feature_perf_precision.py | 89 ++++++++++++++----- 1 file changed, 67 insertions(+), 22 deletions(-) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 63cb34b74..47e7a6e7d 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -5,10 +5,12 @@ import pandas as pd from varats.experiments.vara.feature_experiment import FeatureExperiment +from varats.experiments.vara.feature_perf_runner import FeaturePerfRunner from varats.paper.case_study import CaseStudy from varats.paper.paper_config import get_loaded_paper_config from varats.paper_mgmt.case_study import get_case_study_file_name_filter from varats.report.report import BaseReport +from varats.report.tef_report import TEFReport from varats.revision.revisions import get_processed_revisions_files from varats.table.table import Table from varats.table.table_utils import dataframe_to_table @@ -16,19 +18,19 @@ def get_regressing_config_ids_GT(case_study: CaseStudy) -> tp.Dict[int, bool]: - if case_study.project_name == "SynthSAContextSensitivity": - return { - 0: True, - 1: True, - 2: True, - 3: True, - 4: False, - 5: False, - 6: False, - 7: False - } - - raise NotImplementedError() + #if case_study.project_name == "SynthSAContextSensitivity": + return { + 0: True, + 1: True, + 2: True, + 3: True, + 4: False, + 5: False, + 6: False, + 7: False + } + + # raise NotImplementedError() return {} @@ -112,20 +114,35 @@ def balanced_accuracy(self) -> float: class Profiler(): + def __init__( + self, name: str, experiment: tp.Type[FeatureExperiment], + report_type: tp.Type[BaseReport] + ) -> None: + self.__name = name + self.__experiment = experiment + self.__report_type = report_type + @property - @abc.abstractmethod - def name(self) -> tp.Type[FeatureExperiment]: + def name(self) -> str: """Hame of the profiler used.""" + return self.__name @property - @abc.abstractmethod def experiment(self) -> tp.Type[FeatureExperiment]: """Experiment used to produce this profilers information.""" + return self.__experiment @property - @abc.abstractmethod def report_type(self) -> tp.Type[BaseReport]: """Report type used to load this profilers information.""" + return self.__report_type + + +class VXray(Profiler): + + def __init__(self) -> None: + # TODO: fix with actual + super().__init__("WXray", FeaturePerfRunner, TEFReport) def compute_profiler_predictions( @@ -145,7 +162,7 @@ class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: case_studies = get_loaded_paper_config().get_all_case_studies() - profilers: tp.List[Profiler] = [] + profilers: tp.List[Profiler] = [VXray()] # Data aggregation df = pd.DataFrame() @@ -155,17 +172,31 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: rev = case_study.revisions[0] project_name 
= case_study.project_name + ground_truth = get_regressing_config_ids_GT(case_study) + new_row = { - 'CaseStudy': project_name, - 'Configs': len(case_study.get_config_ids_for_revision(rev)) + 'CaseStudy': + project_name, + 'Configs': + len(case_study.get_config_ids_for_revision(rev)), + 'RegressedConfigs': + len(map_to_positive_config_ids(ground_truth)) } - ground_truth = get_regressing_config_ids_GT(case_study) - for profiler in profilers: predicted = compute_profiler_predictions( profiler, project_name, case_study ) + predicted = { + 0: True, + 1: True, + 2: False, + 3: True, + 4: False, + 5: True, + 6: True, + 7: False + } results = ClassificationResults( map_to_positive_config_ids(ground_truth), @@ -173,6 +204,10 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: map_to_positive_config_ids(predicted), map_to_negative_config_ids(predicted) ) + new_row[f"{profiler.name}_precision"] = results.precision() + new_row[f"{profiler.name}_recall"] = results.recall() + new_row[f"{profiler.name}_baccuracy" + ] = results.balanced_accuracy() table_rows.append(new_row) # df.append(new_row, ignore_index=True) @@ -184,6 +219,16 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: inplace=True, ) + print(f"{df=}") + colum_setup = [('', 'Configs'), ('', 'RegressedConfigs')] + for profiler in profilers: + colum_setup.append((profiler.name, 'Precision')) + colum_setup.append((profiler.name, 'Recall')) + colum_setup.append((profiler.name, 'BAcc')) + + print(f"{colum_setup=}") + df.columns = pd.MultiIndex.from_tuples(colum_setup) + # Table config print(f"{df=}") From f4428bd68a10b691e4af6c55f6fa379d2dd6bc36 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 7 Jul 2023 14:53:39 +0200 Subject: [PATCH 026/224] Adapts pre-commit config to new version --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c4ca553e5..72648f848 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: rev: 5.11.5 hooks: - id: isort - args: ['-nis'] + args: ['--nis'] - repo: https://github.com/myint/docformatter.git rev: v1.4 hooks: From 44bb43076d13213d163c1f7a8165d5de30ae9253 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 7 Jul 2023 18:30:35 +0200 Subject: [PATCH 027/224] Work on new experiment --- .../vara/feature_perf_precision.py | 160 ++++++++++++++++++ .../varats/tables/feature_perf_precision.py | 4 +- 2 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 varats/varats/experiments/vara/feature_perf_precision.py diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py new file mode 100644 index 000000000..c0d93bd7f --- /dev/null +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -0,0 +1,160 @@ +"""Module for feature performance precision experiments that evaluate +measurement support of vara.""" +import textwrap +import typing as tp +from pathlib import Path + +from benchbuild.command import cleanup +from benchbuild.extensions import compiler, run, time +from benchbuild.utils import actions +from benchbuild.utils.actions import ( + ProjectStep, + Step, + StepResult, + Compile, + Clean, +) +from plumbum import local + +from varats.experiment.experiment_util import ( + ExperimentHandle, + VersionExperiment, + WithUnlimitedStackSize, + ZippedReportFolder, + create_new_success_result_filepath, + get_current_config_id, + 
get_default_compile_error_wrapped, + get_extra_config_options, +) +from varats.experiment.workload_util import WorkloadCategory, workload_commands +from varats.experiments.vara.feature_experiment import ( + FeatureExperiment, + RunVaRATracedWorkloads, + RunVaRATracedXRayWorkloads, + FeatureInstrType, +) +from varats.project.project_domain import ProjectDomains +from varats.project.project_util import BinaryType +from varats.project.varats_project import VProject +from varats.report.report import ReportSpecification +from varats.report.tef_report import TEFReport + + +class RunTEFTracedWorkloads(ProjectStep): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." + + project: VProject + + def __init__( + self, + project: VProject, + experiment_handle: ExperimentHandle, + report_file_ending: str = "json" + ): + super().__init__(project=project) + self.__experiment_handle = experiment_handle + self.__report_file_ending = report_file_ending + + def __call__(self) -> StepResult: + return self.run_traced_code() + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Run instrumented code", indent * " " + ) + + def run_traced_code(self) -> StepResult: + """Runs the binary with the embedded tracing code.""" + for binary in self.project.binaries: + if binary.type != BinaryType.EXECUTABLE: + # Skip libaries as we cannot run them + continue + + result_filepath = create_new_success_result_filepath( + self.__experiment_handle, + self.__experiment_handle.report_spec().main_report, + self.project, binary, get_current_config_id(self.project) + ) + + with local.cwd(local.path(self.project.builddir)): + with ZippedReportFolder(result_filepath.full_path()) as tmp_dir: + for prj_command in workload_commands( + self.project, binary, [WorkloadCategory.EXAMPLE] + ): + local_tracefile_path = Path( + tmp_dir + ) / f"trace_{prj_command.command.label}" \ + f".{self.__report_file_ending}" + with local.env(VARA_TRACE_FILE=local_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print( + f"Running example {prj_command.command.label}" + ) + + extra_options = get_extra_config_options( + self.project + ) + with cleanup(prj_command): + pb_cmd( + *extra_options, + retcode=binary.valid_exit_codes + ) + + return StepResult.OK + + +class TEFProfileRunner(FeatureExperiment, shorthand="TEFp"): + """Test runner for feature performance.""" + + NAME = "RunTEFProfiler" + + REPORT_SPEC = ReportSpecification(TEFReport) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + instr_type = FeatureInstrType.TEF + + project.cflags += self.get_vara_feature_cflags(project) + + threshold = 0 if project.DOMAIN.value is ProjectDomains.TEST else 100 + project.cflags += self.get_vara_tracing_cflags( + instr_type, project=project, instruction_threshold=threshold + ) + + project.ldflags += self.get_vara_tracing_ldflags() + + # Add the required runtime extensions to the project(s). + project.runtime_extension = run.RuntimeExtension(project, self) \ + << time.RunWithTime() + + # Add the required compiler extensions to the project(s). + project.compiler_extension = compiler.RunCompiler(project, self) \ + << WithUnlimitedStackSize() + + # Add own error handler to compile step. 
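+        # (The wrapper below is meant to record compile failures as failed
+        # result files rather than aborting the whole experiment run.)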
+ project.compile = get_default_compile_error_wrapped( + self.get_handle(), project, TEFReport + ) + + analysis_actions = [] + + analysis_actions.append(actions.Compile(project)) + analysis_actions.append( + RunVaRATracedWorkloads(project, self.get_handle()) + ) + analysis_actions.append(actions.Clean(project)) + + return analysis_actions diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 47e7a6e7d..59e339497 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -4,6 +4,7 @@ import pandas as pd +import varats.experiments.vara.feature_perf_precision as fpp from varats.experiments.vara.feature_experiment import FeatureExperiment from varats.experiments.vara.feature_perf_runner import FeaturePerfRunner from varats.paper.case_study import CaseStudy @@ -142,7 +143,7 @@ class VXray(Profiler): def __init__(self) -> None: # TODO: fix with actual - super().__init__("WXray", FeaturePerfRunner, TEFReport) + super().__init__("WXray", fpp.TEFProfileRunner, TEFReport) def compute_profiler_predictions( @@ -184,6 +185,7 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: } for profiler in profilers: + # multiple patch cycles predicted = compute_profiler_predictions( profiler, project_name, case_study ) From ef52e4ef924722a728d882673c0dfe787afb5737 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 7 Jul 2023 21:41:57 +0200 Subject: [PATCH 028/224] First draft of a experiment with old/new + patching --- varats-core/varats/utils/git_commands.py | 5 + .../experiments/vara/feature_experiment.py | 2 +- .../vara/feature_perf_precision.py | 151 +++++++++++++----- .../perf_tests/feature_perf_cs_collection.py | 27 ++++ 4 files changed, 140 insertions(+), 45 deletions(-) diff --git a/varats-core/varats/utils/git_commands.py b/varats-core/varats/utils/git_commands.py index 35cd741f0..8dbb60ad7 100644 --- a/varats-core/varats/utils/git_commands.py +++ b/varats-core/varats/utils/git_commands.py @@ -147,3 +147,8 @@ def download_repo( output = git("-C", dl_folder, args) for line in output.split("\n"): post_out(line) + + +def apply_patch(repo_folder: Path, patch_file: Path) -> None: + """Applies a given patch file to the specified git repository.""" + git("-C", repo_folder.absolute(), "apply", str(patch_file)) diff --git a/varats/varats/experiments/vara/feature_experiment.py b/varats/varats/experiments/vara/feature_experiment.py index 52d2ffa16..0d500c21f 100644 --- a/varats/varats/experiments/vara/feature_experiment.py +++ b/varats/varats/experiments/vara/feature_experiment.py @@ -231,7 +231,7 @@ def __call__(self) -> StepResult: def __str__(self, indent: int = 0) -> str: return textwrap.indent( - f"* {self.project.name}: Run instrumentation verifier", indent * " " + f"* {self.project.name}: Run instrumented code", indent * " " ) def run_traced_code(self) -> StepResult: diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index c0d93bd7f..94394ac60 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -14,7 +14,7 @@ Compile, Clean, ) -from plumbum import local +from plumbum import local, ProcessExecutionError from varats.experiment.experiment_util import ( ExperimentHandle, @@ -25,6 +25,7 @@ get_current_config_id, get_default_compile_error_wrapped, get_extra_config_options, + ZippedExperimentSteps, ) from 
varats.experiment.workload_util import WorkloadCategory, workload_commands from varats.experiments.vara.feature_experiment import ( @@ -34,10 +35,60 @@ FeatureInstrType, ) from varats.project.project_domain import ProjectDomains -from varats.project.project_util import BinaryType +from varats.project.project_util import BinaryType, ProjectBinaryWrapper from varats.project.varats_project import VProject from varats.report.report import ReportSpecification from varats.report.tef_report import TEFReport +from varats.utils.git_commands import apply_patch + + +class ReCompile(ProjectStep): + NAME = "RECOMPILE" + DESCRIPTION = "Recompile the project" + + def __call__(self, _: tp.Any) -> StepResult: + try: + self.project.recompile() + + except ProcessExecutionError: + self.status = StepResult.ERROR + self.status = StepResult.OK + + return self.status + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Recompile", indent * " " + ) + + +class ApplyPatch(ProjectStep): + NAME = "APPLY_PATCH" + DESCRIPTION = "Apply a patch the project" + + def __init__(self, project: VProject, patch_file: Path) -> None: + super().__init__(project) + self.__patch_file = patch_file + + def __call__(self, _: tp.Any) -> StepResult: + try: + print( + f"Applying {self.__patch_file} to {self.project.source_of(self.project.primary_source)}" + ) + apply_patch( + Path(self.project.source_of(self.project.primary_source)), + self.__patch_file + ) + except ProcessExecutionError: + self.status = StepResult.ERROR + self.status = StepResult.OK + + return self.status + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Apply patch", indent * " " + ) class RunTEFTracedWorkloads(ProjectStep): # type: ignore @@ -51,59 +102,45 @@ class RunTEFTracedWorkloads(ProjectStep): # type: ignore def __init__( self, project: VProject, - experiment_handle: ExperimentHandle, + binary: ProjectBinaryWrapper, + result_post_fix: str = "", report_file_ending: str = "json" ): super().__init__(project=project) - self.__experiment_handle = experiment_handle + self.__binary = binary self.__report_file_ending = report_file_ending + self.__result_post_fix = result_post_fix - def __call__(self) -> StepResult: - return self.run_traced_code() + def __call__(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) def __str__(self, indent: int = 0) -> str: return textwrap.indent( f"* {self.project.name}: Run instrumented code", indent * " " ) - def run_traced_code(self) -> StepResult: + def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" - for binary in self.project.binaries: - if binary.type != BinaryType.EXECUTABLE: - # Skip libaries as we cannot run them - continue - - result_filepath = create_new_success_result_filepath( - self.__experiment_handle, - self.__experiment_handle.report_spec().main_report, - self.project, binary, get_current_config_id(self.project) - ) - - with local.cwd(local.path(self.project.builddir)): - with ZippedReportFolder(result_filepath.full_path()) as tmp_dir: - for prj_command in workload_commands( - self.project, binary, [WorkloadCategory.EXAMPLE] - ): - local_tracefile_path = Path( - tmp_dir - ) / f"trace_{prj_command.command.label}" \ - f".{self.__report_file_ending}" - with local.env(VARA_TRACE_FILE=local_tracefile_path): - pb_cmd = prj_command.command.as_plumbum( - project=self.project - ) - print( - f"Running example {prj_command.command.label}" - ) - - 
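                    # Forward the configuration-specific extra options to the
                    # binary so that each configuration of the case study is
                    # measured, not just the default one.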
extra_options = get_extra_config_options( - self.project - ) - with cleanup(prj_command): - pb_cmd( - *extra_options, - retcode=binary.valid_exit_codes - ) + with local.cwd(local.path(self.project.builddir)): + for prj_command in workload_commands( + self.project, self.__binary, [WorkloadCategory.EXAMPLE] + ): + local_tracefile_path = Path( + tmp_dir + ) / f"trace_{prj_command.command.label}_" \ + f"{self.__result_post_fix}.{self.__report_file_ending}" + with local.env(VARA_TRACE_FILE=local_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print(f"Running example {prj_command.command.label}") + + extra_options = get_extra_config_options(self.project) + with cleanup(prj_command): + pb_cmd( + *extra_options, + retcode=self.__binary.valid_exit_codes + ) return StepResult.OK @@ -149,11 +186,37 @@ def actions_for_project( self.get_handle(), project, TEFReport ) + binary = project.binaries[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + self.get_handle(), + self.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + analysis_actions = [] analysis_actions.append(actions.Compile(project)) analysis_actions.append( - RunVaRATracedWorkloads(project, self.get_handle()) + ZippedExperimentSteps( + result_filepath, [ + RunTEFTracedWorkloads( + project, binary, result_post_fix="old" + ), + ApplyPatch( + project, + Path( + "/home/vulder/git/FeaturePerfCSCollection/test.patch" + ) + ), + ReCompile(project), + RunTEFTracedWorkloads( + project, binary, result_post_fix="new" + ) + ] + ) ) analysis_actions.append(actions.Clean(project)) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 1a39a6cc2..906e085d0 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -49,6 +49,13 @@ def _do_feature_perf_cs_collection_compile( verify_binaries(project) +def _do_feature_perf_cs_collection_recompile(project: VProject) -> None: + feature_perf_source = local.path(project.source_of(project.primary_source)) + + with local.cwd(feature_perf_source / "build"): + bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) + + class FeaturePerfCSCollection(VProject): """Test project for feature performance case studies.""" @@ -148,6 +155,10 @@ def compile(self) -> None: """Compile the project.""" _do_feature_perf_cs_collection_compile(self, "FPCSC_ENABLE_SRC") + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + class SynthSAFieldSensitivity(VProject): """Synthetic case-study project for testing field sensitivity.""" @@ -206,6 +217,10 @@ def compile(self) -> None: self, "FPCSC_ENABLE_PROJECT_SYNTHSAFIELDSENSITIVITY" ) + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + class SynthSAFlowSensitivity(VProject): """Synthetic case-study project for testing flow sensitivity.""" @@ -264,6 +279,10 @@ def compile(self) -> None: self, "FPCSC_ENABLE_PROJECT_SYNTHSAFLOWSENSITIVITY" ) + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + class SynthSAContextSensitivity(VProject): """Synthetic case-study project for testing flow sensitivity.""" @@ -325,6 +344,10 @@ def compile(self) -> 
None: self, "FPCSC_ENABLE_PROJECT_SYNTHSACONTEXTSENSITIVITY" ) + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + class SynthSAInterProcedural(VProject): """Synthetic case-study project for testing flow sensitivity.""" @@ -383,3 +406,7 @@ def compile(self) -> None: _do_feature_perf_cs_collection_compile( self, "FPCSC_ENABLE_PROJECT_SYNTHSAINTERPROCEDURAL" ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) From f40223d45961daf86a2ea2b6205977455076c144 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 7 Jul 2023 23:37:40 +0200 Subject: [PATCH 029/224] First working pipeline --- varats-core/varats/report/tef_report.py | 4 + varats-core/varats/revision/revisions.py | 19 +- varats/varats/jupyterhelper/file.py | 11 + .../varats/tables/feature_perf_precision.py | 191 +++++++++++++++--- 4 files changed, 199 insertions(+), 26 deletions(-) diff --git a/varats-core/varats/report/tef_report.py b/varats-core/varats/report/tef_report.py index 44bfb8e36..644a2f818 100644 --- a/varats-core/varats/report/tef_report.py +++ b/varats-core/varats/report/tef_report.py @@ -169,6 +169,10 @@ def _parse_json(self) -> None: self.__trace_events: tp.List[TraceEvent] = trace_events +def extract_feature_data(): + pass + + class TEFReportAggregate( ReportAggregate[TEFReport], shorthand=TEFReport.SHORTHAND + ReportAggregate.SHORTHAND, diff --git a/varats-core/varats/revision/revisions.py b/varats-core/varats/revision/revisions.py index 04d987d63..09c45d915 100644 --- a/varats-core/varats/revision/revisions.py +++ b/varats-core/varats/revision/revisions.py @@ -123,7 +123,8 @@ def __get_files_with_status( experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None, report_type: tp.Optional[tp.Type[BaseReport]] = None, file_name_filter: tp.Callable[[str], bool] = lambda x: False, - only_newest: bool = True + only_newest: bool = True, + config_id: tp.Optional[int] = None ) -> tp.List[ReportFilepath]: """ Find all file paths to result files with given file statuses. @@ -148,7 +149,18 @@ def __get_files_with_status( result_files = __get_result_files_dict( project_name, experiment_type, report_type ) + for value in result_files.values(): + # print(f"before {value=}") + # print(f"Local {config_id=}") + if config_id is not None: + value = [ + x for x in value if x.report_filename.config_id == config_id + ] + if not value: + continue + # print(f"after {value=}") + sorted_res_files = sorted( value, key=lambda x: x.stat().st_mtime, reverse=True ) @@ -198,7 +210,8 @@ def get_processed_revisions_files( experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None, report_type: tp.Optional[tp.Type[BaseReport]] = None, file_name_filter: tp.Callable[[str], bool] = lambda x: False, - only_newest: bool = True + only_newest: bool = True, + config_id: tp.Optional[int] = None ) -> tp.List[ReportFilepath]: """ Find all file paths to correctly processed revision files. 
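
A short usage sketch for the config-aware lookup added above (the project name
and config id are illustrative placeholders):

    from varats.revision.revisions import get_processed_revisions_files

    # Only successful result files that belong to configuration 3 are
    # returned; with config_id=None the extra filter is skipped entirely.
    reports = get_processed_revisions_files(
        "SynthSAFieldSensitivity", config_id=3
    )
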
@@ -219,7 +232,7 @@
     """
     return __get_files_with_status(
         project_name, [FileStatusExtension.SUCCESS], experiment_type,
-        report_type, file_name_filter, only_newest
+        report_type, file_name_filter, only_newest, config_id
     )
 
 
diff --git a/varats/varats/jupyterhelper/file.py b/varats/varats/jupyterhelper/file.py
index 1d1e1ee32..4c89e6f46 100644
--- a/varats/varats/jupyterhelper/file.py
+++ b/varats/varats/jupyterhelper/file.py
@@ -18,6 +18,7 @@
     SZZReport,
     PyDrillerSZZReport,
 )
+from varats.report.tef_report import TEFReport
 
 
 def load_commit_report(file_path: PathLikeTy) -> CommitReport:
@@ -113,3 +114,13 @@ def load_feature_analysis_report(file_path: PathLikeTy) -> \
         file_path (Path): Full path to the file
     """
     return VDM.load_data_class_sync(file_path, FeatureAnalysisReport)
+
+
+def load_tef_report(file_path: PathLikeTy) -> TEFReport:
+    """
+    Load a TEFReport from a file.
+
+    Attributes:
+        file_path (Path): Full path to the file
+    """
+    return VDM.load_data_class_sync(file_path, TEFReport)
diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py
index 59e339497..d486599c5 100644
--- a/varats/varats/tables/feature_perf_precision.py
+++ b/varats/varats/tables/feature_perf_precision.py
@@ -1,25 +1,110 @@
 """Module for the FeaturePerfPrecision table."""
 import abc
+import shutil
+import tempfile
 import typing as tp
+from pathlib import Path
 
+import numpy as np
 import pandas as pd
+from scipy.stats import ttest_ind
 
 import varats.experiments.vara.feature_perf_precision as fpp
 from varats.experiments.vara.feature_experiment import FeatureExperiment
 from varats.experiments.vara.feature_perf_runner import FeaturePerfRunner
+from varats.jupyterhelper.file import load_tef_report
 from varats.paper.case_study import CaseStudy
 from varats.paper.paper_config import get_loaded_paper_config
 from varats.paper_mgmt.case_study import get_case_study_file_name_filter
+from varats.report.gnu_time_report import TimeReportAggregate
-from varats.report.report import BaseReport
+from varats.report.report import BaseReport, ReportFilepath
-from varats.report.tef_report import TEFReport
+from varats.report.tef_report import TEFReport, TraceEvent, TraceEventType
 from varats.revision.revisions import get_processed_revisions_files
 from varats.table.table import Table
 from varats.table.table_utils import dataframe_to_table
 from varats.table.tables import TableFormat, TableGenerator
+from varats.utils.git_util import FullCommitHash
+
+
+def get_interactions_from_fr_string(interactions: str) -> str:
+    """Convert the feature strings in a TEFReport from FR(x,y) to x*y, similar
+    to the format used by SPLConqueror."""
+    interactions = (
+        interactions.replace("FR", "").replace("(", "").replace(")", "")
+    )
+    interactions_list = interactions.split(",")
+    # Ignore interactions with base, but do not remove base if it's the only
+    # feature
+    if "Base" in interactions_list and len(interactions_list) > 1:
+        interactions_list.remove("Base")
+    # A feature cannot interact with itself, so remove duplicates
+    interactions_list = list(set(interactions_list))
+
+    interactions_str = "*".join(interactions_list)
+
+    return interactions_str
+
+
+def get_feature_performance_from_tef_report(
+    tef_report: TEFReport,
+) -> tp.Dict[str, int]:
+    """Extract feature performance from a TEFReport."""
+    open_events: tp.List[TraceEvent] = []
+
+    feature_performances: tp.Dict[str, int] = {}
+
+    for trace_event in tef_report.trace_events:
+        if trace_event.category == "Feature":
+            if (trace_event.event_type == TraceEventType.DURATION_EVENT_BEGIN):
+                open_events.append(trace_event)
+            elif (trace_event.event_type ==
TraceEventType.DURATION_EVENT_END): + opening_event = open_events.pop() + + end_timestamp = trace_event.timestamp + begin_timestamp = opening_event.timestamp + + # Subtract feature duration from parent duration such that + # it is not counted twice, similar to behavior in + # Performance-Influence models. + interactions = [event.name for event in open_events] + if open_events: + # Parent is equivalent to interaction of all open + # events. + interaction_string = get_interactions_from_fr_string( + ",".join(interactions) + ) + if interaction_string in feature_performances: + feature_performances[interaction_string] -= ( + end_timestamp - begin_timestamp + ) + else: + feature_performances[interaction_string] = -( + end_timestamp - begin_timestamp + ) + + interaction_string = get_interactions_from_fr_string( + ",".join(interactions + [trace_event.name]) + ) + + current_performance = feature_performances.get( + interaction_string, 0 + ) + feature_performances[interaction_string] = ( + current_performance + end_timestamp - begin_timestamp + ) + + return feature_performances + + def get_regressing_config_ids_GT(case_study: CaseStudy) -> tp.Dict[int, bool]: - #if case_study.project_name == "SynthSAContextSensitivity": + if case_study.project_name == "SynthSAContextSensitivity": + return { + 0: True, + 1: False, + 2: False, + 3: True, + 4: False, + 5: False, + 6: False, + 7: False + } + return { 0: True, 1: True, @@ -98,6 +183,9 @@ def FN(self) -> int: # pylint: disable=C0103 return self.PN - self.TN def precision(self) -> float: + if self.PP == 0: + return 0.0 + return self.TP / self.PP def recall(self) -> float: @@ -138,23 +226,77 @@ def report_type(self) -> tp.Type[BaseReport]: """Report type used to load this profilers information.""" return self.__report_type + @abc.abstractmethod + def is_regression(self, report_path: ReportFilepath) -> bool: + """Checks if there was a regression between the old an new data.""" + class VXray(Profiler): def __init__(self) -> None: - # TODO: fix with actual super().__init__("WXray", fpp.TEFProfileRunner, TEFReport) + def is_regression(self, report_path: ReportFilepath) -> bool: + """Checks if there was a regression between the old an new data.""" + is_regression = False + + with tempfile.TemporaryDirectory() as tmp_result_dir: + shutil.unpack_archive( + report_path.full_path(), extract_dir=tmp_result_dir + ) + old_report = None + new_report = None + for report in Path(tmp_result_dir).iterdir(): + # print(f"Zipped: {report=}") + if report.name.endswith("old.json"): + old_report = load_tef_report(report) + else: + new_report = load_tef_report(report) + + old_features = get_feature_performance_from_tef_report(old_report) + new_features = get_feature_performance_from_tef_report(new_report) + + # TODO: correctly implement how to identify a regression + for feature, old_value in old_features.items(): + if feature in new_features: + new_value = new_features[feature] + if abs(new_value - old_value) > 10000: + print(f"Found regression for feature {feature}.") + is_regression = True + else: + print(f"Could not find feature {feature} in new trace.") + # TODO: how to handle this? 
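+                # (Conservative fallback: a feature that is missing from the
+                # new trace is counted as a regression until a real strategy
+                # for disappearing features is decided.)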
+ is_regression = True + + return is_regression + def compute_profiler_predictions( - profiler: Profiler, project_name: str, case_study: CaseStudy + profiler: Profiler, project_name: str, case_study: CaseStudy, + config_ids: tp.List[int] ) -> tp.Dict[int, bool]: - report_files = get_processed_revisions_files( - project_name, profiler.experiment, profiler.report_type, - get_case_study_file_name_filter(case_study) - ) - return {} + result_dict: tp.Dict[int, bool] = {} + for config_id in config_ids: + report_files = get_processed_revisions_files( + project_name, + profiler.experiment, + profiler.report_type, + get_case_study_file_name_filter(case_study), + config_id=config_id + ) + # print(f"{config_id=} = {report_files=}") + + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + # TODO: not sure how to handle this + continue + + result_dict[config_id] = profiler.is_regression(report_files[0]) + + print(f"{result_dict=}") + return result_dict class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): @@ -186,19 +328,22 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: for profiler in profilers: # multiple patch cycles - predicted = compute_profiler_predictions( - profiler, project_name, case_study - ) - predicted = { - 0: True, - 1: True, - 2: False, - 3: True, - 4: False, - 5: True, - 6: True, - 7: False - } + if case_study.project_name == "SynthSAContextSensitivity": + predicted = compute_profiler_predictions( + profiler, project_name, case_study, + case_study.get_config_ids_for_revision(rev) + ) + else: + predicted = { + 0: True, + 1: True, + 2: False, + 3: True, + 4: False, + 5: True, + 6: True, + 7: False + } results = ClassificationResults( map_to_positive_config_ids(ground_truth), From 9bed01ef40ae453731c05c841114ac5d91626102 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 8 Jul 2023 21:19:00 +0200 Subject: [PATCH 030/224] Adds black box GT computation --- .../vara/feature_perf_precision.py | 199 +++++++++++++++-- .../varats/tables/feature_perf_precision.py | 202 +++++++++++------- 2 files changed, 305 insertions(+), 96 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 94394ac60..2e1cafcba 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -1,11 +1,13 @@ """Module for feature performance precision experiments that evaluate measurement support of vara.""" +import shutil +import tempfile import textwrap import typing as tp from pathlib import Path +import benchbuild.extensions as bb_ext from benchbuild.command import cleanup -from benchbuild.extensions import compiler, run, time from benchbuild.utils import actions from benchbuild.utils.actions import ( ProjectStep, @@ -14,6 +16,7 @@ Compile, Clean, ) +from benchbuild.utils.cmd import time from plumbum import local, ProcessExecutionError from varats.experiment.experiment_util import ( @@ -37,11 +40,48 @@ from varats.project.project_domain import ProjectDomains from varats.project.project_util import BinaryType, ProjectBinaryWrapper from varats.project.varats_project import VProject -from varats.report.report import ReportSpecification -from varats.report.tef_report import TEFReport +from varats.report.gnu_time_report import TimeReportAggregate +from varats.report.report import ReportSpecification, ReportTy, BaseReport +from varats.report.tef_report import TEFReport, 
TEFReportAggregate from varats.utils.git_commands import apply_patch +class MultiPatchReport( + BaseReport, tp.Generic[ReportTy], shorthand="MPR", file_type=".zip" +): + + def __init__(self, path: Path, report_type: tp.Type[ReportTy]) -> None: + super().__init__(path) + with tempfile.TemporaryDirectory() as tmp_result_dir: + shutil.unpack_archive(path, extract_dir=tmp_result_dir) + + # TODO: clean up + for report in Path(tmp_result_dir).iterdir(): + if report.name.startswith("old"): + self.__old = report_type(report) + elif report.name.startswith("new"): + self.__new = report_type(report) + + if not self.__old or not self.__new: + raise AssertionError( + "Reports where missing in the file {report_path=}" + ) + + def get_old_report(self) -> ReportTy: + return self.__old + + def get_new_report(self) -> ReportTy: + return self.__new + + +class MPRTRA( + MultiPatchReport[TimeReportAggregate], shorthand="MPRTRA", file_type=".zip" +): + + def __init__(self, path: Path) -> None: + super().__init__(path, TimeReportAggregate) + + class ReCompile(ProjectStep): NAME = "RECOMPILE" DESCRIPTION = "Recompile the project" @@ -109,7 +149,7 @@ def __init__( super().__init__(project=project) self.__binary = binary self.__report_file_ending = report_file_ending - self.__result_post_fix = result_post_fix + self.__result_pre_fix = result_post_fix def __call__(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) @@ -125,10 +165,10 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: for prj_command in workload_commands( self.project, self.__binary, [WorkloadCategory.EXAMPLE] ): - local_tracefile_path = Path( - tmp_dir - ) / f"trace_{prj_command.command.label}_" \ - f"{self.__result_post_fix}.{self.__report_file_ending}" + local_tracefile_path = Path(tmp_dir) / ( + f"{self.__result_pre_fix}_trace_" + f"{prj_command.command.label}_.{self.__report_file_ending}" + ) with local.env(VARA_TRACE_FILE=local_tracefile_path): pb_cmd = prj_command.command.as_plumbum( project=self.project @@ -174,12 +214,14 @@ def actions_for_project( project.ldflags += self.get_vara_tracing_ldflags() # Add the required runtime extensions to the project(s). - project.runtime_extension = run.RuntimeExtension(project, self) \ - << time.RunWithTime() + project.runtime_extension = bb_ext.run.RuntimeExtension( + project, self + ) << bb_ext.time.RunWithTime() # Add the required compiler extensions to the project(s). - project.compiler_extension = compiler.RunCompiler(project, self) \ - << WithUnlimitedStackSize() + project.compiler_extension = bb_ext.compiler.RunCompiler( + project, self + ) << WithUnlimitedStackSize() # Add own error handler to compile step. project.compile = get_default_compile_error_wrapped( @@ -201,7 +243,7 @@ def actions_for_project( analysis_actions.append(actions.Compile(project)) analysis_actions.append( ZippedExperimentSteps( - result_filepath, [ + result_filepath, [ # type: ignore RunTEFTracedWorkloads( project, binary, result_post_fix="old" ), @@ -221,3 +263,134 @@ def actions_for_project( analysis_actions.append(actions.Clean(project)) return analysis_actions + + +class RunBackBoxBaseline(ProjectStep): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." 
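+    # NOTE: NAME and DESCRIPTION are carried over verbatim from the traced
+    # runner; this step measures the untraced binary with GNU time to obtain
+    # the black-box baseline.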
+ + project: VProject + + def __init__( + self, + project: VProject, + binary: ProjectBinaryWrapper, + result_post_fix: str = "", + report_file_ending: str = "txt", + reps=2 + ): + super().__init__(project=project) + self.__binary = binary + self.__report_file_ending = report_file_ending + self.__result_pre_fix = result_post_fix + self.__reps = reps + + def __call__(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Run instrumented code", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + zip_tmp_dir = tmp_dir / f"{self.__result_pre_fix}_rep_measures" + with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: + for rep in range(0, self.__reps): + for prj_command in workload_commands( + self.project, self.__binary, [WorkloadCategory.EXAMPLE] + ): + time_report_file = Path(reps_tmp_dir) / ( + f"baseline_{prj_command.command.label}_{rep}_" + f".{self.__report_file_ending}" + ) + + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print(f"Running example {prj_command.command.label}") + + timed_pb_cmd = time["-v", "-o", time_report_file, + pb_cmd] + + extra_options = get_extra_config_options(self.project) + with cleanup(prj_command): + timed_pb_cmd( + *extra_options, + retcode=self.__binary.valid_exit_codes + ) + + return StepResult.OK + + +class BlackBoxBaselineRunner(FeatureExperiment, shorthand="BBBase"): + """Test runner for feature performance.""" + + NAME = "GenBBBaseline" + + REPORT_SPEC = ReportSpecification(MPRTRA) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + project.cflags += ["-flto", "-fuse-ld=lld", "-fno-omit-frame-pointer"] + + project.ldflags += self.get_vara_tracing_ldflags() + + # Add the required runtime extensions to the project(s). + project.runtime_extension = bb_ext.run.RuntimeExtension( + project, self + ) << bb_ext.time.RunWithTime() + + # Add the required compiler extensions to the project(s). + project.compiler_extension = bb_ext.compiler.RunCompiler( + project, self + ) << WithUnlimitedStackSize() + + # Add own error handler to compile step. 
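+        # (Compile errors are filed against MPRTRA, the zipped old/new GNU
+        # time aggregate report, since that is this experiment's main report.)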
+ project.compile = get_default_compile_error_wrapped( + self.get_handle(), project, self.REPORT_SPEC.main_report + ) + + binary = project.binaries[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + self.get_handle(), + self.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + + analysis_actions = [] + + analysis_actions.append(actions.Compile(project)) + analysis_actions.append( + ZippedExperimentSteps( + result_filepath, [ # type: ignore + RunBackBoxBaseline(project, binary, result_post_fix="old"), + ApplyPatch( + project, + Path( + "/home/vulder/git/FeaturePerfCSCollection/test.patch" + ) + ), + ReCompile(project), + RunBackBoxBaseline(project, binary, result_post_fix="new") + ] + ) + ) + analysis_actions.append(actions.Clean(project)) + + return analysis_actions diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index d486599c5..d38101144 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -5,7 +5,9 @@ import typing as tp from pathlib import Path +import numpy as np import pandas as pd +from scipy.stats import ttest_ind import varats.experiments.vara.feature_perf_precision as fpp from varats.experiments.vara.feature_experiment import FeatureExperiment @@ -14,12 +16,14 @@ from varats.paper.case_study import CaseStudy from varats.paper.paper_config import get_loaded_paper_config from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.report.gnu_time_report import TimeReportAggregate from varats.report.report import BaseReport, ReportFilepath from varats.report.tef_report import TEFReport, TraceEvent, TraceEventType from varats.revision.revisions import get_processed_revisions_files from varats.table.table import Table from varats.table.table_utils import dataframe_to_table from varats.table.tables import TableFormat, TableGenerator +from varats.utils.git_util import FullCommitHash def get_interactions_from_fr_string(interactions: str) -> str: @@ -92,32 +96,51 @@ def get_feature_performance_from_tef_report( return feature_performances -def get_regressing_config_ids_GT(case_study: CaseStudy) -> tp.Dict[int, bool]: - if case_study.project_name == "SynthSAContextSensitivity": - return { - 0: True, - 1: False, - 2: False, - 3: True, - 4: False, - 5: False, - 6: False, - 7: False - } - - return { - 0: True, - 1: True, - 2: True, - 3: True, - 4: False, - 5: False, - 6: False, - 7: False - } - - # raise NotImplementedError() - return {} +def get_regressing_config_ids_gt( + project_name: str, case_study: CaseStudy, rev: FullCommitHash +) -> tp.Optional[tp.Dict[int, bool]]: + """Computes the baseline data, i.e., the config ids where a regression was + identified.""" + + gt: tp.Dict[int, bool] = {} + + for config_id in case_study.get_config_ids_for_revision(rev): + report_files = get_processed_revisions_files( + project_name, + fpp.BlackBoxBaselineRunner, + fpp.MPRTRA, + get_case_study_file_name_filter(case_study), + config_id=config_id + ) + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print( + f"Could not find profiling data. 
{config_id=}, " + f"profiler=Baseline" + ) + return None + + time_reports = fpp.MPRTRA(report_files[0].full_path()) + + old_time = time_reports.get_old_report() + new_time = time_reports.get_new_report() + + if np.mean(old_time.measurements_wall_clock_time + ) == np.mean(new_time.measurements_wall_clock_time): + gt[config_id] = False + else: + # TODO: double check ttest handling + ttest_res = ttest_ind( + old_time.measurements_wall_clock_time, + new_time.measurements_wall_clock_time + ) + if ttest_res.pvalue < 0.05: + gt[config_id] = True + else: + gt[config_id] = False + + return gt def map_to_positive_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: @@ -202,6 +225,7 @@ def balanced_accuracy(self) -> float: class Profiler(): + """Profiler interface to add different profilers to the evaluation.""" def __init__( self, name: str, experiment: tp.Type[FeatureExperiment], @@ -232,6 +256,7 @@ def is_regression(self, report_path: ReportFilepath) -> bool: class VXray(Profiler): + """Profiler mapper implementation for the vara tef tracer.""" def __init__(self) -> None: super().__init__("WXray", fpp.TEFProfileRunner, TEFReport) @@ -240,33 +265,45 @@ def is_regression(self, report_path: ReportFilepath) -> bool: """Checks if there was a regression between the old an new data.""" is_regression = False - with tempfile.TemporaryDirectory() as tmp_result_dir: - shutil.unpack_archive( - report_path.full_path(), extract_dir=tmp_result_dir - ) - old_report = None - new_report = None - for report in Path(tmp_result_dir).iterdir(): - # print(f"Zipped: {report=}") - if report.name.endswith("old.json"): - old_report = load_tef_report(report) - else: - new_report = load_tef_report(report) - - old_features = get_feature_performance_from_tef_report(old_report) - new_features = get_feature_performance_from_tef_report(new_report) - - # TODO: correctly implement how to identify a regression - for feature, old_value in old_features.items(): - if feature in new_features: - new_value = new_features[feature] - if abs(new_value - old_value) > 10000: - print(f"Found regression for feature {feature}.") - is_regression = True - else: - print(f"Could not find feature {feature} in new trace.") - # TODO: how to handle this? + # with tempfile.TemporaryDirectory() as tmp_result_dir: + # shutil.unpack_archive( + # report_path.full_path(), extract_dir=tmp_result_dir + # ) + # + # old_report = None + # new_report = None + # for report in Path(tmp_result_dir).iterdir(): + # # print(f"Zipped: {report=}") + # if report.name.endswith("old.json"): + # old_report = load_tef_report(report) + # else: + # new_report = load_tef_report(report) + + # if not old_report or not new_report: + # raise AssertionError( + # "Reports where missing in the file {report_path=}" + # ) + + multi_report = fpp.MultiPatchReport(report_path.full_path(), TEFReport) + + old_features = get_feature_performance_from_tef_report( + multi_report.get_old_report() + ) + new_features = get_feature_performance_from_tef_report( + multi_report.get_new_report() + ) + + # TODO: correctly implement how to identify a regression + for feature, old_value in old_features.items(): + if feature in new_features: + new_value = new_features[feature] + if abs(new_value - old_value) > 10000: + print(f"Found regression for feature {feature}.") is_regression = True + else: + print(f"Could not find feature {feature} in new trace.") + # TODO: how to handle this? 
+ is_regression = True return is_regression @@ -274,7 +311,8 @@ def is_regression(self, report_path: ReportFilepath) -> bool: def compute_profiler_predictions( profiler: Profiler, project_name: str, case_study: CaseStudy, config_ids: tp.List[int] -) -> tp.Dict[int, bool]: +) -> tp.Optional[tp.Dict[int, bool]]: + """Computes the regression predictions for a given profiler.""" result_dict: tp.Dict[int, bool] = {} for config_id in config_ids: @@ -285,13 +323,15 @@ def compute_profiler_predictions( get_case_study_file_name_filter(case_study), config_id=config_id ) - # print(f"{config_id=} = {report_files=}") if len(report_files) > 1: raise AssertionError("Should only be one") if not report_files: - # TODO: not sure how to handle this - continue + print( + f"Could not find profiling data. {config_id=}, " + f"profiler={profiler.name}" + ) + return None result_dict[config_id] = profiler.is_regression(report_files[0]) @@ -315,7 +355,9 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: rev = case_study.revisions[0] project_name = case_study.project_name - ground_truth = get_regressing_config_ids_GT(case_study) + ground_truth = get_regressing_config_ids_gt( + project_name, case_study, rev + ) new_row = { 'CaseStudy': @@ -324,37 +366,31 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: len(case_study.get_config_ids_for_revision(rev)), 'RegressedConfigs': len(map_to_positive_config_ids(ground_truth)) + if ground_truth else np.nan } for profiler in profilers: - # multiple patch cycles - if case_study.project_name == "SynthSAContextSensitivity": - predicted = compute_profiler_predictions( - profiler, project_name, case_study, - case_study.get_config_ids_for_revision(rev) + # TODO: multiple patch cycles + predicted = compute_profiler_predictions( + profiler, project_name, case_study, + case_study.get_config_ids_for_revision(rev) + ) + + if ground_truth and predicted: + results = ClassificationResults( + map_to_positive_config_ids(ground_truth), + map_to_negative_config_ids(ground_truth), + map_to_positive_config_ids(predicted), + map_to_negative_config_ids(predicted) ) + new_row[f"{profiler.name}_precision"] = results.precision() + new_row[f"{profiler.name}_recall"] = results.recall() + new_row[f"{profiler.name}_baccuracy" + ] = results.balanced_accuracy() else: - predicted = { - 0: True, - 1: True, - 2: False, - 3: True, - 4: False, - 5: True, - 6: True, - 7: False - } - - results = ClassificationResults( - map_to_positive_config_ids(ground_truth), - map_to_negative_config_ids(ground_truth), - map_to_positive_config_ids(predicted), - map_to_negative_config_ids(predicted) - ) - new_row[f"{profiler.name}_precision"] = results.precision() - new_row[f"{profiler.name}_recall"] = results.recall() - new_row[f"{profiler.name}_baccuracy" - ] = results.balanced_accuracy() + new_row[f"{profiler.name}_precision"] = np.nan + new_row[f"{profiler.name}_recall"] = np.nan + new_row[f"{profiler.name}_baccuracy"] = np.nan table_rows.append(new_row) # df.append(new_row, ignore_index=True) From db600bf5f3f0d3bab3006e11aa9152c7badf391f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 8 Jul 2023 22:16:27 +0200 Subject: [PATCH 031/224] Adapt wxray measurement to repeat and adds significatc test --- .../vara/feature_perf_precision.py | 59 ++++++++++------ .../varats/tables/feature_perf_precision.py | 67 +++++++++---------- 2 files changed, 70 insertions(+), 56 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py 
b/varats/varats/experiments/vara/feature_perf_precision.py index 2e1cafcba..44c1f15ba 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -82,6 +82,14 @@ def __init__(self, path: Path) -> None: super().__init__(path, TimeReportAggregate) +class MPRTEFA( + MultiPatchReport[TEFReportAggregate], shorthand="MPRTEFA", file_type=".zip" +): + + def __init__(self, path: Path) -> None: + super().__init__(path, TEFReportAggregate) + + class ReCompile(ProjectStep): NAME = "RECOMPILE" DESCRIPTION = "Recompile the project" @@ -144,12 +152,14 @@ def __init__( project: VProject, binary: ProjectBinaryWrapper, result_post_fix: str = "", - report_file_ending: str = "json" + report_file_ending: str = "json", + reps=2 ): super().__init__(project=project) self.__binary = binary self.__report_file_ending = report_file_ending self.__result_pre_fix = result_post_fix + self.__reps = reps def __call__(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) @@ -162,25 +172,32 @@ def __str__(self, indent: int = 0) -> str: def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): - for prj_command in workload_commands( - self.project, self.__binary, [WorkloadCategory.EXAMPLE] - ): - local_tracefile_path = Path(tmp_dir) / ( - f"{self.__result_pre_fix}_trace_" - f"{prj_command.command.label}_.{self.__report_file_ending}" - ) - with local.env(VARA_TRACE_FILE=local_tracefile_path): - pb_cmd = prj_command.command.as_plumbum( - project=self.project - ) - print(f"Running example {prj_command.command.label}") - - extra_options = get_extra_config_options(self.project) - with cleanup(prj_command): - pb_cmd( - *extra_options, - retcode=self.__binary.valid_exit_codes + zip_tmp_dir = tmp_dir / f"{self.__result_pre_fix}_rep_measures" + with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: + for rep in range(0, self.__reps): + for prj_command in workload_commands( + self.project, self.__binary, [WorkloadCategory.EXAMPLE] + ): + local_tracefile_path = Path(reps_tmp_dir) / ( + f"trace_{prj_command.command.label}_{rep}_" + f".{self.__report_file_ending}" ) + with local.env(VARA_TRACE_FILE=local_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print( + f"Running example {prj_command.command.label}" + ) + + extra_options = get_extra_config_options( + self.project + ) + with cleanup(prj_command): + pb_cmd( + *extra_options, + retcode=self.__binary.valid_exit_codes + ) return StepResult.OK @@ -190,7 +207,7 @@ class TEFProfileRunner(FeatureExperiment, shorthand="TEFp"): NAME = "RunTEFProfiler" - REPORT_SPEC = ReportSpecification(TEFReport) + REPORT_SPEC = ReportSpecification(MPRTEFA) def actions_for_project( self, project: VProject @@ -225,7 +242,7 @@ def actions_for_project( # Add own error handler to compile step. 
project.compile = get_default_compile_error_wrapped( - self.get_handle(), project, TEFReport + self.get_handle(), project, self.REPORT_SPEC.main_report ) binary = project.binaries[0] diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index d38101144..adb7d08f5 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -3,6 +3,7 @@ import shutil import tempfile import typing as tp +from collections import defaultdict from pathlib import Path import numpy as np @@ -18,7 +19,12 @@ from varats.paper_mgmt.case_study import get_case_study_file_name_filter from varats.report.gnu_time_report import TimeReportAggregate from varats.report.report import BaseReport, ReportFilepath -from varats.report.tef_report import TEFReport, TraceEvent, TraceEventType +from varats.report.tef_report import ( + TEFReport, + TraceEvent, + TraceEventType, + TEFReportAggregate, +) from varats.revision.revisions import get_processed_revisions_files from varats.table.table import Table from varats.table.table_utils import dataframe_to_table @@ -259,46 +265,38 @@ class VXray(Profiler): """Profiler mapper implementation for the vara tef tracer.""" def __init__(self) -> None: - super().__init__("WXray", fpp.TEFProfileRunner, TEFReport) + super().__init__("WXray", fpp.TEFProfileRunner, fpp.MPRTEFA) def is_regression(self, report_path: ReportFilepath) -> bool: """Checks if there was a regression between the old an new data.""" is_regression = False - # with tempfile.TemporaryDirectory() as tmp_result_dir: - # shutil.unpack_archive( - # report_path.full_path(), extract_dir=tmp_result_dir - # ) - # - # old_report = None - # new_report = None - # for report in Path(tmp_result_dir).iterdir(): - # # print(f"Zipped: {report=}") - # if report.name.endswith("old.json"): - # old_report = load_tef_report(report) - # else: - # new_report = load_tef_report(report) - - # if not old_report or not new_report: - # raise AssertionError( - # "Reports where missing in the file {report_path=}" - # ) - - multi_report = fpp.MultiPatchReport(report_path.full_path(), TEFReport) - - old_features = get_feature_performance_from_tef_report( - multi_report.get_old_report() - ) - new_features = get_feature_performance_from_tef_report( - multi_report.get_new_report() + multi_report = fpp.MultiPatchReport( + report_path.full_path(), TEFReportAggregate ) - # TODO: correctly implement how to identify a regression - for feature, old_value in old_features.items(): - if feature in new_features: - new_value = new_features[feature] - if abs(new_value - old_value) > 10000: - print(f"Found regression for feature {feature}.") + old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for old_tef_report in multi_report.get_old_report().reports(): + pim = get_feature_performance_from_tef_report(old_tef_report) + for feature, value in pim.items(): + old_acc_pim[feature].append(value) + + new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for new_tef_report in multi_report.get_new_report().reports(): + pim = get_feature_performance_from_tef_report(new_tef_report) + for feature, value in pim.items(): + new_acc_pim[feature].append(value) + + for feature, old_values in old_acc_pim.items(): + if feature in new_acc_pim: + new_values = new_acc_pim[feature] + ttest_res = ttest_ind(old_values, new_values) + + # TODO: check, maybe we need a "very small value cut off" + if ttest_res.pvalue < 0.05: + print( + f"{self.name} found regression 
for feature {feature}." + ) is_regression = True else: print(f"Could not find feature {feature} in new trace.") @@ -335,7 +333,6 @@ def compute_profiler_predictions( result_dict[config_id] = profiler.is_regression(report_files[0]) - print(f"{result_dict=}") return result_dict From b2db08487e0149a05d13e1b38f00898609c02340 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 9 Jul 2023 13:45:40 +0200 Subject: [PATCH 032/224] Adds extra func. to latex table rendering --- varats/varats/table/table_utils.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/varats/varats/table/table_utils.py b/varats/varats/table/table_utils.py index d6e1d2f1b..4ab944ef2 100644 --- a/varats/varats/table/table_utils.py +++ b/varats/varats/table/table_utils.py @@ -10,7 +10,10 @@ def wrap_table_in_latex_document( - table: str, landscape: bool = False, margin: float = 1.5 + table: str, + landscape: bool = False, + margin: float = 1.5, + document_decorator: tp.Callable[[Document], None] = lambda _: None ) -> str: """ Wraps a table inside a proper latex document. @@ -39,11 +42,14 @@ def wrap_table_in_latex_document( Package("hyperref"), Package("longtable"), Package("multirow"), - Package("xcolor", options=["table"]), + Package("multicol"), + Package("xcolor", options=["table", "dvipsnames"]), ]) doc.change_document_style("empty") + document_decorator(doc) + # embed latex table inside document doc.append(NoEscape(table)) @@ -56,6 +62,8 @@ def dataframe_to_table( style: tp.Optional["pd.io.formats.style.Styler"] = None, wrap_table: bool = False, wrap_landscape: bool = False, + margin: float = 1.5, + document_decorator: tp.Callable[[Document], None] = lambda _: None, **kwargs: tp.Any ) -> str: """ @@ -70,6 +78,9 @@ def dataframe_to_table( document (latex only) wrap_landscape: whether to use landscape mode to wrap the table (latex only) + margin: margin around the table in cm + document_decorator: callable function to decorate the document with + addition things (e.g., packages, macros, etc.) 
**kwargs: kwargs that get passed to pandas' conversion functions (``DataFrame.to_latex`` or ``DataFrame.to_html``) @@ -82,7 +93,12 @@ def dataframe_to_table( if table_format.is_latex(): table = style.to_latex(**kwargs) if wrap_table: - table = wrap_table_in_latex_document(table, wrap_landscape) + table = wrap_table_in_latex_document( + table, + wrap_landscape, + margin=margin, + document_decorator=document_decorator + ) elif table_format.is_html(): table = style.to_html(**kwargs) From da7a1f39faec600e072c299c0f5a8643b50d4296 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 9 Jul 2023 13:45:57 +0200 Subject: [PATCH 033/224] Polish latex for precision table --- .../varats/tables/feature_perf_precision.py | 49 ++++++++++++++----- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index adb7d08f5..a0da06ec7 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd +from pylatex import Document, NoEscape, Package from scipy.stats import ttest_ind import varats.experiments.vara.feature_perf_precision as fpp @@ -363,7 +364,7 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: len(case_study.get_config_ids_for_revision(rev)), 'RegressedConfigs': len(map_to_positive_config_ids(ground_truth)) - if ground_truth else np.nan + if ground_truth else -1 } for profiler in profilers: @@ -394,17 +395,26 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: df = pd.concat([df, pd.DataFrame(table_rows)]) df.sort_values(["CaseStudy"], inplace=True) - df.set_index( - ["CaseStudy"], - inplace=True, - ) + print(f"{df=}") + #df.set_index( + # ["CaseStudy"], + # inplace=True, + #) + # df = df.astype({'RegressedConfigs': 'int'}) + + symb_precision = "\\textsc{PPV}" + symb_recall = "\\textsc{TPR}" + symb_b_accuracy = "\\textsc{BA}" + symb_configs = "$\\mathbb{C}$" + symb_regressed_configs = "$\\mathbb{R}$" print(f"{df=}") - colum_setup = [('', 'Configs'), ('', 'RegressedConfigs')] + colum_setup = [(' ', 'CaseStudy'), ('', f'{symb_configs}'), + ('', f'{symb_regressed_configs}')] for profiler in profilers: - colum_setup.append((profiler.name, 'Precision')) - colum_setup.append((profiler.name, 'Recall')) - colum_setup.append((profiler.name, 'BAcc')) + colum_setup.append((profiler.name, f'{symb_precision}')) + colum_setup.append((profiler.name, f'{symb_recall}')) + colum_setup.append((profiler.name, f'{symb_b_accuracy}')) print(f"{colum_setup=}") df.columns = pd.MultiIndex.from_tuples(colum_setup) @@ -413,15 +423,32 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: print(f"{df=}") + style: pd.io.formats.style.Styler = df.style kwargs: tp.Dict[str, tp.Any] = {} - # if table_format.is_latex(): - # kwargs["column_format"] = "llr|rr|r|r" + if table_format.is_latex(): + kwargs["hrules"] = True + kwargs["column_format"] = "l|rr|rrr" + kwargs["multicol_align"] = "|c" + kwargs[ + "caption" + ] = f"""Localization precision of different performance profiling approaches to detect configuration-specific performance regression detection. +On the left, we show the amount of different configurations ({symb_configs}) analyzed and the amount of regressed configurations ({symb_regressed_configs}), determined through our baseline measurements. 
+Furthermore, the table depicts for each profiler, precision ({symb_precision}), recall ({symb_recall}), and balanced accuracy ({symb_b_accuracy}). +""" + style.format(precision=2) + style.hide() + + def add_extras(doc: Document) -> None: + doc.packages.append(Package("amsmath")) + doc.packages.append(Package("amssymb")) return dataframe_to_table( df, table_format, + style=style, wrap_table=wrap_table, wrap_landscape=True, + document_decorator=add_extras, **kwargs ) From 75212eb9c9ccda436c889e4a8bf622651ed52d65 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 9 Jul 2023 18:16:53 +0200 Subject: [PATCH 034/224] Adds overhead measurement experiments with eval table --- .../vara/feature_perf_precision.py | 266 +++++++++++++++++- .../varats/tables/feature_perf_precision.py | 245 +++++++++++++++- 2 files changed, 508 insertions(+), 3 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 44c1f15ba..a2318a362 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -179,7 +179,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: self.project, self.__binary, [WorkloadCategory.EXAMPLE] ): local_tracefile_path = Path(reps_tmp_dir) / ( - f"trace_{prj_command.command.label}_{rep}_" + f"trace_{prj_command.command.label}_{rep}" f".{self.__report_file_ending}" ) with local.env(VARA_TRACE_FILE=local_tracefile_path): @@ -322,7 +322,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: self.project, self.__binary, [WorkloadCategory.EXAMPLE] ): time_report_file = Path(reps_tmp_dir) / ( - f"baseline_{prj_command.command.label}_{rep}_" + f"baseline_{prj_command.command.label}_{rep}" f".{self.__report_file_ending}" ) @@ -411,3 +411,265 @@ def actions_for_project( analysis_actions.append(actions.Clean(project)) return analysis_actions + + +################################################################################ +# Overhead computation +################################################################################ + + +class RunTEFTracedWorkloadsOverhead(ProjectStep): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." 
+ + project: VProject + + def __init__( + self, + project: VProject, + binary: ProjectBinaryWrapper, + report_file_ending: str = "txt", + reps=2 + ): + super().__init__(project=project) + self.__binary = binary + self.__report_file_ending = report_file_ending + self.__reps = reps + + def __call__(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Run instrumented code", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + for rep in range(0, self.__reps): + for prj_command in workload_commands( + self.project, self.__binary, [WorkloadCategory.EXAMPLE] + ): + base = Path("/tmp/") + fake_tracefile_path = base / ( + f"trace_{prj_command.command.label}_{rep}" + f".json" + ) + + time_report_file = tmp_dir / ( + f"overhead_{prj_command.command.label}_{rep}" + f".{self.__report_file_ending}" + ) + + with local.env(VARA_TRACE_FILE=fake_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print(f"Running example {prj_command.command.label}") + + timed_pb_cmd = time["-v", "-o", time_report_file, + pb_cmd] + + extra_options = get_extra_config_options(self.project) + with cleanup(prj_command): + timed_pb_cmd( + *extra_options, + retcode=self.__binary.valid_exit_codes + ) + + return StepResult.OK + + +class TEFProfileOverheadRunner(FeatureExperiment, shorthand="TEFo"): + """Test runner for feature performance.""" + + NAME = "RunTEFProfilerO" + + REPORT_SPEC = ReportSpecification(TimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + instr_type = FeatureInstrType.TEF + + project.cflags += self.get_vara_feature_cflags(project) + + threshold = 0 if project.DOMAIN.value is ProjectDomains.TEST else 100 + project.cflags += self.get_vara_tracing_cflags( + instr_type, project=project, instruction_threshold=threshold + ) + + project.ldflags += self.get_vara_tracing_ldflags() + + # Add the required runtime extensions to the project(s). + project.runtime_extension = bb_ext.run.RuntimeExtension( + project, self + ) << bb_ext.time.RunWithTime() + + # Add the required compiler extensions to the project(s). + project.compiler_extension = bb_ext.compiler.RunCompiler( + project, self + ) << WithUnlimitedStackSize() + + # Add own error handler to compile step. 
+ project.compile = get_default_compile_error_wrapped( + self.get_handle(), project, self.REPORT_SPEC.main_report + ) + + binary = project.binaries[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + self.get_handle(), + self.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + + analysis_actions = [] + + analysis_actions.append(actions.Compile(project)) + analysis_actions.append( + ZippedExperimentSteps( + result_filepath, [ + RunTEFTracedWorkloadsOverhead( # type: ignore + project, binary + ) + ] + ) + ) + analysis_actions.append(actions.Clean(project)) + + return analysis_actions + + +class RunBackBoxBaselineOverhead(ProjectStep): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." + + project: VProject + + def __init__( + self, + project: VProject, + binary: ProjectBinaryWrapper, + report_file_ending: str = "txt", + reps=2 + ): + super().__init__(project=project) + self.__binary = binary + self.__report_file_ending = report_file_ending + self.__reps = reps + + def __call__(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Measure profiling overhead", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + for rep in range(0, self.__reps): + for prj_command in workload_commands( + self.project, self.__binary, [WorkloadCategory.EXAMPLE] + ): + time_report_file = tmp_dir / ( + f"overhead_{prj_command.command.label}_{rep}" + f".{self.__report_file_ending}" + ) + + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print(f"Running example {prj_command.command.label}") + + timed_pb_cmd = time["-v", "-o", time_report_file, pb_cmd] + + extra_options = get_extra_config_options(self.project) + with cleanup(prj_command): + timed_pb_cmd( + *extra_options, + retcode=self.__binary.valid_exit_codes + ) + + return StepResult.OK + + +class BlackBoxOverheadBaseline(FeatureExperiment, shorthand="BBBaseO"): + """Test runner for feature performance.""" + + NAME = "GenBBBaselineO" + + REPORT_SPEC = ReportSpecification(TimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + project.cflags += ["-flto", "-fuse-ld=lld", "-fno-omit-frame-pointer"] + + project.ldflags += self.get_vara_tracing_ldflags() + + # Add the required runtime extensions to the project(s). + project.runtime_extension = bb_ext.run.RuntimeExtension( + project, self + ) << bb_ext.time.RunWithTime() + + # Add the required compiler extensions to the project(s). + project.compiler_extension = bb_ext.compiler.RunCompiler( + project, self + ) << WithUnlimitedStackSize() + + # Add own error handler to compile step. 
+ project.compile = get_default_compile_error_wrapped( + self.get_handle(), project, self.REPORT_SPEC.main_report + ) + + binary = project.binaries[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + self.get_handle(), + self.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + + analysis_actions = [] + + analysis_actions.append(actions.Compile(project)) + analysis_actions.append( + ZippedExperimentSteps( + result_filepath, + [ + RunBackBoxBaselineOverhead( # type: ignore + project, + binary + ), + ] + ) + ) + analysis_actions.append(actions.Clean(project)) + + return analysis_actions diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index a0da06ec7..d3d353b66 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -236,10 +236,12 @@ class Profiler(): def __init__( self, name: str, experiment: tp.Type[FeatureExperiment], + overhead_experiment: tp.Type[FeatureExperiment], report_type: tp.Type[BaseReport] ) -> None: self.__name = name self.__experiment = experiment + self.__overhead_experiment = overhead_experiment self.__report_type = report_type @property @@ -252,6 +254,11 @@ def experiment(self) -> tp.Type[FeatureExperiment]: """Experiment used to produce this profilers information.""" return self.__experiment + @property + def overhead_experiment(self) -> tp.Type[FeatureExperiment]: + """Experiment used to produce this profilers information.""" + return self.__overhead_experiment + @property def report_type(self) -> tp.Type[BaseReport]: """Report type used to load this profilers information.""" @@ -266,7 +273,10 @@ class VXray(Profiler): """Profiler mapper implementation for the vara tef tracer.""" def __init__(self) -> None: - super().__init__("WXray", fpp.TEFProfileRunner, fpp.MPRTEFA) + super().__init__( + "WXray", fpp.TEFProfileRunner, fpp.TEFProfileOverheadRunner, + fpp.MPRTEFA + ) def is_regression(self, report_path: ReportFilepath) -> bool: """Checks if there was a regression between the old an new data.""" @@ -307,6 +317,19 @@ def is_regression(self, report_path: ReportFilepath) -> bool: return is_regression +class Baseline(Profiler): + """Profiler mapper implementation for the black-box baseline.""" + + def __init__(self) -> None: + super().__init__( + "Base", fpp.BlackBoxBaselineRunner, fpp.BlackBoxOverheadBaseline, + fpp.TimeReportAggregate + ) + + def is_regression(self, report_path: ReportFilepath) -> bool: + raise NotImplementedError() + + def compute_profiler_predictions( profiler: Profiler, project_name: str, case_study: CaseStudy, config_ids: tp.List[int] @@ -462,3 +485,223 @@ def generate(self) -> tp.List[Table]: return [ FeaturePerfPrecisionTable(self.table_config, **self.table_kwargs) ] + + +class OverheadData: + + def __init__( + self, profiler, mean_time: tp.Dict[int, float], + ctx_switches: tp.Dict[int, float] + ) -> None: + self.__profiler = profiler + self.__mean_time: tp.Dict[int, float] = mean_time + self.__mean_ctx_switches: tp.Dict[int, float] = ctx_switches + + def mean_time(self) -> float: + return np.mean(list(map(lambda x: float(x), self.__mean_time.values()))) + + def mean_ctx(self) -> float: + return np.mean( + list(map(lambda x: float(x), self.__mean_ctx_switches.values())) + ) + + def config_wise_time_diff(self, + other: 'OverheadData') -> tp.Dict[int, float]: + return 
self.__config_wise(self.__mean_time, other.__mean_time) + + def config_wise_ctx_diff(self, + other: 'OverheadData') -> tp.Dict[int, float]: + return self.__config_wise( + self.__mean_ctx_switches, other.__mean_ctx_switches + ) + + @staticmethod + def __config_wise( + self_map: tp.Dict[int, float], other_map: tp.Dict[int, float] + ) -> tp.Dict[int, float]: + gen_diff: tp.Dict[int, float] = {} + for config_id, gen_value in self_map.items(): + if config_id not in other_map: + raise AssertionError("Could not find config id in other") + + gen_diff[config_id] = gen_value - other_map[config_id] + + return gen_diff + + @staticmethod + def compute_overhead_data( + profiler: Profiler, case_study: CaseStudy, rev: FullCommitHash + ) -> tp.Optional['OverheadData']: + + mean_time: tp.Dict[int, float] = {} + mean_cxt_switches: tp.Dict[int, float] = {} + + for config_id in case_study.get_config_ids_for_revision(rev): + report_files = get_processed_revisions_files( + case_study.project_name, + profiler.overhead_experiment, + TimeReportAggregate, + get_case_study_file_name_filter(case_study), + config_id=config_id + ) + + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print( + f"Could not find overhead data. {config_id=}, " + f"profiler={profiler.name}" + ) + return None + + time_report = TimeReportAggregate(report_files[0].full_path()) + mean_time[config_id] = float( + np.mean(time_report.measurements_wall_clock_time) + ) + mean_cxt_switches[config_id] = float( + np.mean(time_report.measurements_ctx_switches) + ) + if not mean_time: + print( + f"Case study for project {case_study.project_name} had " + "no configs, skipping..." + ) + return None + + # print(f"{mean_time=}") + return OverheadData(profiler, mean_time, mean_cxt_switches) + + +class FeaturePerfOverheadTable(Table, table_name="fperf_overhead"): + """Table that compares overhead of different feature performance measurement + approaches.""" + + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray()] + + # Data aggregation + df = pd.DataFrame() + table_rows = [] + + for case_study in case_studies: + rev = case_study.revisions[0] + project_name = case_study.project_name + + overhead_ground_truth = OverheadData.compute_overhead_data( + Baseline(), case_study, rev + ) + if not overhead_ground_truth: + print( + f"No baseline data for {case_study.project_name}, skipping" + ) + continue + + new_row = { + 'CaseStudy': project_name, + 'WithoutProfiler_mean_time': overhead_ground_truth.mean_time(), + 'WithoutProfiler_mean_ctx': overhead_ground_truth.mean_ctx() + } + + for profiler in profilers: + profiler_overhead = OverheadData.compute_overhead_data( + profiler, case_study, rev + ) + if profiler_overhead: + time_diff = profiler_overhead.config_wise_time_diff( + overhead_ground_truth + ) + ctx_diff = profiler_overhead.config_wise_ctx_diff( + overhead_ground_truth + ) + print(f"{time_diff=}") + new_row[f"{profiler.name}_time_mean"] = np.mean( + list(time_diff.values()) + ) + new_row[f"{profiler.name}_time_std"] = np.std( + list(time_diff.values()) + ) + new_row[f"{profiler.name}_time_max"] = np.max( + list(time_diff.values()) + ) + new_row[f"{profiler.name}_ctx_mean"] = np.mean( + list(ctx_diff.values()) + ) + new_row[f"{profiler.name}_ctx_std"] = np.std( + list(ctx_diff.values()) + ) + new_row[f"{profiler.name}_ctx_max"] = np.max( + list(ctx_diff.values()) + ) + else: + 
new_row[f"{profiler.name}_time_mean"] = np.nan + new_row[f"{profiler.name}_time_std"] = np.nan + new_row[f"{profiler.name}_time_max"] = np.nan + + new_row[f"{profiler.name}_ctx_mean"] = np.nan + new_row[f"{profiler.name}_ctx_std"] = np.nan + new_row[f"{profiler.name}_ctx_max"] = np.nan + + table_rows.append(new_row) + # df.append(new_row, ignore_index=True) + + df = pd.concat([df, pd.DataFrame(table_rows)]) + df.sort_values(["CaseStudy"], inplace=True) + print(f"{df=}") + + colum_setup = [('', '', 'CaseStudy'), ('Baseline', 'time', 'mean'), + ('Baseline', 'ctx', 'mean')] + for profiler in profilers: + colum_setup.append((profiler.name, 'time', 'mean')) + colum_setup.append((profiler.name, 'time', 'std')) + colum_setup.append((profiler.name, 'time', 'max')) + + colum_setup.append((profiler.name, 'ctx', 'mean')) + colum_setup.append((profiler.name, 'ctx', 'std')) + colum_setup.append((profiler.name, 'ctx', 'max')) + + print(f"{colum_setup=}") + df.columns = pd.MultiIndex.from_tuples(colum_setup) + + # Table config + + print(f"{df=}") + + style: pd.io.formats.style.Styler = df.style + kwargs: tp.Dict[str, tp.Any] = {} + if table_format.is_latex(): + kwargs["hrules"] = True + kwargs["column_format"] = "l|rr|rrrrrr" + kwargs["multicol_align"] = "|c" + kwargs["caption" + ] = """This table depicts the overhead measurement data. +For each case study, we show on the left the mean time it took to execute it without instrumentation (Baseline). +To the right of the baseline, we show for each profiler the induced overhead. +""" + style.format(precision=2) + style.hide() + + def add_extras(doc: Document) -> None: + doc.packages.append(Package("amsmath")) + doc.packages.append(Package("amssymb")) + + return dataframe_to_table( + df, + table_format, + style=style, + wrap_table=wrap_table, + wrap_landscape=True, + document_decorator=add_extras, + **kwargs + ) + + +class FeaturePerfOverheadTableGenerator( + TableGenerator, generator_name="fperf-overhead", options=[] +): + """Generator for `FeaturePerfOverheadTable`.""" + + def generate(self) -> tp.List[Table]: + return [ + FeaturePerfOverheadTable(self.table_config, **self.table_kwargs) + ] From d15918aa6417f47fb95b3386d222bd5022a04434 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 9 Jul 2023 20:12:16 +0200 Subject: [PATCH 035/224] Implements experiments and table support for PIM tracer --- .../vara/feature_perf_precision.py | 326 +++++++++++------- .../varats/tables/feature_perf_precision.py | 70 +++- 2 files changed, 278 insertions(+), 118 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index a2318a362..d73ea1986 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -19,6 +19,9 @@ from benchbuild.utils.cmd import time from plumbum import local, ProcessExecutionError +from varats.data.reports.performance_influence_trace_report import ( + PerfInfluenceTraceReportAggregate, +) from varats.experiment.experiment_util import ( ExperimentHandle, VersionExperiment, @@ -46,6 +49,25 @@ from varats.utils.git_commands import apply_patch +class AnalysisProjectStepBase(ProjectStep): + + project: VProject + + def __init__( + self, + project: VProject, + binary: ProjectBinaryWrapper, + result_post_fix: str = "", + report_file_ending: str = "json", + reps=2 + ): + super().__init__(project=project) + self._binary = binary + self._report_file_ending = report_file_ending + 
self._result_pre_fix = result_post_fix + self._reps = reps + + class MultiPatchReport( BaseReport, tp.Generic[ReportTy], shorthand="MPR", file_type=".zip" ): @@ -90,6 +112,14 @@ def __init__(self, path: Path) -> None: super().__init__(path, TEFReportAggregate) +class MPRPIMA( + MultiPatchReport[TEFReportAggregate], shorthand="MPRPIMA", file_type=".zip" +): + + def __init__(self, path: Path) -> None: + super().__init__(path, PerfInfluenceTraceReportAggregate) + + class ReCompile(ProjectStep): NAME = "RECOMPILE" DESCRIPTION = "Recompile the project" @@ -139,7 +169,7 @@ def __str__(self, indent: int = 0) -> str: ) -class RunTEFTracedWorkloads(ProjectStep): # type: ignore +class RunGenTracedWorkloads(AnalysisProjectStepBase): # type: ignore """Executes the traced project binaries on the specified workloads.""" NAME = "VaRARunTracedBinaries" @@ -155,11 +185,9 @@ def __init__( report_file_ending: str = "json", reps=2 ): - super().__init__(project=project) - self.__binary = binary - self.__report_file_ending = report_file_ending - self.__result_pre_fix = result_post_fix - self.__reps = reps + super().__init__( + project, binary, result_post_fix, report_file_ending, reps + ) def __call__(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) @@ -172,15 +200,15 @@ def __str__(self, indent: int = 0) -> str: def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): - zip_tmp_dir = tmp_dir / f"{self.__result_pre_fix}_rep_measures" + zip_tmp_dir = tmp_dir / f"{self._result_pre_fix}_rep_measures" with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: - for rep in range(0, self.__reps): + for rep in range(0, self._reps): for prj_command in workload_commands( - self.project, self.__binary, [WorkloadCategory.EXAMPLE] + self.project, self._binary, [WorkloadCategory.EXAMPLE] ): local_tracefile_path = Path(reps_tmp_dir) / ( f"trace_{prj_command.command.label}_{rep}" - f".{self.__report_file_ending}" + f".{self._report_file_ending}" ) with local.env(VARA_TRACE_FILE=local_tracefile_path): pb_cmd = prj_command.command.as_plumbum( @@ -196,12 +224,79 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: with cleanup(prj_command): pb_cmd( *extra_options, - retcode=self.__binary.valid_exit_codes + retcode=self._binary.valid_exit_codes ) return StepResult.OK +def setup_actions_for_vara_experiment( + experiment: FeatureExperiment, project: VProject, + instr_type: FeatureInstrType, + analysis_step: tp.Type[AnalysisProjectStepBase] +) -> tp.MutableSequence[actions.Step]: + + project.cflags += experiment.get_vara_feature_cflags(project) + + threshold = 0 if project.DOMAIN.value is ProjectDomains.TEST else 100 + project.cflags += experiment.get_vara_tracing_cflags( + instr_type, project=project, instruction_threshold=threshold + ) + + project.ldflags += experiment.get_vara_tracing_ldflags() + + # Add the required runtime extensions to the project(s). + project.runtime_extension = bb_ext.run.RuntimeExtension( + project, experiment + ) << bb_ext.time.RunWithTime() + + # Add the required compiler extensions to the project(s). + project.compiler_extension = bb_ext.compiler.RunCompiler( + project, experiment + ) << WithUnlimitedStackSize() + + # Add own error handler to compile step. 
+ project.compile = get_default_compile_error_wrapped( + experiment.get_handle(), project, experiment.REPORT_SPEC.main_report + ) + + binary = project.binaries[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + experiment.get_handle(), + experiment.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + + analysis_actions = [] + + analysis_actions.append(actions.Compile(project)) + analysis_actions.append( + ZippedExperimentSteps( + result_filepath, [ # type: ignore + analysis_step( + project, binary, result_post_fix="old" + ), + ApplyPatch( + project, + Path( + "/home/vulder/git/FeaturePerfCSCollection/test.patch" + ) + ), + ReCompile(project), + analysis_step( + project, binary, result_post_fix="new" + ) + ] + ) + ) + analysis_actions.append(actions.Clean(project)) + + return analysis_actions + + class TEFProfileRunner(FeatureExperiment, shorthand="TEFp"): """Test runner for feature performance.""" @@ -219,67 +314,32 @@ def actions_for_project( Args: project: to analyze """ - instr_type = FeatureInstrType.TEF - - project.cflags += self.get_vara_feature_cflags(project) - - threshold = 0 if project.DOMAIN.value is ProjectDomains.TEST else 100 - project.cflags += self.get_vara_tracing_cflags( - instr_type, project=project, instruction_threshold=threshold + return setup_actions_for_vara_experiment( + self, project, FeatureInstrType.TEF, RunGenTracedWorkloads ) - project.ldflags += self.get_vara_tracing_ldflags() - - # Add the required runtime extensions to the project(s). - project.runtime_extension = bb_ext.run.RuntimeExtension( - project, self - ) << bb_ext.time.RunWithTime() - # Add the required compiler extensions to the project(s). - project.compiler_extension = bb_ext.compiler.RunCompiler( - project, self - ) << WithUnlimitedStackSize() - - # Add own error handler to compile step. - project.compile = get_default_compile_error_wrapped( - self.get_handle(), project, self.REPORT_SPEC.main_report - ) +class PIMProfileRunner(FeatureExperiment, shorthand="PIMp"): + """Test runner for feature performance.""" - binary = project.binaries[0] - if binary.type != BinaryType.EXECUTABLE: - raise AssertionError("Experiment only works with executables.") + NAME = "RunPIMProfiler" - result_filepath = create_new_success_result_filepath( - self.get_handle(), - self.get_handle().report_spec().main_report, project, binary, - get_current_config_id(project) - ) + REPORT_SPEC = ReportSpecification(MPRPIMA) - analysis_actions = [] + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. 
- analysis_actions.append(actions.Compile(project)) - analysis_actions.append( - ZippedExperimentSteps( - result_filepath, [ # type: ignore - RunTEFTracedWorkloads( - project, binary, result_post_fix="old" - ), - ApplyPatch( - project, - Path( - "/home/vulder/git/FeaturePerfCSCollection/test.patch" - ) - ), - ReCompile(project), - RunTEFTracedWorkloads( - project, binary, result_post_fix="new" - ) - ] - ) + Args: + project: to analyze + """ + return setup_actions_for_vara_experiment( + self, project, FeatureInstrType.PERF_INFLUENCE_TRACE, + RunGenTracedWorkloads ) - analysis_actions.append(actions.Clean(project)) - - return analysis_actions class RunBackBoxBaseline(ProjectStep): # type: ignore @@ -418,7 +478,7 @@ def actions_for_project( ################################################################################ -class RunTEFTracedWorkloadsOverhead(ProjectStep): # type: ignore +class RunGenTracedWorkloadsOverhead(AnalysisProjectStepBase): # type: ignore """Executes the traced project binaries on the specified workloads.""" NAME = "VaRARunTracedBinaries" @@ -430,13 +490,13 @@ def __init__( self, project: VProject, binary: ProjectBinaryWrapper, + result_post_fix: str = "", report_file_ending: str = "txt", reps=2 ): - super().__init__(project=project) - self.__binary = binary - self.__report_file_ending = report_file_ending - self.__reps = reps + super().__init__( + project, binary, result_post_fix, report_file_ending, reps + ) def __call__(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) @@ -449,9 +509,9 @@ def __str__(self, indent: int = 0) -> str: def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): - for rep in range(0, self.__reps): + for rep in range(0, self._reps): for prj_command in workload_commands( - self.project, self.__binary, [WorkloadCategory.EXAMPLE] + self.project, self._binary, [WorkloadCategory.EXAMPLE] ): base = Path("/tmp/") fake_tracefile_path = base / ( @@ -461,7 +521,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: time_report_file = tmp_dir / ( f"overhead_{prj_command.command.label}_{rep}" - f".{self.__report_file_ending}" + f".{self._report_file_ending}" ) with local.env(VARA_TRACE_FILE=fake_tracefile_path): @@ -477,12 +537,71 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: with cleanup(prj_command): timed_pb_cmd( *extra_options, - retcode=self.__binary.valid_exit_codes + retcode=self._binary.valid_exit_codes ) return StepResult.OK +def setup_actions_for_vara_overhead_experiment( + experiment: FeatureExperiment, project: VProject, + instr_type: FeatureInstrType, + analysis_step: tp.Type[AnalysisProjectStepBase] +) -> tp.MutableSequence[actions.Step]: + instr_type = FeatureInstrType.TEF + + project.cflags += experiment.get_vara_feature_cflags(project) + + threshold = 0 if project.DOMAIN.value is ProjectDomains.TEST else 100 + project.cflags += experiment.get_vara_tracing_cflags( + instr_type, project=project, instruction_threshold=threshold + ) + + project.ldflags += experiment.get_vara_tracing_ldflags() + + # Add the required runtime extensions to the project(s). + project.runtime_extension = bb_ext.run.RuntimeExtension( + project, experiment + ) << bb_ext.time.RunWithTime() + + # Add the required compiler extensions to the project(s). + project.compiler_extension = bb_ext.compiler.RunCompiler( + project, experiment + ) << WithUnlimitedStackSize() + + # Add own error handler to compile step. 
+ project.compile = get_default_compile_error_wrapped( + experiment.get_handle(), project, experiment.REPORT_SPEC.main_report + ) + + binary = project.binaries[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + experiment.get_handle(), + experiment.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + + analysis_actions = [] + + analysis_actions.append(actions.Compile(project)) + analysis_actions.append( + ZippedExperimentSteps( + result_filepath, + [ + analysis_step( # type: ignore + project, binary + ) + ] + ) + ) + analysis_actions.append(actions.Clean(project)) + + return analysis_actions + + class TEFProfileOverheadRunner(FeatureExperiment, shorthand="TEFo"): """Test runner for feature performance.""" @@ -500,57 +619,32 @@ def actions_for_project( Args: project: to analyze """ - instr_type = FeatureInstrType.TEF - - project.cflags += self.get_vara_feature_cflags(project) - - threshold = 0 if project.DOMAIN.value is ProjectDomains.TEST else 100 - project.cflags += self.get_vara_tracing_cflags( - instr_type, project=project, instruction_threshold=threshold + return setup_actions_for_vara_overhead_experiment( + self, project, FeatureInstrType.TEF, RunGenTracedWorkloadsOverhead ) - project.ldflags += self.get_vara_tracing_ldflags() - # Add the required runtime extensions to the project(s). - project.runtime_extension = bb_ext.run.RuntimeExtension( - project, self - ) << bb_ext.time.RunWithTime() - - # Add the required compiler extensions to the project(s). - project.compiler_extension = bb_ext.compiler.RunCompiler( - project, self - ) << WithUnlimitedStackSize() +class PIMProfileOverheadRunner(FeatureExperiment, shorthand="PIMo"): + """Test runner for feature performance.""" - # Add own error handler to compile step. - project.compile = get_default_compile_error_wrapped( - self.get_handle(), project, self.REPORT_SPEC.main_report - ) + NAME = "RunPIMProfilerO" - binary = project.binaries[0] - if binary.type != BinaryType.EXECUTABLE: - raise AssertionError("Experiment only works with executables.") - - result_filepath = create_new_success_result_filepath( - self.get_handle(), - self.get_handle().report_spec().main_report, project, binary, - get_current_config_id(project) - ) + REPORT_SPEC = ReportSpecification(TimeReportAggregate) - analysis_actions = [] + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. 
- analysis_actions.append(actions.Compile(project)) - analysis_actions.append( - ZippedExperimentSteps( - result_filepath, [ - RunTEFTracedWorkloadsOverhead( # type: ignore - project, binary - ) - ] - ) + Args: + project: to analyze + """ + return setup_actions_for_vara_overhead_experiment( + self, project, FeatureInstrType.PERF_INFLUENCE_TRACE, + RunGenTracedWorkloadsOverhead ) - analysis_actions.append(actions.Clean(project)) - - return analysis_actions class RunBackBoxBaselineOverhead(ProjectStep): # type: ignore diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index d3d353b66..5a8df6121 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -222,6 +222,12 @@ def recall(self) -> float: return self.TP / self.P def specificity(self) -> float: + if self.N == 0: + if self.TN == 0: + return 1.0 + + return 0.0 + return self.TN / self.N def accuracy(self) -> float: @@ -317,6 +323,66 @@ def is_regression(self, report_path: ReportFilepath) -> bool: return is_regression +class PIMTracer(Profiler): + """Profiler mapper implementation for the vara performance-influence-model + tracer.""" + + def __init__(self) -> None: + super().__init__( + "PIM Tracer", fpp.PIMProfileRunner, fpp.PIMProfileOverheadRunner, + fpp.MPRPIMA + ) + + def is_regression(self, report_path: ReportFilepath) -> bool: + """Checks if there was a regression between the old an new data.""" + is_regression = False + + multi_report = fpp.MultiPatchReport( + report_path.full_path(), fpp.PerfInfluenceTraceReportAggregate + ) + + old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for old_pim_report in multi_report.get_old_report().reports(): + for region_inter in old_pim_report.region_interaction_entries: + name = get_interactions_from_fr_string( + old_pim_report._translate_interaction( + region_inter.interaction + ) + ) + time = region_inter.time + old_acc_pim[name].append(time) + + new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for new_pim_report in multi_report.get_new_report().reports(): + for region_inter in new_pim_report.region_interaction_entries: + name = get_interactions_from_fr_string( + new_pim_report._translate_interaction( + region_inter.interaction + ) + ) + time = region_inter.time + new_acc_pim[name].append(time) + + # TODO: same for TEF + for feature, old_values in old_acc_pim.items(): + if feature in new_acc_pim: + new_values = new_acc_pim[feature] + ttest_res = ttest_ind(old_values, new_values) + + # TODO: check, maybe we need a "very small value cut off" + if ttest_res.pvalue < 0.05: + print( + f"{self.name} found regression for feature {feature}." + ) + is_regression = True + else: + print(f"Could not find feature {feature} in new trace.") + # TODO: how to handle this? 
+ is_regression = True + + return is_regression + + class Baseline(Profiler): """Profiler mapper implementation for the black-box baseline.""" @@ -366,7 +432,7 @@ class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: case_studies = get_loaded_paper_config().get_all_case_studies() - profilers: tp.List[Profiler] = [VXray()] + profilers: tp.List[Profiler] = [VXray(), PIMTracer()] # Data aggregation df = pd.DataFrame() @@ -578,7 +644,7 @@ class FeaturePerfOverheadTable(Table, table_name="fperf_overhead"): def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: case_studies = get_loaded_paper_config().get_all_case_studies() - profilers: tp.List[Profiler] = [VXray()] + profilers: tp.List[Profiler] = [VXray(), PIMTracer()] # Data aggregation df = pd.DataFrame() From 39f604efd9ef69ccd6a98fcb8f1c3a0507bf9f32 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 9 Jul 2023 20:19:41 +0200 Subject: [PATCH 036/224] Fixes latex table gen for multiple profilers --- varats/varats/tables/feature_perf_precision.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 5a8df6121..903ba5e74 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -485,11 +485,6 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: df = pd.concat([df, pd.DataFrame(table_rows)]) df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") - #df.set_index( - # ["CaseStudy"], - # inplace=True, - #) - # df = df.astype({'RegressedConfigs': 'int'}) symb_precision = "\\textsc{PPV}" symb_recall = "\\textsc{TPR}" @@ -516,7 +511,10 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: kwargs: tp.Dict[str, tp.Any] = {} if table_format.is_latex(): kwargs["hrules"] = True - kwargs["column_format"] = "l|rr|rrr" + column_format = "l|rr" + for _ in profilers: + column_format += "|rrr" + kwargs["column_format"] = column_format kwargs["multicol_align"] = "|c" kwargs[ "caption" @@ -737,7 +735,10 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: kwargs: tp.Dict[str, tp.Any] = {} if table_format.is_latex(): kwargs["hrules"] = True - kwargs["column_format"] = "l|rr|rrrrrr" + column_format = "l|rr" + for _ in profilers: + column_format += "|rrrrrr" + kwargs["column_format"] = column_format kwargs["multicol_align"] = "|c" kwargs["caption" ] = """This table depicts the overhead measurement data. 
From 05e2d1a39a34f788f7a672a8a384f01ce8de2e56 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 9 Jul 2023 22:33:48 +0200 Subject: [PATCH 037/224] Refactors classification calculation into separate helper file --- varats/varats/data/metrics.py | 125 ++++++++++++++++++ .../varats/tables/feature_perf_precision.py | 78 +---------- 2 files changed, 126 insertions(+), 77 deletions(-) diff --git a/varats/varats/data/metrics.py b/varats/varats/data/metrics.py index 90826a6f2..f45a394a3 100644 --- a/varats/varats/data/metrics.py +++ b/varats/varats/data/metrics.py @@ -132,3 +132,128 @@ def min_max_normalize(values: pd.Series) -> pd.Series: max_value = values.max() min_value = values.min() return tp.cast(pd.Series, (values - min_value) / (max_value - min_value)) + + +class ClassificationResults: + """Helper class to automatically calculate classification results.""" + + def __init__( + self, actual_positive_values: tp.List[tp.Any], + actual_negative_values: tp.List[tp.Any], + predicted_positive_values: tp.List[tp.Any], + predicted_negative_values: tp.List[tp.Any] + ) -> None: + self.__actual_positive_values = actual_positive_values + self.__actual_negative_values = actual_negative_values + self.__predicted_positive_values = predicted_positive_values + self.__predicted_negative_values = predicted_negative_values + + ################### + # Base values + + @property + def P(self) -> int: # pylint: disable=C0103 + return len(self.__actual_positive_values) + + @property + def N(self) -> int: # pylint: disable=C0103 + return len(self.__actual_negative_values) + + @property + def PP(self) -> int: # pylint: disable=C0103 + return len(self.__predicted_positive_values) + + @property + def PN(self) -> int: # pylint: disable=C0103 + return len(self.__predicted_negative_values) + + ################### + # Combined values + + @property + def TP(self) -> int: # pylint: disable=C0103 + return len( + set(self.__actual_positive_values + ).intersection(self.__predicted_positive_values) + ) + + @property + def TN(self) -> int: # pylint: disable=C0103 + return len( + set(self.__actual_negative_values + ).intersection(self.__predicted_negative_values) + ) + + @property + def FP(self) -> int: # pylint: disable=C0103 + return self.PP - self.TP + + @property + def FN(self) -> int: # pylint: disable=C0103 + return self.PN - self.TN + + ################### + # Interpretations + + def precision(self) -> float: + """Positive predictive value (PPV)""" + if self.PP == 0: + if self.TP == 0: + return 1.0 + + return 0.0 + + return self.TP / self.PP + + def recall(self) -> float: + """True positive rate (TPR)""" + if self.P == 0: + if self.TP == 0: + return 1.0 + + return 0.0 + + return self.TP / self.P + + def specificity(self) -> float: + """True negative rate (TNR)""" + if self.N == 0: + if self.TN == 0: + return 1.0 + + return 0.0 + + return self.TN / self.N + + def accuracy(self) -> float: + """Accuracy (ACC)""" + if (self.P + self.N) == 0: + if (self.TP + self.TN) == 0: + return 1.0 + + return 0.0 + + return (self.TP + self.TN) / (self.P + self.N) + + def balanced_accuracy(self) -> float: + """ + Balanced accuracy (BA)/(bACC) + + Balanced accuracy can serve as an overall performance metric for a + model, whether or not the true labels are imbalanced in the data, + assuming the cost of FN is the same as FP. 
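+
+        As a worked example: with P = N = 5, TP = 4, and TN = 3, recall is
+        0.8 and specificity is 0.6, giving a balanced accuracy of 0.7.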
+ """ + return (self.recall() + self.specificity()) / 2 + + def f1_score(self) -> float: + """In statistical analysis of binary classification, the F-score or + F-measure is a measure of a test's accuracy.""" + numerator = (2 * self.TP) + denominator = (2 * self.TP + self.FP + self.FN) + if denominator == 0.0: + if numerator == 0.0: + return 1.0 + + return 0.0 + + return numerator / denominator diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 903ba5e74..1bec56dc2 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -12,6 +12,7 @@ from scipy.stats import ttest_ind import varats.experiments.vara.feature_perf_precision as fpp +from varats.data.metrics import ClassificationResults from varats.experiments.vara.feature_experiment import FeatureExperiment from varats.experiments.vara.feature_perf_runner import FeaturePerfRunner from varats.jupyterhelper.file import load_tef_report @@ -160,83 +161,6 @@ def map_to_negative_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: ] -class ClassificationResults: - """Helper class to automatically calculate classification results.""" - - def __init__( - self, actual_positive_values: tp.List[tp.Any], - actual_negative_values: tp.List[tp.Any], - predicted_positive_values: tp.List[tp.Any], - predicted_negative_values: tp.List[tp.Any] - ) -> None: - self.__actual_positive_values = actual_positive_values - self.__actual_negative_values = actual_negative_values - self.__predicted_positive_values = predicted_positive_values - self.__predicted_negative_values = predicted_negative_values - - @property - def P(self) -> int: # pylint: disable=C0103 - return len(self.__actual_positive_values) - - @property - def N(self) -> int: # pylint: disable=C0103 - return len(self.__actual_negative_values) - - @property - def PP(self) -> int: # pylint: disable=C0103 - return len(self.__predicted_positive_values) - - @property - def PN(self) -> int: # pylint: disable=C0103 - return len(self.__predicted_negative_values) - - @property - def TP(self) -> int: # pylint: disable=C0103 - return len( - set(self.__actual_positive_values - ).intersection(self.__predicted_positive_values) - ) - - @property - def TN(self) -> int: # pylint: disable=C0103 - return len( - set(self.__actual_negative_values - ).intersection(self.__predicted_negative_values) - ) - - @property - def FP(self) -> int: # pylint: disable=C0103 - return self.PP - self.TP - - @property - def FN(self) -> int: # pylint: disable=C0103 - return self.PN - self.TN - - def precision(self) -> float: - if self.PP == 0: - return 0.0 - - return self.TP / self.PP - - def recall(self) -> float: - return self.TP / self.P - - def specificity(self) -> float: - if self.N == 0: - if self.TN == 0: - return 1.0 - - return 0.0 - - return self.TN / self.N - - def accuracy(self) -> float: - return (self.TP + self.TN) / (self.P + self.N) - - def balanced_accuracy(self) -> float: - return (self.recall() + self.specificity()) / 2 - - class Profiler(): """Profiler interface to add different profilers to the evaluation.""" From 14a0697b85dfa37c5b738f95adffa9f26ab96c54 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 10 Jul 2023 14:32:38 +0200 Subject: [PATCH 038/224] Removes unnecessary braces --- varats/varats/data/metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varats/varats/data/metrics.py b/varats/varats/data/metrics.py index f45a394a3..433f19295 100644 --- 
a/varats/varats/data/metrics.py +++ b/varats/varats/data/metrics.py @@ -248,8 +248,8 @@ def balanced_accuracy(self) -> float: def f1_score(self) -> float: """In statistical analysis of binary classification, the F-score or F-measure is a measure of a test's accuracy.""" - numerator = (2 * self.TP) - denominator = (2 * self.TP + self.FP + self.FN) + numerator = 2 * self.TP + denominator = 2 * self.TP + self.FP + self.FN if denominator == 0.0: if numerator == 0.0: return 1.0 From cd0a75ad5f86fd43f0f9337d31f2721e333bc4ac Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 11 Jul 2023 15:34:43 +0200 Subject: [PATCH 039/224] Adds total to precision table --- .../varats/tables/feature_perf_precision.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 1bec56dc2..78188e1be 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -410,6 +410,25 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") + # insert totals + totals = { + 'CaseStudy': "Total (avg)", + "Configs": 0, + "RegressedConfigs": 0 + } + for profiler in profilers: + totals[f"{profiler.name}_precision"] = df[ + f"{profiler.name}_precision"].mean() + totals[f"{profiler.name}_recall"] = df[f"{profiler.name}_recall" + ].mean() + totals[f"{profiler.name}_baccuracy"] = df[ + f"{profiler.name}_baccuracy"].mean() + + tdf = pd.DataFrame(totals, index=[0]) + df = pd.concat([df, tdf], ignore_index=True) + + print(f"{df=}") + symb_precision = "\\textsc{PPV}" symb_recall = "\\textsc{TPR}" symb_b_accuracy = "\\textsc{BA}" From 9c0e1ae49adad558dce626f3402b712fe53ed909 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 11 Jul 2023 20:26:18 +0200 Subject: [PATCH 040/224] Adds docs to classification results --- varats/varats/data/metrics.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/varats/varats/data/metrics.py b/varats/varats/data/metrics.py index 433f19295..53f7d1842 100644 --- a/varats/varats/data/metrics.py +++ b/varats/varats/data/metrics.py @@ -135,7 +135,14 @@ def min_max_normalize(values: pd.Series) -> pd.Series: class ClassificationResults: - """Helper class to automatically calculate classification results.""" + """ + Helper class to automatically calculate classification results. 
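+
+    For example, with actual positives {1, 2, 3}, actual negatives
+    {4, 5, 6}, predicted positives {1, 2, 4}, and predicted negatives
+    {3, 5, 6}, the derived counts come out to TP = 2, FP = 1, TN = 2,
+    and FN = 1.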
+ + | Predicted Positive (PP) | Predicted Negative (PN) + --------------------|---------------------------|-------------------------- + Actual Positive (P) | True Positive (TP) | False Negative (FN) + Actual Negative (N) | False Positive (FP) | True Negative (TN) + """ def __init__( self, actual_positive_values: tp.List[tp.Any], From feee86d83a78343e1242dfa51c71a6e1d9d8e453 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 11 Jul 2023 21:21:22 +0200 Subject: [PATCH 041/224] Adds test for precision score --- tests/data/test_metrics.py | 120 ++++++++++++++++++++++++++++++++++ varats/varats/data/metrics.py | 2 + 2 files changed, 122 insertions(+) diff --git a/tests/data/test_metrics.py b/tests/data/test_metrics.py index daeb66856..473fa53de 100644 --- a/tests/data/test_metrics.py +++ b/tests/data/test_metrics.py @@ -8,6 +8,7 @@ lorenz_curve, gini_coefficient, normalized_gini_coefficient, + ClassificationResults, ) @@ -89,3 +90,122 @@ def test_normalized_gini_for_one_value(self): expected = 0 self.assertEqual(expected, normalized_gini_coefficient(data_only_one)) + + +class TestClassificationResults(unittest.TestCase): + """Test if the classification metrics are correctly calculated.""" + + all_good: ClassificationResults + all_bad: ClassificationResults + balanced_50_50: ClassificationResults + skewed_positiv_entries: ClassificationResults + skewed_negative_entries: ClassificationResults + + @classmethod + def setUpClass(cls) -> None: + cls.all_good = ClassificationResults([1, 2, 3], [4, 5, 6], [1, 2, 3], + [4, 5, 6]) + + cls.all_bad = ClassificationResults([1, 2, 3], [4, 5, 6], [4, 5, 6], + [1, 2, 3]) + cls.balanced_50_50 = ClassificationResults([1, 2, 3, 4], [5, 6, 7, 8], + [1, 2, 5, 6], [3, 4, 7, 8]) + + cls.skewed_positiv_entries = ClassificationResults([ + 2, 3, 4, 5, 6, 7, 8, 9 + ], [1], [3, 4, 5, 6, 7, 8, 9], [1, 2]) + cls.skewed_negative_entries = ClassificationResults([1], [ + 2, 3, 4, 5, 6, 7, 8, 9 + ], [1, 2], [3, 4, 5, 6, 7, 8, 9]) + + def test_true_positive(self) -> None: + """Test if true positives are correctly calculated.""" + self.assertEqual(self.all_good.TP, 3) + self.assertEqual(self.all_bad.TP, 0) + self.assertEqual(self.balanced_50_50.TP, 2) + self.assertEqual(self.skewed_positiv_entries.TP, 7) + self.assertEqual(self.skewed_negative_entries.TP, 1) + + def test_false_positive(self) -> None: + """Test if false positives are correctly calculated.""" + self.assertEqual(self.all_good.FP, 0) + self.assertEqual(self.all_bad.FP, 3) + self.assertEqual(self.balanced_50_50.FP, 2) + self.assertEqual(self.skewed_positiv_entries.FP, 0) + self.assertEqual(self.skewed_negative_entries.FP, 1) + + def test_true_negative(self) -> None: + """Test if true negatives are correctly calculated.""" + self.assertEqual(self.all_good.TN, 3) + self.assertEqual(self.all_bad.TN, 0) + self.assertEqual(self.balanced_50_50.TN, 2) + self.assertEqual(self.skewed_positiv_entries.TN, 1) + self.assertEqual(self.skewed_negative_entries.TN, 7) + + def test_false_negative(self) -> None: + """Test if false negatives are correctly calculated.""" + self.assertEqual(self.all_good.FN, 0) + self.assertEqual(self.all_bad.FN, 3) + self.assertEqual(self.balanced_50_50.FN, 2) + self.assertEqual(self.skewed_positiv_entries.FN, 1) + self.assertEqual(self.skewed_negative_entries.FN, 0) + + def test_precision(self) -> None: + """Test if precision are correctly calculated.""" + self.assertEqual(self.all_good.precision(), 1.0) + self.assertEqual(self.all_bad.precision(), 0.0) + 
self.assertEqual(self.balanced_50_50.precision(), 0.5) + self.assertEqual(self.skewed_positiv_entries.precision(), 1.0) + self.assertEqual(self.skewed_negative_entries.precision(), 0.5) + + def test_recall(self) -> None: + """Test if recall are correctly calculated.""" + self.assertEqual(self.all_good.recall(), 1.0) + self.assertEqual(self.all_bad.recall(), 0.0) + self.assertEqual(self.balanced_50_50.recall(), 0.5) + self.assertEqual(self.skewed_positiv_entries.recall(), 0.875) + self.assertEqual(self.skewed_negative_entries.recall(), 1.0) + + def test_specificity(self) -> None: + """Test if specificity are correctly calculated.""" + self.assertEqual(self.all_good.specificity(), 1.0) + self.assertEqual(self.all_bad.specificity(), 0.0) + self.assertEqual(self.balanced_50_50.specificity(), 0.5) + self.assertEqual(self.skewed_positiv_entries.specificity(), 1.0) + self.assertEqual(self.skewed_negative_entries.specificity(), 0.875) + + def test_accuracy(self) -> None: + """Test if accuracy are correctly calculated.""" + self.assertEqual(self.all_good.accuracy(), 1.0) + self.assertEqual(self.all_bad.accuracy(), 0.0) + self.assertEqual(self.balanced_50_50.accuracy(), 0.5) + self.assertAlmostEqual( + self.skewed_positiv_entries.accuracy(), 0.88888888, places=7 + ) + self.assertAlmostEqual( + self.skewed_negative_entries.accuracy(), 0.88888888, places=7 + ) + + def test_balanced_accuracy(self) -> None: + """Test if balanced_accuracy are correctly calculated.""" + self.assertEqual(self.all_good.balanced_accuracy(), 1.0) + self.assertEqual(self.all_bad.balanced_accuracy(), 0.0) + self.assertEqual(self.balanced_50_50.balanced_accuracy(), 0.5) + self.assertAlmostEqual( + self.skewed_positiv_entries.balanced_accuracy(), 0.9375, places=4 + ) + self.assertAlmostEqual( + self.skewed_negative_entries.balanced_accuracy(), 0.9375, places=4 + ) + + def test_f1_score(self) -> None: + """Test if f1 score are correctly calculated.""" + self.assertEqual(self.all_good.f1_score(), 1.0) + self.assertEqual(self.all_bad.f1_score(), 0.0) + self.assertEqual(self.balanced_50_50.f1_score(), 0.5) + self.assertAlmostEqual( + self.skewed_positiv_entries.f1_score(), 0.93333333, places=7 + ) + self.assertAlmostEqual( + self.skewed_negative_entries.f1_score(), 0.66666666, places=7 + ) diff --git a/varats/varats/data/metrics.py b/varats/varats/data/metrics.py index 53f7d1842..22c119941 100644 --- a/varats/varats/data/metrics.py +++ b/varats/varats/data/metrics.py @@ -142,6 +142,8 @@ class ClassificationResults: --------------------|---------------------------|-------------------------- Actual Positive (P) | True Positive (TP) | False Negative (FN) Actual Negative (N) | False Positive (FP) | True Negative (TN) + + Reference: https://en.wikipedia.org/wiki/Precision_and_recall """ def __init__( From 236982be1884785835e8eee9bde9f8774b8b1ee5 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Thu, 13 Jul 2023 16:23:19 +0200 Subject: [PATCH 042/224] Separated XML tests to yaml format --- .../exclude-revision-range.info | 8 ++++++++ .../exclude-single-and-revision-range.info | 10 ++++++++++ .../exclude-single-revision.info | 6 ++++++ .../include-range-exclude-range.info | 12 ++++++++++++ .../include-range-exclude-single.info | 10 ++++++++++ .../include-revision-range.info | 8 ++++++++ .../include-single-and-revision-range.info | 9 +++++++++ .../include-single-revision.info | 6 ++++++ 8 files changed, 69 insertions(+) create mode 100644 tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-revision-range.info create mode 100644 
tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-single-and-revision-range.info create mode 100644 tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-single-revision.info create mode 100644 tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-range-exclude-range.info create mode 100644 tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-range-exclude-single.info create mode 100644 tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-revision-range.info create mode 100644 tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-single-and-revision-range.info create mode 100644 tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-single-revision.info diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-revision-range.info b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-revision-range.info new file mode 100644 index 000000000..66b6e46fc --- /dev/null +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-revision-range.info @@ -0,0 +1,8 @@ +description: Patch that excludes a range of commits (Otherwise includes all) +exclude_revisions: + revision_range: + end: 8ca5cc28e6746eef7340064b5d843631841bf31e + start: 01f9f1f07bef22d4248e8349aba4f0c1f204607e +path: bug.patch +project_name: FeaturePerfCSCollection +shortname: exclude-revision-range diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-single-and-revision-range.info b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-single-and-revision-range.info new file mode 100644 index 000000000..44a149e99 --- /dev/null +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-single-and-revision-range.info @@ -0,0 +1,10 @@ +description: Patch that excludes a certain range and individual commits (Otherwise + includes all) +exclude_revisions: + revision_range: + end: 8ca5cc28e6746eef7340064b5d843631841bf31e + start: 01f9f1f07bef22d4248e8349aba4f0c1f204607e + single_revision: 27f17080376e409860405c40744887d81d6b3f34 +path: bug.patch +project_name: FeaturePerfCSCollection +shortname: exclude-single-and-revision-range diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-single-revision.info b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-single-revision.info new file mode 100644 index 000000000..f15dd79aa --- /dev/null +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/exclude-single-revision.info @@ -0,0 +1,6 @@ +description: Patch that is valid for all commits except a single one +exclude_revisions: + single_revision: 8ca5cc28e6746eef7340064b5d843631841bf31e +path: bug.patch +project_name: FeaturePerfCSCollection +shortname: exclude-single-revision diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-range-exclude-range.info b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-range-exclude-range.info new file mode 100644 index 000000000..f65d12fdd --- /dev/null +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-range-exclude-range.info @@ -0,0 +1,12 @@ +description: Patch valid for a range of commits where a subrange is explicitly excluded +exclude_revisions: + revision_range: + end: a94fb35ca49719028a1c50bdbc2fb82122043f46 + start: c051e44a973ee31b3baa571407694467a513ba68 +include_revisions: + revision_range: + end: 4300ea495e7f013f68e785fdde5c4ead81297999 + start: 01f9f1f07bef22d4248e8349aba4f0c1f204607e +path: bug.patch +project_name: 
FeaturePerfCSCollection +shortname: include-range-exclude-range diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-range-exclude-single.info b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-range-exclude-single.info new file mode 100644 index 000000000..dae52ba93 --- /dev/null +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-range-exclude-single.info @@ -0,0 +1,10 @@ +description: Patch valid for a range of commits where a single one is excluded +exclude_revisions: + single_revision: 162db88346b06be20faac6976f1ff9bad986accf +include_revisions: + revision_range: + end: 8ca5cc28e6746eef7340064b5d843631841bf31e + start: 01f9f1f07bef22d4248e8349aba4f0c1f204607e +path: bug.patch +project_name: FeaturePerfCSCollection +shortname: include-range-exclude-single diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-revision-range.info b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-revision-range.info new file mode 100644 index 000000000..94d9e4f62 --- /dev/null +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-revision-range.info @@ -0,0 +1,8 @@ +description: Patch that is valid for a range of commits +include_revisions: + revision_range: + end: 8ca5cc28e6746eef7340064b5d843631841bf31e + start: 01f9f1f07bef22d4248e8349aba4f0c1f204607e +path: bug.patch +project_name: FeaturePerfCSCollection +shortname: include-revision-range diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-single-and-revision-range.info b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-single-and-revision-range.info new file mode 100644 index 000000000..85698d69b --- /dev/null +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-single-and-revision-range.info @@ -0,0 +1,9 @@ +description: Patch that is valid for a revision range AND another single commit +include_revisions: + revision_range: + end: 8ca5cc28e6746eef7340064b5d843631841bf31e + start: 01f9f1f07bef22d4248e8349aba4f0c1f204607e + single_revision: 27f17080376e409860405c40744887d81d6b3f34 +path: bug.patch +project_name: FeaturePerfCSCollection +shortname: include-single-and-revision-range diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-single-revision.info b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-single-revision.info new file mode 100644 index 000000000..ed9c836f3 --- /dev/null +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/include-single-revision.info @@ -0,0 +1,6 @@ +description: Patch that is valid for a single revision +include_revisions: + single_revision: 8ca5cc28e6746eef7340064b5d843631841bf31e +path: bug.patch +project_name: FeaturePerfCSCollection +shortname: include-single-revision From aa6840599a6a3ced99279aca4207f2f520f263e3 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Thu, 13 Jul 2023 16:49:05 +0200 Subject: [PATCH 043/224] Added yaml parsing for patch information --- .../test-patch-configuration.xml | 4 ++ .../unrestricted-range.info | 5 ++ tests/provider/test_patch_provider.py | 2 +- .../varats/provider/patch/patch_provider.py | 59 ++++++++++++++++++- 4 files changed, 68 insertions(+), 2 deletions(-) rename tests/TEST_INPUTS/{patch-configs => patch_configs}/FeaturePerfCSCollection/test-patch-configuration.xml (97%) create mode 100644 tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/unrestricted-range.info diff --git 
a/tests/TEST_INPUTS/patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/test-patch-configuration.xml similarity index 97% rename from tests/TEST_INPUTS/patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml rename to tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/test-patch-configuration.xml index 5bda2cbec..b67d7c9dd 100644 --- a/tests/TEST_INPUTS/patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/test-patch-configuration.xml @@ -29,6 +29,10 @@ 01f9f1f07bef22d4248e8349aba4f0c1f204607e 8ca5cc28e6746eef7340064b5d843631841bf31e + + start2 + end2 + diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/unrestricted-range.info b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/unrestricted-range.info new file mode 100644 index 000000000..b2e96bf38 --- /dev/null +++ b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/unrestricted-range.info @@ -0,0 +1,5 @@ +description: Patch describing an unrestricted range of commits, meaning it should + include all commits +path: bug.patch +project_name: FeaturePerfCSCollection +shortname: unrestricted-range diff --git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py index bf832bff0..00b9d56dc 100644 --- a/tests/provider/test_patch_provider.py +++ b/tests/provider/test_patch_provider.py @@ -46,7 +46,7 @@ def setUpClass(cls) -> None: patch_config = ProjectPatchesConfiguration.from_xml( Path( TEST_INPUTS_DIR / - 'patch-configs/FeaturePerfCSCollection/test-patch-configuration.xml' + 'patch_configs/FeaturePerfCSCollection/test-patch-configuration.xml' ) ) cls.patch_config = patch_config diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 2b194a612..73632d683 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -5,6 +5,7 @@ from pathlib import Path import benchbuild as bb +import yaml from benchbuild.project import Project from benchbuild.source.base import target_prefix from benchbuild.utils import actions @@ -15,6 +16,7 @@ ) from plumbum import local +from varats.project.project_util import get_local_project_git_path from varats.provider.provider import Provider, ProviderType from varats.utils.git_util import CommitHash, ShortCommitHash @@ -88,13 +90,68 @@ def __init__( shortname: str, description: str, path: Path, - valid_revisions: tp.Optional[tp.Set[CommitHash]] = None + valid_revisions: tp.Optional[tp.Set[CommitHash]] = None, + tags: tp.Optional[tp.Set[str]] = None ): self.project_name: str = project_name self.shortname: str = shortname self.description: str = description self.path: Path = path self.valid_revisions: tp.Optional[tp.Set[CommitHash]] = valid_revisions + self.tags: tp.Optional[tp.Set[str]] = tags + + @staticmethod + def from_yaml(yaml_path: Path): + """Creates a Patch from a YAML file.""" + + yaml_dict = yaml.safe_load(yaml_path.read_text()) + + project_name = yaml_dict["project_name"] + shortname = yaml_dict["shortname"] + description = yaml_dict["description"] + path = yaml_dict["path"] + tags = yaml_dict["tags"] + + main_repo_git = _get_git_for_path(get_local_project_git_path(project_name)) + + def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: + res: tp.Set[CommitHash] = set() + + if "single_revision" in rev_dict: + if type(rev_dict["single_revision"]) == str: 
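+                    # The YAML field may hold either a single hash string
+                    # or a list of hashes; both forms are accepted here.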
+ res.add(ShortCommitHash(rev_dict["single_revision"])) + else: + res.update([ShortCommitHash(r) for r in rev_dict["single_revision"]]) + + if "revision_range" in rev_dict: + if type(rev_dict["revision_range"]) == list: + for rev_range in rev_dict["revision_range"]: + res.update({ShortCommitHash(h) for h in _get_all_revisions_between( + rev_range["start"], + rev_range["end"], + main_repo_git)}) + else: + res.update({ShortCommitHash(h) for h in _get_all_revisions_between( + rev_dict["revision_range"]["start"], + rev_dict["revision_range"]["end"], + main_repo_git + )}) + + return res + + if "include_revisions" in yaml_dict: + include_revisions = parse_revisions(yaml_dict["include_revisions"]) + else: + include_revisions = { + ShortCommitHash(h) + for h in main_repo_git('log', '--pretty=%H', '--first-parent' + ).strip().split() + } + + if "exclude_revisions" in yaml_dict: + include_revisions.difference_update(parse_revisions(yaml_dict["exclude_revisions"])) + + return Patch(project_name, shortname, description, path, include_revisions, tags) class ProjectPatchesConfiguration: From b4fee57b502a1874187ac7891575137ab083e62b Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Sat, 15 Jul 2023 19:04:38 +0200 Subject: [PATCH 044/224] Revert irrelevant changes in JustCompile Project --- .../varats/experiments/base/just_compile.py | 58 ++----------------- 1 file changed, 6 insertions(+), 52 deletions(-) diff --git a/varats/varats/experiments/base/just_compile.py b/varats/varats/experiments/base/just_compile.py index 24197a3e8..0531bd878 100644 --- a/varats/varats/experiments/base/just_compile.py +++ b/varats/varats/experiments/base/just_compile.py @@ -1,5 +1,5 @@ """Implements an empty experiment that just compiles the project.""" -import textwrap + import typing as tp from benchbuild import Project @@ -19,13 +19,7 @@ ) from varats.experiment.wllvm import RunWLLVM from varats.project.varats_project import VProject -from varats.provider.patch.patch_provider import ( - Patch, - PatchProvider, - wrap_action_list_with_patch, -) from varats.report.report import ReportSpecification -from varats.utils.git_util import ShortCommitHash # Please take care when changing this file, see docs experiments/just_compile @@ -37,24 +31,13 @@ class EmptyAnalysis(actions.ProjectStep): # type: ignore project: VProject - def __init__( - self, - project: Project, - experiment_handle: ExperimentHandle, - patch: tp.Optional[Patch] = None - ): + def __init__(self, project: Project, experiment_handle: ExperimentHandle): super().__init__(project=project) self.__experiment_handle = experiment_handle - self.__patch = patch def __call__(self) -> actions.StepResult: return self.analyze() - def __str__(self, indent: int = 0) -> str: - return textwrap.indent( - f"* {self.project.name}: EmptyAnalysis", " " * indent - ) - def analyze(self) -> actions.StepResult: """Only create a report file.""" @@ -63,7 +46,7 @@ def analyze(self) -> actions.StepResult: for binary in self.project.binaries: result_file = create_new_success_result_filepath( self.__experiment_handle, EmptyReport, self.project, binary, - config_id, self.__patch.shortname if self.__patch else None + config_id ) run_cmd = touch[f"{result_file}"] @@ -86,9 +69,6 @@ class JustCompileReport(VersionExperiment, shorthand="JC"): REPORT_SPEC = ReportSpecification(EmptyReport) - # WIP Patch Support - __USE_PATCHES = True - def actions_for_project( self, project: Project ) -> tp.MutableSequence[actions.Step]: @@ -97,12 +77,12 @@ def actions_for_project( # Add the required runtime extensions 
to the project(s). project.runtime_extension = run.RuntimeExtension(project, self) \ - << time.RunWithTime() + << time.RunWithTime() # Add the required compiler extensions to the project(s). project.compiler_extension = compiler.RunCompiler(project, self) \ - << RunWLLVM() \ - << run.WithTimeout() + << RunWLLVM() \ + << run.WithTimeout() project.compile = get_default_compile_error_wrapped( self.get_handle(), project, self.REPORT_SPEC.main_report @@ -113,30 +93,4 @@ def actions_for_project( analysis_actions.append(EmptyAnalysis(project, self.get_handle())) analysis_actions.append(actions.Clean(project)) - if self.__USE_PATCHES: - - patch_provider = PatchProvider.create_provider_for_project(project) - - patches = [] - if patch_provider: - config = patch_provider.patches_config - patches = config.get_patches_for_revision( - ShortCommitHash(str(project.revision)) - ) - - for patch in patches: - patch_actions = [ - actions.Compile(project), - EmptyAnalysis(project, self.get_handle(), patch=patch), - actions.Clean(project) - ] - - analysis_actions.append( - actions.RequireAll( - wrap_action_list_with_patch( - patch_actions, project, patch - ) - ) - ) - return analysis_actions From 9751a5eea337952cfe79f3f86b474c5513d82c60 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Sat, 15 Jul 2023 23:33:50 +0200 Subject: [PATCH 045/224] Changed PatchProvider to initialize itself from multiple info files for patches --- .../varats/provider/patch/patch_provider.py | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 73632d683..437e1cec5 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -106,6 +106,10 @@ def from_yaml(yaml_path: Path): yaml_dict = yaml.safe_load(yaml_path.read_text()) + if not yaml_dict: + # TODO: Proper Error/warning + raise PatchesNotFoundError() + project_name = yaml_dict["project_name"] shortname = yaml_dict["shortname"] description = yaml_dict["description"] @@ -288,13 +292,25 @@ def __init__(self, project: tp.Type[Project]): # TODO: Error handling/warning and None raise PatchesNotFoundError() - conf_file = Path(patches_project_dir / ".patches.xml") + patches = set() - if not conf_file.exists(): - # TODO: Error handling/warning and None - raise PatchesNotFoundError() + for root, dirs, files in os.walk(patches_project_dir): + for filename in files: + if not filename.endswith(".info"): + continue + + info_path = Path(os.path.join(root,filename)) + current_patch = Patch.from_yaml(info_path) + + patches.add(current_patch) + + self.__patches: tp.Set[Patch] = patches + + def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: + pass - self.patches_config = ProjectPatchesConfiguration.from_xml(conf_file) + def get_patches_for_revision(self, revision: CommitHash): + pass @classmethod def create_provider_for_project( From a7776f3eddf00f99e325cfe1f14041089a5235a5 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Sat, 15 Jul 2023 23:36:24 +0200 Subject: [PATCH 046/224] Added patch accessors to Patch provider --- varats-core/varats/provider/patch/patch_provider.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 437e1cec5..f7b084a1f 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ 
b/varats-core/varats/provider/patch/patch_provider.py @@ -307,10 +307,16 @@ def __init__(self, project: tp.Type[Project]): self.__patches: tp.Set[Patch] = patches def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: - pass + for patch in self.__patches: + if patch.shortname == shortname: + return patch + + return None def get_patches_for_revision(self, revision: CommitHash): - pass + """Returns all patches that are valid for the given revision.""" + + return {p for p in self.__patches if revision in p.valid_revisions} @classmethod def create_provider_for_project( From 49770c4638ed695e73cc2a3bf5b98fe6ee911d6b Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Sat, 15 Jul 2023 23:37:26 +0200 Subject: [PATCH 047/224] Removed obsolete ProjectPatchesConfiguration - functionality is now available directly in PatchProvider --- .../varats/provider/patch/patch_provider.py | 110 ------------------ 1 file changed, 110 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index f7b084a1f..67a089a78 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -158,116 +158,6 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: return Patch(project_name, shortname, description, path, include_revisions, tags) -class ProjectPatchesConfiguration: - """A class storing a set of patches specific to a project.""" - - def __init__( - self, project_name: str, repository: str, patches: tp.List[Patch] - ): - self.project_name: str = project_name - self.repository: str = repository - self.patches: tp.List[Patch] = patches - - def get_patches_for_revision(self, revision: CommitHash) -> tp.Set[Patch]: - """Returns all patches that are valid for the given revision.""" - - return {p for p in self.patches if revision in p.valid_revisions} - - def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: - """Returns the patch with the given shortname.""" - - for patch in self.patches: - if patch.shortname == shortname: - return patch - - return None - - @staticmethod - def from_xml(xml_path: Path): - """Creates a ProjectPatchesConfiguration from an XML file.""" - - base_dir = xml_path.parent - - project_name: str = Path(os.path.abspath(xml_path)).parts[-2] - tree = ET.parse(xml_path) - root = tree.getroot() - - if len(root.findall("repository")) != 1: - # TODO: Proper error handling - raise RuntimeError("Only one repository allowed") - - repository = root.findtext("repository") - - project_git_source = bb.source.Git( - remote=repository, - local=project_name, - refspec="origin/HEAD", - shallow=False, - ) - - project_git_source.fetch() - - repo_git = _get_git_for_path(target_prefix() + "/" + project_name) - patch_list: tp.List[Patch] = [] - - def parse_revisions(revisions_tag: ET.Element) -> tp.Set[CommitHash]: - res: tp.Set[CommitHash] = set() - - for revision_tag in revisions_tag.findall("single_revision"): - res.add(ShortCommitHash(revision_tag.text.strip())) - - for revision_range_tag in revisions_tag.findall("revision_range"): - start_tag = revision_range_tag.find("start") - end_tag = revision_range_tag.find("end") - - res.update({ - ShortCommitHash(h) for h in _get_all_revisions_between( - start_tag.text.strip(), end_tag.text.strip(), repo_git - ) - }) - - return res - - # We explicitly ignore further validity checking of the XML - # As for now, this is already done by a CI Job - for patch in root.find("patches").findall("patch"): - shortname = 
patch.findtext("shortname") - description = patch.findtext("description") - - path = Path(patch.findtext("path")) - - if not path.is_absolute(): - path = base_dir / path - - include_revisions: tp.Set[CommitHash] = set() - - include_revs_tag = patch.find("include_revisions") - - if include_revs_tag: - include_revisions = parse_revisions(include_revs_tag) - else: - include_revisions = { - ShortCommitHash(h) - for h in repo_git('log', '--pretty=%H', '--first-parent' - ).strip().split() - } - - exclude_revs_tag = patch.find("exclude_revisions") - - if exclude_revs_tag: - revs = parse_revisions(exclude_revs_tag) - include_revisions.difference_update(revs) - - patch_list.append( - Patch( - project_name, shortname, description, path, - include_revisions - ) - ) - - return ProjectPatchesConfiguration(project_name, repository, patch_list) - - class PatchesNotFoundError(FileNotFoundError): # TODO: Implement me pass From 89705c079430a4dd9ba0729259fddbaf46154714 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Sat, 15 Jul 2023 23:58:05 +0200 Subject: [PATCH 048/224] remove unnecessary functions --- .../varats/provider/patch/patch_provider.py | 34 +------------------ 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 67a089a78..0cc844538 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -250,36 +250,4 @@ def _get_patches_repository_path() -> Path: patches_source.fetch() - return Path(Path(target_prefix()) / patches_source.local) - - -def create_patch_action_list(project: Project, - standard_actions: tp.MutableSequence[actions.Step], - commit: CommitHash) \ - -> tp.Mapping[str, tp.MutableSequence[actions.Step]]: - """Creates a map of actions for applying all patches that are valid for the - given revision.""" - result_actions = {} - - patch_provider = PatchProvider.create_provider_for_project(project) - patches = patch_provider.patches_config.get_patches_for_revision(commit) - - for patch in patches: - result_actions[patch.shortname] = [ - actions.MakeBuildDir(project), - actions.ProjectEnvironment(project), - ApplyPatch(project, patch), *standard_actions - ] - - return result_actions - - -def wrap_action_list_with_patch(action_list: tp.MutableSequence[actions.Step], - project: Project, patch: Patch) \ - -> tp.MutableSequence[actions.Step]: - """Wraps the given action list with the given patch.""" - return [ - actions.MakeBuildDir(project), - actions.ProjectEnvironment(project), - ApplyPatch(project, patch), *action_list - ] + return Path(Path(target_prefix()) / patches_source.local) \ No newline at end of file From d8975c564a01d37cad7f78927cf55476c9a4e0eb Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Sun, 16 Jul 2023 00:07:06 +0200 Subject: [PATCH 049/224] Fixed unit tests for new yaml based structure --- tests/provider/test_patch_provider.py | 16 +++++++--------- .../varats/provider/patch/patch_provider.py | 8 +++++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py index 00b9d56dc..7adda0b75 100644 --- a/tests/provider/test_patch_provider.py +++ b/tests/provider/test_patch_provider.py @@ -11,9 +11,10 @@ FeaturePerfCSCollection, ) from varats.provider.patch.patch_provider import ( - ProjectPatchesConfiguration, PatchProvider, + Patch, ) + from varats.utils.git_util import ShortCommitHash @@ -32,10 +33,10 @@ def 
test_get_patch_by_shortname(self): ) self.assertIsNotNone(provider) - patch = provider.patches_config.get_by_shortname("patch-10") + patch = provider.get_by_shortname("patch-10") self.assertIsNotNone(patch) - patch = provider.patches_config.get_by_shortname("dummy-patch") + patch = provider.get_by_shortname("dummy-patch") self.assertIsNone(patch) @@ -43,13 +44,10 @@ class TestPatchRevisionRanges(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - patch_config = ProjectPatchesConfiguration.from_xml( - Path( + cls.patch_base_path = Path( TEST_INPUTS_DIR / - 'patch_configs/FeaturePerfCSCollection/test-patch-configuration.xml' + 'patch_configs/FeaturePerfCSCollection/' ) - ) - cls.patch_config = patch_config project_git_source = bb.source.Git( remote="git@github.com:se-sic/FeaturePerfCSCollection.git", @@ -72,7 +70,7 @@ def setUpClass(cls) -> None: def __test_patch_revisions( self, shortname: str, expected_revisions: set[ShortCommitHash] ): - patch = self.patch_config.get_by_shortname(shortname) + patch = Patch.from_yaml(self.patch_base_path/f"{shortname}.info") self.assertSetEqual(expected_revisions, patch.valid_revisions) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 0cc844538..b770fa370 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -1,7 +1,6 @@ import os import textwrap import typing as tp -import xml.etree.ElementTree as ET from pathlib import Path import benchbuild as bb @@ -21,7 +20,7 @@ from varats.utils.git_util import CommitHash, ShortCommitHash -def __get_project_git(project: Project) -> tp.Optional[local.cmd]: +def __get_project_git(project: Project): return _get_git_for_path( local.path(project.source_of(project.primary_source)) ) @@ -114,7 +113,10 @@ def from_yaml(yaml_path: Path): shortname = yaml_dict["shortname"] description = yaml_dict["description"] path = yaml_dict["path"] - tags = yaml_dict["tags"] + + tags = None + if "tags" in yaml_dict: + tags = yaml_dict["tags"] main_repo_git = _get_git_for_path(get_local_project_git_path(project_name)) From 4a35b28a2dea64f878f7a484ebed95a5337ffc95 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Sun, 16 Jul 2023 00:07:55 +0200 Subject: [PATCH 050/224] Removed old test XML --- .../test-patch-configuration.xml | 129 ------------------ 1 file changed, 129 deletions(-) delete mode 100644 tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/test-patch-configuration.xml diff --git a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/test-patch-configuration.xml b/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/test-patch-configuration.xml deleted file mode 100644 index b67d7c9dd..000000000 --- a/tests/TEST_INPUTS/patch_configs/FeaturePerfCSCollection/test-patch-configuration.xml +++ /dev/null @@ -1,129 +0,0 @@ - - git@github.com:se-sic/FeaturePerfCSCollection.git - - - unrestricted-range - - Patch describing an unrestricted range of commits, meaning it should include all commits - - bug.patch - - - include-single-revision - - Patch that is valid for a single revision - - bug.patch - - 8ca5cc28e6746eef7340064b5d843631841bf31e - - - - include-revision-range - - Patch that is valid for a range of commits - - bug.patch - - - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e - - - start2 - end2 - - - - - include-single-and-revision-range - - Patch that is valid for a revision range AND another single commit - - bug.patch - - - 
01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e - - - 27f17080376e409860405c40744887d81d6b3f34 - - - - - exclude-single-revision - - Patch that is valid for all commits except a single one - - bug.patch - - 8ca5cc28e6746eef7340064b5d843631841bf31e - - - - exclude-revision-range - - Patch that excludes a range of commits (Otherwise includes all) - - bug.patch - - - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e - - - - - exclude-single-and-revision-range - - Patch that excludes a certain range and individual commits (Otherwise includes all) - - bug.patch - - - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e - - - 27f17080376e409860405c40744887d81d6b3f34 - - - - - include-range-exclude-single - - Patch valid for a range of commits where a single one is excluded - - bug.patch - - - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 8ca5cc28e6746eef7340064b5d843631841bf31e - - - - 162db88346b06be20faac6976f1ff9bad986accf - - - - include-range-exclude-range - - Patch valid for a range of commits where a subrange is explicitly excluded - - bug.patch - - - 01f9f1f07bef22d4248e8349aba4f0c1f204607e - 4300ea495e7f013f68e785fdde5c4ead81297999 - - - - - c051e44a973ee31b3baa571407694467a513ba68 - a94fb35ca49719028a1c50bdbc2fb82122043f46 - - - - - From 062a29a2954b552f0b788823c59c6cee6915de36 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Sun, 16 Jul 2023 00:32:27 +0200 Subject: [PATCH 051/224] Basis for PatchSet class - Still missing union/intersection operators --- .../varats/provider/patch/patch_provider.py | 66 +++++++++++++++---- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index b770fa370..1f6ba8ff2 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -84,13 +84,13 @@ class Patch: """A class for storing a single project-specific Patch.""" def __init__( - self, - project_name: str, - shortname: str, - description: str, - path: Path, - valid_revisions: tp.Optional[tp.Set[CommitHash]] = None, - tags: tp.Optional[tp.Set[str]] = None + self, + project_name: str, + shortname: str, + description: str, + path: Path, + valid_revisions: tp.Optional[tp.Set[CommitHash]] = None, + tags: tp.Optional[tp.Set[str]] = None ): self.project_name: str = project_name self.shortname: str = shortname @@ -151,7 +151,7 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: include_revisions = { ShortCommitHash(h) for h in main_repo_git('log', '--pretty=%H', '--first-parent' - ).strip().split() + ).strip().split() } if "exclude_revisions" in yaml_dict: @@ -191,7 +191,7 @@ def __init__(self, project: tp.Type[Project]): if not filename.endswith(".info"): continue - info_path = Path(os.path.join(root,filename)) + info_path = Path(os.path.join(root, filename)) current_patch = Patch.from_yaml(info_path) patches.add(current_patch) @@ -212,7 +212,7 @@ def get_patches_for_revision(self, revision: CommitHash): @classmethod def create_provider_for_project( - cls: tp.Type[ProviderType], project: tp.Type[Project] + cls: tp.Type[ProviderType], project: tp.Type[Project] ): """ Creates a provider instance for the given project if possible. 
@@ -229,7 +229,7 @@ def create_provider_for_project( @classmethod def create_default_provider( - cls: tp.Type[ProviderType], project: tp.Type[Project] + cls: tp.Type[ProviderType], project: tp.Type[Project] ): """ Creates a default provider instance that can be used with any project. @@ -252,4 +252,46 @@ def _get_patches_repository_path() -> Path: patches_source.fetch() - return Path(Path(target_prefix()) / patches_source.local) \ No newline at end of file + return Path(Path(target_prefix()) / patches_source.local) + + +class PatchSet: + def __init__(self, patches: tp.Set[Patch]): + self.__patches: tp.FrozenSet[Patch] = frozenset(patches) + + def __iter__(self) -> tp.Iterator[str]: + return [k for k, _ in self.__patches].__iter__() + + def __contains__(self, v: tp.Any) -> bool: + return self.__patches.__contains__(v) + + def __len__(self) -> int: + return len(self.__patches) + + def __getitem__(self, tag): + tag_set = set(tag) + return PatchSet({p for p in self.__patches if tag_set.issubset(p.tags)}) + + def __and__(self, rhs: "PatchSet") -> "PatchSet": + lhs_t = self.__patches + rhs_t = rhs.__patches + + ret = {} + ... + return ret + + def __or__(self, rhs: "PatchSet") -> "PatchSet": + lhs_t = self.__patches + rhs_t = rhs.__patches + + ret = {} + ... + return ret + + def __hash__(self) -> int: + return hash(self.__patches) + + def __repr__(self) -> str: + repr_str = ", ".join([f"{k.shortname}" for k in self.__patches]) + + return f"PatchSet({{{repr_str}}})" From 39fd77c93653b7963913ed60052be7ceee1f43e7 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Sun, 16 Jul 2023 00:38:02 +0200 Subject: [PATCH 052/224] Changed get_patches_for_revision to return a PatchSet --- .../varats/provider/patch/patch_provider.py | 85 ++++++++++--------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 1f6ba8ff2..5592f0e1d 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -159,6 +159,47 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: return Patch(project_name, shortname, description, path, include_revisions, tags) +class PatchSet: + def __init__(self, patches: tp.Set[Patch]): + self.__patches: tp.FrozenSet[Patch] = frozenset(patches) + + def __iter__(self) -> tp.Iterator[str]: + return [k for k, _ in self.__patches].__iter__() + + def __contains__(self, v: tp.Any) -> bool: + return self.__patches.__contains__(v) + + def __len__(self) -> int: + return len(self.__patches) + + def __getitem__(self, tag): + tag_set = set(tag) + return PatchSet({p for p in self.__patches if tag_set.issubset(p.tags)}) + + def __and__(self, rhs: "PatchSet") -> "PatchSet": + lhs_t = self.__patches + rhs_t = rhs.__patches + + ret = {} + ... + return ret + + def __or__(self, rhs: "PatchSet") -> "PatchSet": + lhs_t = self.__patches + rhs_t = rhs.__patches + + ret = {} + ... 
+ return ret + + def __hash__(self) -> int: + return hash(self.__patches) + + def __repr__(self) -> str: + repr_str = ", ".join([f"{k.shortname}" for k in self.__patches]) + + return f"PatchSet({{{repr_str}}})" + class PatchesNotFoundError(FileNotFoundError): # TODO: Implement me @@ -205,10 +246,10 @@ def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: return None - def get_patches_for_revision(self, revision: CommitHash): + def get_patches_for_revision(self, revision: CommitHash) -> PatchSet: """Returns all patches that are valid for the given revision.""" - return {p for p in self.__patches if revision in p.valid_revisions} + return PatchSet({p for p in self.__patches if revision in p.valid_revisions}) @classmethod def create_provider_for_project( @@ -255,43 +296,3 @@ def _get_patches_repository_path() -> Path: return Path(Path(target_prefix()) / patches_source.local) -class PatchSet: - def __init__(self, patches: tp.Set[Patch]): - self.__patches: tp.FrozenSet[Patch] = frozenset(patches) - - def __iter__(self) -> tp.Iterator[str]: - return [k for k, _ in self.__patches].__iter__() - - def __contains__(self, v: tp.Any) -> bool: - return self.__patches.__contains__(v) - - def __len__(self) -> int: - return len(self.__patches) - - def __getitem__(self, tag): - tag_set = set(tag) - return PatchSet({p for p in self.__patches if tag_set.issubset(p.tags)}) - - def __and__(self, rhs: "PatchSet") -> "PatchSet": - lhs_t = self.__patches - rhs_t = rhs.__patches - - ret = {} - ... - return ret - - def __or__(self, rhs: "PatchSet") -> "PatchSet": - lhs_t = self.__patches - rhs_t = rhs.__patches - - ret = {} - ... - return ret - - def __hash__(self) -> int: - return hash(self.__patches) - - def __repr__(self) -> str: - repr_str = ", ".join([f"{k.shortname}" for k in self.__patches]) - - return f"PatchSet({{{repr_str}}})" From c2be0af2caa8eb67ea22606d59bed4e356feeacc Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 16 Jul 2023 18:59:07 +0200 Subject: [PATCH 053/224] Small fixes to get new synth case studies working --- .../perf_tests/feature_perf_cs_collection.py | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index cc399219a..a4a4571be 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -171,7 +171,7 @@ class SynthSAFieldSensitivity(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthSAFieldSensitivity", - refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarks", + refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", limit=None, shallow=False, version_filter=project_filter_generator("SynthSAFieldSensitivity") @@ -202,7 +202,7 @@ def binaries_for_revision( # TODO: fix with commit after merge # only_valid_in=RevisionRange("162db88346", "master") only_valid_in=RevisionRange( - "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarks" + "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarksImpl" ) ) @@ -233,7 +233,7 @@ class SynthSAFlowSensitivity(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthSAFlowSensitivity", - refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarks", + refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", limit=None, shallow=False, 
version_filter=project_filter_generator("SynthSAFlowSensitivity") @@ -264,7 +264,7 @@ def binaries_for_revision( # TODO: fix with commit after merge # only_valid_in=RevisionRange("162db88346", "master") only_valid_in=RevisionRange( - "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarks" + "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarksImpl" ) ) @@ -349,21 +349,21 @@ def recompile(self) -> None: _do_feature_perf_cs_collection_recompile(self) -class SynthSAInterProcedural(VProject): +class SynthSAWholeProgram(VProject): """Synthetic case-study project for testing flow sensitivity.""" - NAME = 'SynthSAInterProcedural' + NAME = 'SynthSAWholeProgram' GROUP = 'perf_tests' DOMAIN = ProjectDomains.TEST SOURCE = [ bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", - local="SynthSAInterProcedural", - refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarks", + local="SynthSAWholeProgram", + refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", limit=None, shallow=False, - version_filter=project_filter_generator("SynthSAInterProcedural") + version_filter=project_filter_generator("SynthSAWholeProgram") ), FeatureSource() ] @@ -371,9 +371,8 @@ class SynthSAInterProcedural(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ Command( - SourceRoot("SynthSAInterProcedural") / - RSBinary("InterProcedural"), - label="ContextSense-no-input" + SourceRoot("SynthSAWholeProgram") / RSBinary("WholeProgram"), + label="WholeProgram-no-input" ) ] } @@ -383,16 +382,16 @@ def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 ) -> tp.List[ProjectBinaryWrapper]: binary_map = RevisionBinaryMap( - get_local_project_git_path(SynthSAInterProcedural.NAME) + get_local_project_git_path(SynthSAWholeProgram.NAME) ) binary_map.specify_binary( - "build/bin/InterProcedural", + "build/bin/WholeProgram", BinaryType.EXECUTABLE, # TODO: fix with commit after merge # only_valid_in=RevisionRange("162db88346", "master") only_valid_in=RevisionRange( - "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarks" + "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarksImpl" ) ) @@ -404,7 +403,7 @@ def run_tests(self) -> None: def compile(self) -> None: """Compile the project.""" _do_feature_perf_cs_collection_compile( - self, "FPCSC_ENABLE_PROJECT_SYNTHSAINTERPROCEDURAL" + self, "FPCSC_ENABLE_PROJECT_SYNTHSAWHOLEPROGRAM" ) def recompile(self) -> None: From fec01756157c9a7758a7d3ba2d77457664ff6fbc Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 16 Jul 2023 21:34:42 +0200 Subject: [PATCH 054/224] Small adaptions --- .../varats/provider/patch/patch_provider.py | 80 +++++++++++-------- varats-core/varats/report/report.py | 36 +++------ 2 files changed, 59 insertions(+), 57 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 5592f0e1d..e94cfe03a 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -17,6 +17,7 @@ from varats.project.project_util import get_local_project_git_path from varats.provider.provider import Provider, ProviderType +from varats.utils.git_commands import pull_current_branch from varats.utils.git_util import CommitHash, ShortCommitHash @@ -84,13 +85,13 @@ class Patch: """A class for storing a single project-specific Patch.""" def __init__( - self, - project_name: str, - shortname: str, - description: str, - path: Path, - valid_revisions: tp.Optional[tp.Set[CommitHash]] = None, - 
tags: tp.Optional[tp.Set[str]] = None + self, + project_name: str, + shortname: str, + description: str, + path: Path, + valid_revisions: tp.Optional[tp.Set[CommitHash]] = None, + tags: tp.Optional[tp.Set[str]] = None ): self.project_name: str = project_name self.shortname: str = shortname @@ -118,30 +119,38 @@ def from_yaml(yaml_path: Path): if "tags" in yaml_dict: tags = yaml_dict["tags"] - main_repo_git = _get_git_for_path(get_local_project_git_path(project_name)) + main_repo_git = _get_git_for_path( + get_local_project_git_path(project_name) + ) def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: res: tp.Set[CommitHash] = set() if "single_revision" in rev_dict: - if type(rev_dict["single_revision"]) == str: + if isinstance(rev_dict["single_revision"], str): res.add(ShortCommitHash(rev_dict["single_revision"])) else: - res.update([ShortCommitHash(r) for r in rev_dict["single_revision"]]) + res.update([ + ShortCommitHash(r) for r in rev_dict["single_revision"] + ]) if "revision_range" in rev_dict: - if type(rev_dict["revision_range"]) == list: + if isinstance(rev_dict["revision_range"], list): for rev_range in rev_dict["revision_range"]: - res.update({ShortCommitHash(h) for h in _get_all_revisions_between( - rev_range["start"], - rev_range["end"], - main_repo_git)}) + res.update({ + ShortCommitHash(h) + for h in _get_all_revisions_between( + rev_range["start"], rev_range["end"], + main_repo_git + ) + }) else: - res.update({ShortCommitHash(h) for h in _get_all_revisions_between( - rev_dict["revision_range"]["start"], - rev_dict["revision_range"]["end"], - main_repo_git - )}) + res.update({ + ShortCommitHash(h) for h in _get_all_revisions_between( + rev_dict["revision_range"]["start"], + rev_dict["revision_range"]["end"], main_repo_git + ) + }) return res @@ -151,15 +160,21 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: include_revisions = { ShortCommitHash(h) for h in main_repo_git('log', '--pretty=%H', '--first-parent' - ).strip().split() + ).strip().split() } if "exclude_revisions" in yaml_dict: - include_revisions.difference_update(parse_revisions(yaml_dict["exclude_revisions"])) + include_revisions.difference_update( + parse_revisions(yaml_dict["exclude_revisions"]) + ) + + return Patch( + project_name, shortname, description, path, include_revisions, tags + ) - return Patch(project_name, shortname, description, path, include_revisions, tags) class PatchSet: + def __init__(self, patches: tp.Set[Patch]): self.__patches: tp.FrozenSet[Patch] = frozenset(patches) @@ -209,7 +224,7 @@ class PatchesNotFoundError(FileNotFoundError): class PatchProvider(Provider): """A provider for getting patch files for a certain project.""" - patches_repository = "git@github.com:se-sic/vara-project-patches.git" + patches_repository = "https://github.com/se-sic/vara-project-patches.git" def __init__(self, project: tp.Type[Project]): super().__init__(project) @@ -219,7 +234,7 @@ def __init__(self, project: tp.Type[Project]): ) # BB only performs a fetch so our repo might be out of date - _get_git_for_path(patches_project_dir)("pull") + pull_current_branch(patches_project_dir) if not patches_project_dir.is_dir(): # TODO: Error handling/warning and None @@ -248,12 +263,13 @@ def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: def get_patches_for_revision(self, revision: CommitHash) -> PatchSet: """Returns all patches that are valid for the given revision.""" - - return PatchSet({p for p in self.__patches if revision in p.valid_revisions}) + return PatchSet({ + p for p 
in self.__patches if revision in p.valid_revisions
+        })
 
     @classmethod
     def create_provider_for_project(
-        cls: tp.Type[ProviderType], project: tp.Type[Project]
+            cls: tp.Type[ProviderType], project: tp.Type[Project]
     ):
         """
         Creates a provider instance for the given project if possible.
@@ -270,7 +286,7 @@ def create_provider_for_project(
 
     @classmethod
     def create_default_provider(
-        cls: tp.Type[ProviderType], project: tp.Type[Project]
+            cls: tp.Type[ProviderType], project: tp.Type[Project]
     ):
         """
         Creates a default provider instance that can be used with any project.
@@ -287,12 +303,10 @@ def create_default_provider(
     def _get_patches_repository_path() -> Path:
         patches_source = bb.source.Git(
             remote=PatchProvider.patches_repository,
             local="patch-configurations",
-            refspec="origin/HEAD",
+            refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl",
             limit=1,
         )
 
         patches_source.fetch()
 
-        return Path(Path(target_prefix()) / patches_source.local)
-
-
+        return Path(target_prefix()) / patches_source.local
diff --git a/varats-core/varats/report/report.py b/varats-core/varats/report/report.py
index 24e8c6988..d454b1ee6 100644
--- a/varats-core/varats/report/report.py
+++ b/varats-core/varats/report/report.py
@@ -147,21 +147,21 @@ class ReportFilename():
         r"(?P<experiment_shorthand>.*)-" + r"(?P<report_shorthand>.*)-" +
         r"(?P<project_name>.*)-(?P<binary_name>.*)-" +
         r"(?P<file_commit_hash>.*)[_\/](?P<UUID>[0-9a-fA-F\-]*)"
-        r"(_config-(?P<config_id>\d+))?" + r"(_patch-(?P<patch_name>.+))?" +
-        "_" + FileStatusExtension.get_regex_grp() + r"?" +
-        r"(?P<file_ext>\..*)?" + "$"
+        r"(_config-(?P<config_id>\d+))?" + "_" +
+        FileStatusExtension.get_regex_grp() + r"?" + r"(?P<file_ext>\..*)?" +
+        "$"
     )
 
     __RESULT_FILE_TEMPLATE = (
         "{experiment_shorthand}-" + "{report_shorthand}-" + "{project_name}-" +
-        "{binary_name}-" + "{project_revision}_" +
-        "{project_uuid}_{patch_name}" + "{status_ext}" + "{file_ext}"
+        "{binary_name}-" + "{project_revision}_" + "{project_uuid}_" +
+        "{status_ext}" + "{file_ext}"
     )
 
     __CONFIG_SPECIFIC_RESULT_FILE_TEMPLATE = (
         "{experiment_shorthand}-" + "{report_shorthand}-" + "{project_name}-" +
         "{binary_name}-" + "{project_revision}/" + "{project_uuid}" +
-        "_{patch-name}config-{config_id}_" + "{status_ext}" + "{file_ext}"
+        "_config-{config_id}_" + "{status_ext}" + "{file_ext}"
     )
 
     def __init__(self, file_name: tp.Union[str, Path]) -> None:
@@ -396,8 +396,7 @@ def get_file_name(
         project_uuid: str,
         extension_type: FileStatusExtension,
         file_ext: str = ".txt",
-        config_id: tp.Optional[int] = None,
-        patch_name: tp.Optional[str] = None
+        config_id: tp.Optional[int] = None
     ) -> 'ReportFilename':
         """
         Generates a filename for a report file out of the different parts.
@@ -413,7 +412,7 @@ def get_file_name(
             file_ext: file extension of the report file
 
         Returns:
-            name for the report file that can later be uniquely identified
+            name for the report file that can later be uniquely identified
         """
         status_ext = FileStatusExtension.get_status_extension(extension_type)
 
         if file_ext and not file_ext.startswith("."):
             file_ext = "." 
+ file_ext - if patch_name is not None: - patch_name = f"patch-{patch_name}" - - if not patch_name.endswith("_"): - patch_name = patch_name + "_" - else: - patch_name = "" - if config_id is not None: return ReportFilename( ReportFilename.__CONFIG_SPECIFIC_RESULT_FILE_TEMPLATE.format( @@ -440,8 +431,7 @@ def get_file_name( project_uuid=project_uuid, status_ext=status_ext, config_id=config_id, - file_ext=file_ext, - patch_name=patch_name + file_ext=file_ext ) ) @@ -454,8 +444,7 @@ def get_file_name( project_revision=project_revision, project_uuid=project_uuid, status_ext=status_ext, - file_ext=file_ext, - patch_name=patch_name + file_ext=file_ext ) ) @@ -607,8 +596,7 @@ def get_file_name( project_revision: ShortCommitHash, project_uuid: str, extension_type: FileStatusExtension, - config_id: tp.Optional[int] = None, - patch_name: tp.Optional[str] = None + config_id: tp.Optional[int] = None ) -> ReportFilename: """ Generates a filename for a report file. @@ -627,7 +615,7 @@ def get_file_name( return ReportFilename.get_file_name( experiment_shorthand, cls.SHORTHAND, project_name, binary_name, project_revision, project_uuid, extension_type, cls.FILE_TYPE, - config_id, patch_name + config_id ) @property From 63ede6b36f0bd95c8a39d5448375cc3ebd4e6c6f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 16 Jul 2023 21:36:19 +0200 Subject: [PATCH 055/224] Roll back exp util changes --- .../varats/experiment/experiment_util.py | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index 6f7fe2c08..d70af5353 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -306,8 +306,7 @@ def get_file_name( project_revision: ShortCommitHash, project_uuid: str, extension_type: FileStatusExtension, - config_id: tp.Optional[int] = None, - patch_name: tp.Optional[str] = None + config_id: tp.Optional[int] = None ) -> ReportFilename: """ Generates a filename for a report file that is generated by the @@ -328,8 +327,7 @@ def get_file_name( return self.__experiment.report_spec( ).get_report_type(report_shorthand).get_file_name( self.__experiment.shorthand(), project_name, binary_name, - project_revision, project_uuid, extension_type, config_id, - patch_name + project_revision, project_uuid, extension_type, config_id ) def report_spec(self) -> ReportSpecification: @@ -577,8 +575,7 @@ def __create_new_result_filepath_impl( project: VProject, binary: ProjectBinaryWrapper, extension_type: FileStatusExtension, - config_id: tp.Optional[int] = None, - patch_name: tp.Optional[str] = None + config_id: tp.Optional[int] = None ) -> ReportFilepath: """ Create a result filepath for the specified file extension and report of the @@ -605,8 +602,7 @@ def __create_new_result_filepath_impl( project_revision=ShortCommitHash(project.version_of_primary), project_uuid=str(project.run_uuid), extension_type=extension_type, - config_id=config_id, - patch_name=patch_name + config_id=config_id ) ) @@ -624,8 +620,7 @@ def create_new_success_result_filepath( report_type: tp.Type[BaseReport], project: VProject, binary: ProjectBinaryWrapper, - config_id: tp.Optional[int] = None, - patch_name: tp.Optional[str] = None + config_id: tp.Optional[int] = None ) -> ReportFilepath: """ Create a result filepath for a successfull report of the executed @@ -637,13 +632,12 @@ def create_new_success_result_filepath( project: current project binary: current binary 
config_id: optional id to specify the used configuration - patch_name: optional name of the patch that was applied Returns: formatted success filepath """ return __create_new_result_filepath_impl( exp_handle, report_type, project, binary, FileStatusExtension.SUCCESS, - config_id, patch_name + config_id ) @@ -652,8 +646,7 @@ def create_new_failed_result_filepath( report_type: tp.Type[BaseReport], project: VProject, binary: ProjectBinaryWrapper, - config_id: tp.Optional[int] = None, - patch_name: tp.Optional[str] = None + config_id: tp.Optional[int] = None ) -> ReportFilepath: """ Create a result filepath for a failed report of the executed @@ -670,7 +663,7 @@ def create_new_failed_result_filepath( """ return __create_new_result_filepath_impl( exp_handle, report_type, project, binary, FileStatusExtension.FAILED, - config_id, patch_name + config_id ) From 514e25322da94944c89c071b35bc11c1e4d9a319 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 16 Jul 2023 21:37:10 +0200 Subject: [PATCH 056/224] Fixes formatting in tests --- tests/provider/test_patch_provider.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py index 7adda0b75..3cdaf2c74 100644 --- a/tests/provider/test_patch_provider.py +++ b/tests/provider/test_patch_provider.py @@ -10,11 +10,7 @@ from varats.projects.perf_tests.feature_perf_cs_collection import ( FeaturePerfCSCollection, ) -from varats.provider.patch.patch_provider import ( - PatchProvider, - Patch, -) - +from varats.provider.patch.patch_provider import PatchProvider, Patch from varats.utils.git_util import ShortCommitHash @@ -45,9 +41,8 @@ class TestPatchRevisionRanges(unittest.TestCase): @classmethod def setUpClass(cls) -> None: cls.patch_base_path = Path( - TEST_INPUTS_DIR / - 'patch_configs/FeaturePerfCSCollection/' - ) + TEST_INPUTS_DIR / 'patch_configs/FeaturePerfCSCollection/' + ) project_git_source = bb.source.Git( remote="git@github.com:se-sic/FeaturePerfCSCollection.git", @@ -70,7 +65,7 @@ def setUpClass(cls) -> None: def __test_patch_revisions( self, shortname: str, expected_revisions: set[ShortCommitHash] ): - patch = Patch.from_yaml(self.patch_base_path/f"{shortname}.info") + patch = Patch.from_yaml(self.patch_base_path / f"{shortname}.info") self.assertSetEqual(expected_revisions, patch.valid_revisions) From f4a56896078369c5ee7e1e19afb15f04f63aceca Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 16 Jul 2023 22:43:07 +0200 Subject: [PATCH 057/224] Get patch provider working --- .../varats/provider/patch/patch_provider.py | 82 +++++++++++++------ .../vara/feature_perf_precision.py | 61 +++++--------- 2 files changed, 79 insertions(+), 64 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index e94cfe03a..cc241aa50 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -13,12 +13,17 @@ _get_all_revisions_between, _get_git_for_path, ) -from plumbum import local +from plumbum import local, ProcessExecutionError from varats.project.project_util import get_local_project_git_path +from varats.project.varats_project import VProject from varats.provider.provider import Provider, ProviderType -from varats.utils.git_commands import pull_current_branch -from varats.utils.git_util import CommitHash, ShortCommitHash +from varats.utils.git_commands import pull_current_branch, apply_patch +from 
varats.utils.git_util import ( + CommitHash, + ShortCommitHash, + get_all_revisions_between, +) def __get_project_git(project: Project): @@ -30,27 +35,36 @@ def __get_project_git(project: Project): class ApplyPatch(actions.ProjectStep): """Apply a patch to a project.""" - NAME = "ApplyPatch" + NAME = "APPLY_PATCH" DESCRIPTION = "Apply a Git patch to a project." - def __init__(self, project, patch): + def __init__(self, project: VProject, patch: 'Patch') -> None: super().__init__(project) self.__patch = patch - def __call__(self) -> StepResult: - repo_git = __get_project_git(self.project) + # TODO: discuss signature + def __call__(self, _: tp.Any) -> StepResult: + try: + print( + f"Applying {self.__patch.shortname} to " + f"{self.project.source_of(self.project.primary_source)}" + ) + apply_patch( + Path(self.project.source_of(self.project.primary_source)), + self.__patch.path + ) - patch_path = self.__patch.path + except ProcessExecutionError: + self.status = StepResult.ERROR - repo_git("apply", patch_path) + self.status = StepResult.OK return StepResult.OK def __str__(self, indent: int = 0) -> str: return textwrap.indent( - f"* {self.project.name}: " - f"Apply the patch " - f"'{self.__patch.shortname}' to the project.", " " * indent + f"* {self.project.name}: Apply patch " + f"{self.__patch.shortname}", " " * indent ) @@ -114,6 +128,9 @@ def from_yaml(yaml_path: Path): shortname = yaml_dict["shortname"] description = yaml_dict["description"] path = yaml_dict["path"] + # Convert to full qualified path, as we know that path is relative to + # the yaml info file. + path = yaml_path.parent / path tags = None if "tags" in yaml_dict: @@ -137,13 +154,13 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: if "revision_range" in rev_dict: if isinstance(rev_dict["revision_range"], list): for rev_range in rev_dict["revision_range"]: - res.update({ - ShortCommitHash(h) - for h in _get_all_revisions_between( + res.update( + get_all_revisions_between( rev_range["start"], rev_range["end"], - main_repo_git + ShortCommitHash, + get_local_project_git_path(project_name) ) - }) + ) else: res.update({ ShortCommitHash(h) for h in _get_all_revisions_between( @@ -172,14 +189,30 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: project_name, shortname, description, path, include_revisions, tags ) + def __repr__(self) -> str: + return str(self) + + def __str__(self) -> str: + valid_revs = [str(r) for r in self.valid_revisions + ] if self.valid_revisions else [] + str_representation = f"""Patch( + ProjectName: {self.project_name} + Shortname: {self.shortname} + Path: {self.path} + ValidRevs: {valid_revs} +) +""" + + return str_representation + class PatchSet: def __init__(self, patches: tp.Set[Patch]): self.__patches: tp.FrozenSet[Patch] = frozenset(patches) - def __iter__(self) -> tp.Iterator[str]: - return [k for k, _ in self.__patches].__iter__() + def __iter__(self) -> tp.Iterator[Patch]: + return self.__patches.__iter__() def __contains__(self, v: tp.Any) -> bool: return self.__patches.__contains__(v) @@ -224,18 +257,18 @@ class PatchesNotFoundError(FileNotFoundError): class PatchProvider(Provider): """A provider for getting patch files for a certain project.""" - patches_repository = "https://github.com/se-sic/vara-project-patches.git" + patches_repository = "git@github.com:se-sic/vara-project-patches.git" def __init__(self, project: tp.Type[Project]): super().__init__(project) + # BB only performs a fetch so our repo might be out of date + 
pull_current_branch(self._get_patches_repository_path()) + patches_project_dir = Path( self._get_patches_repository_path() / self.project.NAME ) - # BB only performs a fetch so our repo might be out of date - pull_current_branch(patches_project_dir) - if not patches_project_dir.is_dir(): # TODO: Error handling/warning and None raise PatchesNotFoundError() @@ -304,7 +337,8 @@ def _get_patches_repository_path() -> Path: remote=PatchProvider.patches_repository, local="patch-configurations", refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", - limit=1, + limit=None, + shallow=False ) patches_source.fetch() diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index d73ea1986..41ad39a02 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -43,10 +43,12 @@ from varats.project.project_domain import ProjectDomains from varats.project.project_util import BinaryType, ProjectBinaryWrapper from varats.project.varats_project import VProject +from varats.provider.patch.patch_provider import PatchProvider, ApplyPatch from varats.report.gnu_time_report import TimeReportAggregate from varats.report.report import ReportSpecification, ReportTy, BaseReport from varats.report.tef_report import TEFReport, TEFReportAggregate from varats.utils.git_commands import apply_patch +from varats.utils.git_util import ShortCommitHash class AnalysisProjectStepBase(ProjectStep): @@ -140,35 +142,6 @@ def __str__(self, indent: int = 0) -> str: ) -class ApplyPatch(ProjectStep): - NAME = "APPLY_PATCH" - DESCRIPTION = "Apply a patch the project" - - def __init__(self, project: VProject, patch_file: Path) -> None: - super().__init__(project) - self.__patch_file = patch_file - - def __call__(self, _: tp.Any) -> StepResult: - try: - print( - f"Applying {self.__patch_file} to {self.project.source_of(self.project.primary_source)}" - ) - apply_patch( - Path(self.project.source_of(self.project.primary_source)), - self.__patch_file - ) - except ProcessExecutionError: - self.status = StepResult.ERROR - self.status = StepResult.OK - - return self.status - - def __str__(self, indent: int = 0) -> str: - return textwrap.indent( - f"* {self.project.name}: Apply patch", indent * " " - ) - - class RunGenTracedWorkloads(AnalysisProjectStepBase): # type: ignore """Executes the traced project binaries on the specified workloads.""" @@ -270,6 +243,7 @@ def setup_actions_for_vara_experiment( get_current_config_id(project) ) + # TODO: integrate patches analysis_actions = [] analysis_actions.append(actions.Compile(project)) @@ -450,22 +424,29 @@ def actions_for_project( get_current_config_id(project) ) + patch_provider = PatchProvider.get_provider_for_project(project) + patches = patch_provider.get_patches_for_revision( + ShortCommitHash(project.version_of_primary) + ) + print(f"{patches=}") + + patch_steps = [] + for patch in patches: + print(f"Got patch with path: {patch.path}") + patch_steps.append(ApplyPatch(project, patch)) + patch_steps.append(ReCompile(project)) + patch_steps.append( + RunBackBoxBaseline(project, binary, result_post_fix="new") + ) + analysis_actions = [] analysis_actions.append(actions.Compile(project)) analysis_actions.append( ZippedExperimentSteps( - result_filepath, [ # type: ignore - RunBackBoxBaseline(project, binary, result_post_fix="old"), - ApplyPatch( - project, - Path( - "/home/vulder/git/FeaturePerfCSCollection/test.patch" - ) - ), - 
ReCompile(project), - RunBackBoxBaseline(project, binary, result_post_fix="new") - ] + result_filepath, + [RunBackBoxBaseline(project, binary, result_post_fix="old")] + + patch_steps ) ) analysis_actions.append(actions.Clean(project)) From efd60a6a08bd32d2f11b553dcc22e3dca3a8731a Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 16 Jul 2023 22:48:00 +0200 Subject: [PATCH 058/224] Adapt profiler step creator --- .../vara/feature_perf_precision.py | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 41ad39a02..83955b7de 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -243,27 +243,30 @@ def setup_actions_for_vara_experiment( get_current_config_id(project) ) + patch_provider = PatchProvider.get_provider_for_project(project) + patches = patch_provider.get_patches_for_revision( + ShortCommitHash(project.version_of_primary) + ) + print(f"{patches=}") + + patch_steps = [] + for patch in patches: + print(f"Got patch with path: {patch.path}") + patch_steps.append(ApplyPatch(project, patch)) + patch_steps.append(ReCompile(project)) + patch_steps.append( + analysis_step(project, binary, result_post_fix="new") + ) + # TODO: integrate patches analysis_actions = [] analysis_actions.append(actions.Compile(project)) analysis_actions.append( ZippedExperimentSteps( - result_filepath, [ # type: ignore - analysis_step( - project, binary, result_post_fix="old" - ), - ApplyPatch( - project, - Path( - "/home/vulder/git/FeaturePerfCSCollection/test.patch" - ) - ), - ReCompile(project), - analysis_step( - project, binary, result_post_fix="new" - ) - ] + result_filepath, + [analysis_step(project, binary, result_post_fix="old")] + + patch_steps ) ) analysis_actions.append(actions.Clean(project)) From afdba46aad049cc3be14529dfdf5761a63d98bc5 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 16 Jul 2023 23:12:39 +0200 Subject: [PATCH 059/224] Adds revert support --- .../varats/provider/patch/patch_provider.py | 31 +++++++++++++------ varats-core/varats/utils/git_commands.py | 5 +++ .../vara/feature_perf_precision.py | 14 ++++++--- 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index cc241aa50..5ae0e92b9 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -18,7 +18,11 @@ from varats.project.project_util import get_local_project_git_path from varats.project.varats_project import VProject from varats.provider.provider import Provider, ProviderType -from varats.utils.git_commands import pull_current_branch, apply_patch +from varats.utils.git_commands import ( + pull_current_branch, + apply_patch, + revert_patch, +) from varats.utils.git_util import ( CommitHash, ShortCommitHash, @@ -71,27 +75,36 @@ def __str__(self, indent: int = 0) -> str: class RevertPatch(actions.ProjectStep): """Revert a patch from a project.""" - NAME = "RevertPatch" + NAME = "REVERT_PATCH" DESCRIPTION = "Revert a Git patch from a project." 
 
     def __init__(self, project, patch):
         super().__init__(project)
         self.__patch = patch
 
-    def __call__(self) -> StepResult:
-        repo_git = __get_project_git(self.project)
+    # TODO: discuss signature
+    def __call__(self, _: tp.Any) -> StepResult:
+        try:
+            print(
+                f"Reverting {self.__patch.shortname} on "
+                f"{self.project.source_of(self.project.primary_source)}"
+            )
+            revert_patch(
+                Path(self.project.source_of(self.project.primary_source)),
+                self.__patch.path
+            )
 
-        patch_path = self.__patch.path
+        except ProcessExecutionError:
+            self.status = StepResult.ERROR
 
-        repo_git("apply", "-R", patch_path)
+        self.status = StepResult.OK
 
         return StepResult.OK
 
     def __str__(self, indent: int = 0) -> str:
         return textwrap.indent(
-            f"* {self.project.name}: "
-            f"Revert the patch '{self.__patch.shortname}' "
-            f"from the project.", " " * indent
+            f"* {self.project.name}: Revert patch "
+            f"{self.__patch.shortname}", " " * indent
         )
 
diff --git a/varats-core/varats/utils/git_commands.py b/varats-core/varats/utils/git_commands.py
index 8dbb60ad7..6c5c918e7 100644
--- a/varats-core/varats/utils/git_commands.py
+++ b/varats-core/varats/utils/git_commands.py
@@ -152,3 +152,8 @@ def download_repo(
 def apply_patch(repo_folder: Path, patch_file: Path) -> None:
     """Applies a given patch file to the specified git repository."""
     git("-C", repo_folder.absolute(), "apply", str(patch_file))
+
+
+def revert_patch(repo_folder: Path, patch_file: Path) -> None:
+    """Reverts a given patch file in the specified git repository."""
+    git("-C", repo_folder.absolute(), "apply", "-R", str(patch_file))
diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py
index 83955b7de..e8ed526d3 100644
--- a/varats/varats/experiments/vara/feature_perf_precision.py
+++ b/varats/varats/experiments/vara/feature_perf_precision.py
@@ -43,11 +43,14 @@
 from varats.project.project_domain import ProjectDomains
 from varats.project.project_util import BinaryType, ProjectBinaryWrapper
 from varats.project.varats_project import VProject
-from varats.provider.patch.patch_provider import PatchProvider, ApplyPatch
+from varats.provider.patch.patch_provider import (
+    PatchProvider,
+    ApplyPatch,
+    RevertPatch,
+)
 from varats.report.gnu_time_report import TimeReportAggregate
 from varats.report.report import ReportSpecification, ReportTy, BaseReport
 from varats.report.tef_report import TEFReport, TEFReportAggregate
-from varats.utils.git_commands import apply_patch
 from varats.utils.git_util import ShortCommitHash
 
 
@@ -255,8 +258,11 @@ def setup_actions_for_vara_experiment(
         patch_steps.append(ApplyPatch(project, patch))
         patch_steps.append(ReCompile(project))
         patch_steps.append(
-            analysis_step(project, binary, result_post_fix="new")
+            analysis_step(
+                project, binary, result_post_fix=f"patched_{patch.shortname}"
+            )
         )
+        patch_steps.append(RevertPatch(project, patch))
 
-    # TODO: integrate patches
     analysis_actions = []
@@ -741,7 +747,7 @@ def actions_for_project(
             ZippedExperimentSteps(
                 result_filepath,
                 [
-                    RunBackBoxBaselineOverhead(  # type: ignore
+                    RunBackBoxBaselineOverhead( # type: ignore
                         project,
                         binary
                     ),
From b407c7b6c47c7f584af86846f971d258f569f92c Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Mon, 17 Jul 2023 22:24:24 +0200
Subject: [PATCH 060/224] Implements multi patch handling

---
 tests/report/test_multi_patch_report.py       |  76 +++++++++
 .../varats/experiment/experiment_util.py      |   3 +
 .../varats/report/multi_patch_report.py       |  82 ++++++++++
 .../vara/feature_perf_precision.py            | 107 
+++++-------- .../varats/tables/feature_perf_precision.py | 146 ++++++++++++------ 5 files changed, 299 insertions(+), 115 deletions(-) create mode 100644 tests/report/test_multi_patch_report.py create mode 100644 varats-core/varats/report/multi_patch_report.py diff --git a/tests/report/test_multi_patch_report.py b/tests/report/test_multi_patch_report.py new file mode 100644 index 000000000..e192e1a79 --- /dev/null +++ b/tests/report/test_multi_patch_report.py @@ -0,0 +1,76 @@ +"""Test MultiPatchReport.""" + +import unittest +from pathlib import Path + +from varats.provider.patch.patch_provider import Patch +from varats.report.multi_patch_report import MultiPatchReport + + +class TestMultiPatchReport(unittest.TestCase): + """Tests if the basic components of MultiPatchReport are working.""" + + def test_baseline_report_name(self) -> None: + """Tests if baseline report names are correctly created and checked.""" + baseline_report_name = MultiPatchReport.create_baseline_report_name( + "my_base.txt" + ) + + self.assertEqual(baseline_report_name, "baseline_my_base.txt") + self.assertTrue( + MultiPatchReport.is_baseline_report(baseline_report_name) + ) + + self.assertFalse( + MultiPatchReport.is_baseline_report(baseline_report_name[1:]) + ) + + def test_patched_report_name(self) -> None: + """Tests if patched report names are correctly created and checked.""" + patch_shortname = "shortname" + patch = Patch("MyPatch", patch_shortname, "desc", Path()) + patched_report_name = MultiPatchReport.create_patched_report_name( + patch, "my_base.txt" + ) + + self.assertEqual( + patched_report_name, + f"patched_{len(patch_shortname)}_{patch_shortname}_my_base.txt" + ) + self.assertTrue(MultiPatchReport.is_patched_report(patched_report_name)) + self.assertFalse( + MultiPatchReport.is_baseline_report(patched_report_name) + ) + + self.assertFalse( + MultiPatchReport.is_baseline_report(patched_report_name[1:]) + ) + + def test_patched_report_parsing(self) -> None: + """Test if we can correctly parse patch shortnames.""" + patch_shortname = "shortname" + patch = Patch("MyPatch", patch_shortname, "desc", Path()) + patched_report_name = MultiPatchReport.create_patched_report_name( + patch, "my_base.txt" + ) + + self.assertEqual( + MultiPatchReport. + _parse_patch_shorthand_from_report_name(patched_report_name), + patch_shortname + ) + + def test_patched_report_parsing_with_extra_underscores(self) -> None: + """Test special parsing case where the patch shortname contains + underscores.""" + patch_shortname = "sh_ort_name" + patch = Patch("MyPatch", patch_shortname, "desc", Path()) + patched_report_name = MultiPatchReport.create_patched_report_name( + patch, "my_base.txt" + ) + + self.assertEqual( + MultiPatchReport. 
+ _parse_patch_shorthand_from_report_name(patched_report_name), + patch_shortname + ) diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index d70af5353..3ad21170b 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -519,6 +519,9 @@ def run_child_with_output_folder( return child(tmp_folder) +# TODO: update extra wrapper step + + class ZippedExperimentSteps(MultiStep[NeedsOutputFolder]): #type: ignore """Runs multiple actions, providing them a shared tmp folder that afterwards is zipped into an archive.""" diff --git a/varats-core/varats/report/multi_patch_report.py b/varats-core/varats/report/multi_patch_report.py new file mode 100644 index 000000000..e0f7a1d5b --- /dev/null +++ b/varats-core/varats/report/multi_patch_report.py @@ -0,0 +1,82 @@ +"""MultiPatchReport to group together similar reports that where produced for +differently patched projects.""" +import shutil +import tempfile +import typing as tp +from pathlib import Path + +from varats.provider.patch.patch_provider import Patch +from varats.report.report import ReportTy, BaseReport + + +class MultiPatchReport( + BaseReport, tp.Generic[ReportTy], shorthand="MPR", file_type=".zip" +): + """Meta report to group together reports of the same type that where + produced with differently patched projects.""" + + def __init__(self, path: Path, report_type: tp.Type[ReportTy]) -> None: + super().__init__(path) + self.__patched_reports: tp.Dict[str, ReportTy] = {} + + with tempfile.TemporaryDirectory() as tmp_result_dir: + shutil.unpack_archive(path, extract_dir=tmp_result_dir) + + for report in Path(tmp_result_dir).iterdir(): + if self.is_baseline_report(report.name): + self.__base = report_type(report) + elif self.is_patched_report(report.name): + self.__patched_reports[ + self._parse_patch_shorthand_from_report_name( + report.name + )] = report_type(report) + + if not self.__base or not self.__patched_reports: + raise AssertionError( + "Reports where missing in the file {report_path=}" + ) + + def get_baseline_report(self) -> ReportTy: + return self.__base + + def get_report_for_patch(self, + patch_shortname: str) -> tp.Optional[ReportTy]: + """Get the report for a given patch shortname.""" + if patch_shortname in self.__patched_reports: + return self.__patched_reports[patch_shortname] + + return None + + def get_patch_names(self) -> tp.List[str]: + return list(self.__patched_reports.keys()) + + def get_patched_reports(self) -> tp.ValuesView[ReportTy]: + return self.__patched_reports.values() + + @staticmethod + def create_baseline_report_name(base_file_name: str) -> str: + return f"baseline_{base_file_name}" + + @staticmethod + def is_baseline_report(file_name: str) -> bool: + return file_name.startswith("baseline_") + + @staticmethod + def create_patched_report_name(patch: Patch, base_file_name: str) -> str: + return ( + f"patched_{len(patch.shortname)}_" + + f"{patch.shortname}_{base_file_name}" + ) + + @staticmethod + def is_patched_report(file_name: str) -> bool: + return file_name.startswith("patched_") + + @staticmethod + def _parse_patch_shorthand_from_report_name(file_name: str) -> str: + """Parse the patch shorthand from a given patched report.""" + fn_without_prefix = file_name[len("patched_"):] + split_leftover_fn = fn_without_prefix.partition("_") + shortname_length = int(split_leftover_fn[0]) + patch_shortname = "".join(split_leftover_fn[2:])[:shortname_length] + return patch_shortname diff 
--git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index e8ed526d3..a634cd064 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -1,7 +1,5 @@ """Module for feature performance precision experiments that evaluate measurement support of vara.""" -import shutil -import tempfile import textwrap import typing as tp from pathlib import Path @@ -9,13 +7,7 @@ import benchbuild.extensions as bb_ext from benchbuild.command import cleanup from benchbuild.utils import actions -from benchbuild.utils.actions import ( - ProjectStep, - Step, - StepResult, - Compile, - Clean, -) +from benchbuild.utils.actions import ProjectStep, StepResult, Compile, Clean from benchbuild.utils.cmd import time from plumbum import local, ProcessExecutionError @@ -23,8 +15,6 @@ PerfInfluenceTraceReportAggregate, ) from varats.experiment.experiment_util import ( - ExperimentHandle, - VersionExperiment, WithUnlimitedStackSize, ZippedReportFolder, create_new_success_result_filepath, @@ -36,8 +26,6 @@ from varats.experiment.workload_util import WorkloadCategory, workload_commands from varats.experiments.vara.feature_experiment import ( FeatureExperiment, - RunVaRATracedWorkloads, - RunVaRATracedXRayWorkloads, FeatureInstrType, ) from varats.project.project_domain import ProjectDomains @@ -49,6 +37,7 @@ RevertPatch, ) from varats.report.gnu_time_report import TimeReportAggregate +from varats.report.multi_patch_report import MultiPatchReport from varats.report.report import ReportSpecification, ReportTy, BaseReport from varats.report.tef_report import TEFReport, TEFReportAggregate from varats.utils.git_util import ShortCommitHash @@ -62,45 +51,17 @@ def __init__( self, project: VProject, binary: ProjectBinaryWrapper, - result_post_fix: str = "", + file_name: str, report_file_ending: str = "json", reps=2 ): super().__init__(project=project) self._binary = binary self._report_file_ending = report_file_ending - self._result_pre_fix = result_post_fix + self._file_name = file_name self._reps = reps -class MultiPatchReport( - BaseReport, tp.Generic[ReportTy], shorthand="MPR", file_type=".zip" -): - - def __init__(self, path: Path, report_type: tp.Type[ReportTy]) -> None: - super().__init__(path) - with tempfile.TemporaryDirectory() as tmp_result_dir: - shutil.unpack_archive(path, extract_dir=tmp_result_dir) - - # TODO: clean up - for report in Path(tmp_result_dir).iterdir(): - if report.name.startswith("old"): - self.__old = report_type(report) - elif report.name.startswith("new"): - self.__new = report_type(report) - - if not self.__old or not self.__new: - raise AssertionError( - "Reports where missing in the file {report_path=}" - ) - - def get_old_report(self) -> ReportTy: - return self.__old - - def get_new_report(self) -> ReportTy: - return self.__new - - class MPRTRA( MultiPatchReport[TimeReportAggregate], shorthand="MPRTRA", file_type=".zip" ): @@ -157,13 +118,11 @@ def __init__( self, project: VProject, binary: ProjectBinaryWrapper, - result_post_fix: str = "", + file_name: str, report_file_ending: str = "json", reps=2 ): - super().__init__( - project, binary, result_post_fix, report_file_ending, reps - ) + super().__init__(project, binary, file_name, report_file_ending, reps) def __call__(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) @@ -176,7 +135,7 @@ def __str__(self, indent: int = 0) -> str: def run_traced_code(self, tmp_dir: Path) -> 
StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): - zip_tmp_dir = tmp_dir / f"{self._result_pre_fix}_rep_measures" + zip_tmp_dir = tmp_dir / self._file_name with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: for rep in range(0, self._reps): for prj_command in workload_commands( @@ -259,20 +218,28 @@ def setup_actions_for_vara_experiment( patch_steps.append(ReCompile(project)) patch_steps.append( analysis_step( - project, binary, result_post_fix=f"patched_{patch.shortname}" + project, + binary, + file_name=MultiPatchReport.create_patched_report_name( + patch, "rep_measurements" + ) ) ) patch_steps.append(RevertPatch(project, patch)) - # TODO: integrate patches analysis_actions = [] analysis_actions.append(actions.Compile(project)) analysis_actions.append( ZippedExperimentSteps( - result_filepath, - [analysis_step(project, binary, result_post_fix="old")] + - patch_steps + result_filepath, [ + analysis_step( + project, + binary, + file_name=MultiPatchReport. + create_baseline_report_name("rep_measurements") + ) + ] + patch_steps ) ) analysis_actions.append(actions.Clean(project)) @@ -337,15 +304,15 @@ def __init__( self, project: VProject, binary: ProjectBinaryWrapper, - result_post_fix: str = "", + file_name: str, report_file_ending: str = "txt", reps=2 ): super().__init__(project=project) self.__binary = binary self.__report_file_ending = report_file_ending - self.__result_pre_fix = result_post_fix self.__reps = reps + self.__file_name = file_name def __call__(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) @@ -358,7 +325,7 @@ def __str__(self, indent: int = 0) -> str: def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): - zip_tmp_dir = tmp_dir / f"{self.__result_pre_fix}_rep_measures" + zip_tmp_dir = tmp_dir / self.__file_name with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: for rep in range(0, self.__reps): for prj_command in workload_commands( @@ -445,17 +412,29 @@ def actions_for_project( patch_steps.append(ApplyPatch(project, patch)) patch_steps.append(ReCompile(project)) patch_steps.append( - RunBackBoxBaseline(project, binary, result_post_fix="new") + RunBackBoxBaseline( + project, + binary, + file_name=MPRTRA.create_patched_report_name( + patch, "rep_measurements" + ) + ) ) + patch_steps.append(RevertPatch(project, patch)) analysis_actions = [] analysis_actions.append(actions.Compile(project)) analysis_actions.append( ZippedExperimentSteps( - result_filepath, - [RunBackBoxBaseline(project, binary, result_post_fix="old")] + - patch_steps + result_filepath, [ + RunBackBoxBaseline( + project, + binary, + file_name=MPRTRA. 
+ create_baseline_report_name("rep_measurements") + ) + ] + patch_steps ) ) analysis_actions.append(actions.Clean(project)) @@ -480,13 +459,11 @@ def __init__( self, project: VProject, binary: ProjectBinaryWrapper, - result_post_fix: str = "", + file_name: str, report_file_ending: str = "txt", reps=2 ): - super().__init__( - project, binary, result_post_fix, report_file_ending, reps - ) + super().__init__(project, binary, file_name, report_file_ending, reps) def __call__(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) @@ -582,7 +559,7 @@ def setup_actions_for_vara_overhead_experiment( result_filepath, [ analysis_step( # type: ignore - project, binary + project, binary, "overhead" ) ] ) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 78188e1be..858c99f85 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -104,8 +104,29 @@ def get_feature_performance_from_tef_report( return feature_performances +def get_patch_names(case_study: CaseStudy) -> tp.List[str]: + report_files = get_processed_revisions_files( + case_study.project_name, + fpp.BlackBoxBaselineRunner, + fpp.MPRTRA, + get_case_study_file_name_filter(case_study), + config_id=0 + ) + + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print("Could not find profiling data. config_id=0, profiler=Baseline") + return [] + + # TODO: fix to prevent double loading + time_reports = fpp.MPRTRA(report_files[0].full_path()) + return time_reports.get_patch_names() + + def get_regressing_config_ids_gt( - project_name: str, case_study: CaseStudy, rev: FullCommitHash + project_name: str, case_study: CaseStudy, rev: FullCommitHash, + report_name: str ) -> tp.Optional[tp.Dict[int, bool]]: """Computes the baseline data, i.e., the config ids where a regression was identified.""" @@ -129,10 +150,14 @@ def get_regressing_config_ids_gt( ) return None + # TODO: fix to prevent double loading time_reports = fpp.MPRTRA(report_files[0].full_path()) - old_time = time_reports.get_old_report() - new_time = time_reports.get_new_report() + old_time = time_reports.get_baseline_report() + # new_time = time_reports.get_new_report() + new_time = time_reports.get_report_for_patch(report_name) + if not new_time: + return None if np.mean(old_time.measurements_wall_clock_time ) == np.mean(new_time.measurements_wall_clock_time): @@ -195,7 +220,9 @@ def report_type(self) -> tp.Type[BaseReport]: return self.__report_type @abc.abstractmethod - def is_regression(self, report_path: ReportFilepath) -> bool: + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: """Checks if there was a regression between the old an new data.""" @@ -208,7 +235,9 @@ def __init__(self) -> None: fpp.MPRTEFA ) - def is_regression(self, report_path: ReportFilepath) -> bool: + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: """Checks if there was a regression between the old an new data.""" is_regression = False @@ -217,13 +246,17 @@ def is_regression(self, report_path: ReportFilepath) -> bool: ) old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - for old_tef_report in multi_report.get_old_report().reports(): + for old_tef_report in multi_report.get_baseline_report().reports(): pim = get_feature_performance_from_tef_report(old_tef_report) for feature, value in pim.items(): old_acc_pim[feature].append(value) new_acc_pim: 
tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - for new_tef_report in multi_report.get_new_report().reports(): + opt_mr = multi_report.get_report_for_patch(patch_name) + if not opt_mr: + raise NotImplementedError() + + for new_tef_report in opt_mr.reports(): pim = get_feature_performance_from_tef_report(new_tef_report) for feature, value in pim.items(): new_acc_pim[feature].append(value) @@ -257,7 +290,9 @@ def __init__(self) -> None: fpp.MPRPIMA ) - def is_regression(self, report_path: ReportFilepath) -> bool: + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: """Checks if there was a regression between the old an new data.""" is_regression = False @@ -266,7 +301,7 @@ def is_regression(self, report_path: ReportFilepath) -> bool: ) old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - for old_pim_report in multi_report.get_old_report().reports(): + for old_pim_report in multi_report.get_baseline_report().reports(): for region_inter in old_pim_report.region_interaction_entries: name = get_interactions_from_fr_string( old_pim_report._translate_interaction( @@ -277,7 +312,11 @@ def is_regression(self, report_path: ReportFilepath) -> bool: old_acc_pim[name].append(time) new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - for new_pim_report in multi_report.get_new_report().reports(): + opt_mr = multi_report.get_report_for_patch(patch_name) + if not opt_mr: + raise NotImplementedError() + + for new_pim_report in opt_mr.reports(): for region_inter in new_pim_report.region_interaction_entries: name = get_interactions_from_fr_string( new_pim_report._translate_interaction( @@ -322,7 +361,7 @@ def is_regression(self, report_path: ReportFilepath) -> bool: def compute_profiler_predictions( profiler: Profiler, project_name: str, case_study: CaseStudy, - config_ids: tp.List[int] + config_ids: tp.List[int], patch_name: str ) -> tp.Optional[tp.Dict[int, bool]]: """Computes the regression predictions for a given profiler.""" @@ -345,7 +384,9 @@ def compute_profiler_predictions( ) return None - result_dict[config_id] = profiler.is_regression(report_files[0]) + result_dict[config_id] = profiler.is_regression( + report_files[0], patch_name + ) return result_dict @@ -363,48 +404,52 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: table_rows = [] for case_study in case_studies: - rev = case_study.revisions[0] - project_name = case_study.project_name - - ground_truth = get_regressing_config_ids_gt( - project_name, case_study, rev - ) + for patch_name in get_patch_names(case_study): + rev = case_study.revisions[0] + project_name = case_study.project_name - new_row = { - 'CaseStudy': - project_name, - 'Configs': - len(case_study.get_config_ids_for_revision(rev)), - 'RegressedConfigs': - len(map_to_positive_config_ids(ground_truth)) - if ground_truth else -1 - } - - for profiler in profilers: - # TODO: multiple patch cycles - predicted = compute_profiler_predictions( - profiler, project_name, case_study, - case_study.get_config_ids_for_revision(rev) + ground_truth = get_regressing_config_ids_gt( + project_name, case_study, rev, patch_name ) - if ground_truth and predicted: - results = ClassificationResults( - map_to_positive_config_ids(ground_truth), - map_to_negative_config_ids(ground_truth), - map_to_positive_config_ids(predicted), - map_to_negative_config_ids(predicted) + new_row = { + 'CaseStudy': + project_name, + 'Patch': + patch_name, + 'Configs': + len(case_study.get_config_ids_for_revision(rev)), + 
'RegressedConfigs': + len(map_to_positive_config_ids(ground_truth)) + if ground_truth else -1 + } + + for profiler in profilers: + # TODO: multiple patch cycles + predicted = compute_profiler_predictions( + profiler, project_name, case_study, + case_study.get_config_ids_for_revision(rev), patch_name ) - new_row[f"{profiler.name}_precision"] = results.precision() - new_row[f"{profiler.name}_recall"] = results.recall() - new_row[f"{profiler.name}_baccuracy" - ] = results.balanced_accuracy() - else: - new_row[f"{profiler.name}_precision"] = np.nan - new_row[f"{profiler.name}_recall"] = np.nan - new_row[f"{profiler.name}_baccuracy"] = np.nan - table_rows.append(new_row) - # df.append(new_row, ignore_index=True) + if ground_truth and predicted: + results = ClassificationResults( + map_to_positive_config_ids(ground_truth), + map_to_negative_config_ids(ground_truth), + map_to_positive_config_ids(predicted), + map_to_negative_config_ids(predicted) + ) + new_row[f"{profiler.name}_precision" + ] = results.precision() + new_row[f"{profiler.name}_recall"] = results.recall() + new_row[f"{profiler.name}_baccuracy" + ] = results.balanced_accuracy() + else: + new_row[f"{profiler.name}_precision"] = np.nan + new_row[f"{profiler.name}_recall"] = np.nan + new_row[f"{profiler.name}_baccuracy"] = np.nan + + table_rows.append(new_row) + # df.append(new_row, ignore_index=True) df = pd.concat([df, pd.DataFrame(table_rows)]) df.sort_values(["CaseStudy"], inplace=True) @@ -436,7 +481,8 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: symb_regressed_configs = "$\\mathbb{R}$" print(f"{df=}") - colum_setup = [(' ', 'CaseStudy'), ('', f'{symb_configs}'), + colum_setup = [(' ', 'CaseStudy'), (' ', 'Patch'), + ('', f'{symb_configs}'), ('', f'{symb_regressed_configs}')] for profiler in profilers: colum_setup.append((profiler.name, f'{symb_precision}')) From dbb04aca726842b02d2ef35d81bb937b1101ad69 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 17 Jul 2023 22:37:43 +0200 Subject: [PATCH 061/224] Move recompile to own file --- .../varats/experiment/steps/__init__.py | 0 .../varats/experiment/steps/recompile.py | 28 +++++++++++++++++++ .../vara/feature_perf_precision.py | 21 +------------- 3 files changed, 29 insertions(+), 20 deletions(-) create mode 100644 varats-core/varats/experiment/steps/__init__.py create mode 100644 varats-core/varats/experiment/steps/recompile.py diff --git a/varats-core/varats/experiment/steps/__init__.py b/varats-core/varats/experiment/steps/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/varats-core/varats/experiment/steps/recompile.py b/varats-core/varats/experiment/steps/recompile.py new file mode 100644 index 000000000..47219c26b --- /dev/null +++ b/varats-core/varats/experiment/steps/recompile.py @@ -0,0 +1,28 @@ +"""Recompilation support for experiments.""" +import textwrap + +from benchbuild.utils.actions import ProjectStep, StepResult +from plumbum import ProcessExecutionError + + +class ReCompile(ProjectStep): + """Experiment step to recompile a project.""" + + NAME = "RECOMPILE" + DESCRIPTION = "Recompile the project" + + def __call__(self) -> StepResult: + try: + self.project.recompile() + + except ProcessExecutionError: + self.status = StepResult.ERROR + + self.status = StepResult.OK + + return self.status + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Recompile", indent * " " + ) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py 
b/varats/varats/experiments/vara/feature_perf_precision.py index a634cd064..0c24d9f2e 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -23,6 +23,7 @@ get_extra_config_options, ZippedExperimentSteps, ) +from varats.experiment.steps.recompile import ReCompile from varats.experiment.workload_util import WorkloadCategory, workload_commands from varats.experiments.vara.feature_experiment import ( FeatureExperiment, @@ -86,26 +87,6 @@ def __init__(self, path: Path) -> None: super().__init__(path, PerfInfluenceTraceReportAggregate) -class ReCompile(ProjectStep): - NAME = "RECOMPILE" - DESCRIPTION = "Recompile the project" - - def __call__(self, _: tp.Any) -> StepResult: - try: - self.project.recompile() - - except ProcessExecutionError: - self.status = StepResult.ERROR - self.status = StepResult.OK - - return self.status - - def __str__(self, indent: int = 0) -> str: - return textwrap.indent( - f"* {self.project.name}: Recompile", indent * " " - ) - - class RunGenTracedWorkloads(AnalysisProjectStepBase): # type: ignore """Executes the traced project binaries on the specified workloads.""" From 47281da49c7ddf18ae0990446b95706f72c1f641 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 17 Jul 2023 23:21:09 +0200 Subject: [PATCH 062/224] Reworks output folder steps into own base class --- .../varats/experiment/experiment_util.py | 34 +++++++++---------- .../varats/experiments/base/time_workloads.py | 5 +-- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index 3ad21170b..dfc628e83 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -17,7 +17,7 @@ from benchbuild.extensions import base from benchbuild.project import Project from benchbuild.source import enumerate_revisions -from benchbuild.utils.actions import Step, MultiStep, StepResult +from benchbuild.utils.actions import Step, MultiStep, StepResult, ProjectStep from benchbuild.utils.cmd import prlimit, mkdir from plumbum.commands import ProcessExecutionError from plumbum.commands.base import BoundCommand @@ -506,23 +506,22 @@ def __exit__( super().__exit__(exc_type, exc_value, exc_traceback) -@runtime_checkable -class NeedsOutputFolder(Protocol): +class WrongStepCall(Exception): + """Throw if the common step method was called.""" - def __call__(self, tmp_folder: Path) -> StepResult: - ... 
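+# Design note on the replacement below: using an explicit OutputFolderStep
+# base class (instead of the structural NeedsOutputFolder protocol) means a
+# step that is accidentally invoked through the plain step interface fails
+# fast with WrongStepCall rather than silently running without an output
+# folder; subclasses implement call_with_output_folder() instead of
+# __call__().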
+class OutputFolderStep(ProjectStep): + """Special step class that needs an output folder to write to.""" -def run_child_with_output_folder( - child: NeedsOutputFolder, tmp_folder: Path -) -> StepResult: - return child(tmp_folder) - + def __call__(self) -> StepResult: + raise WrongStepCall() -# TODO: update extra wrapper step + @abstractmethod + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + """Actual call implementation that gets a path to tmp_folder.""" -class ZippedExperimentSteps(MultiStep[NeedsOutputFolder]): #type: ignore +class ZippedExperimentSteps(MultiStep[tp.Union[OutputFolderStep, ProjectStep]]): """Runs multiple actions, providing them a shared tmp folder that afterwards is zipped into an archive.""" @@ -531,7 +530,7 @@ class ZippedExperimentSteps(MultiStep[NeedsOutputFolder]): #type: ignore def __init__( self, output_filepath: ReportFilepath, - actions: tp.Optional[tp.List[NeedsOutputFolder]] + actions: tp.Optional[tp.List[tp.Union[OutputFolderStep, ProjectStep]]] ) -> None: super().__init__(actions) self.__output_filepath = output_filepath @@ -540,11 +539,10 @@ def __run_children(self, tmp_folder: Path) -> tp.List[StepResult]: results: tp.List[StepResult] = [] for child in self.actions: - results.append( - run_child_with_output_folder( - tp.cast(NeedsOutputFolder, child), tmp_folder - ) - ) + if isinstance(child, OutputFolderStep): + results.append(child.call_with_output_folder(tmp_folder)) + else: + results.append(child()) return results diff --git a/varats/varats/experiments/base/time_workloads.py b/varats/varats/experiments/base/time_workloads.py index 60242728b..aae0d68cb 100644 --- a/varats/varats/experiments/base/time_workloads.py +++ b/varats/varats/experiments/base/time_workloads.py @@ -15,6 +15,7 @@ get_default_compile_error_wrapped, create_new_success_result_filepath, ZippedExperimentSteps, + OutputFolderStep, ) from varats.experiment.workload_util import ( workload_commands, @@ -27,7 +28,7 @@ from varats.report.report import ReportSpecification -class TimeProjectWorkloads(actions.ProjectStep): # type: ignore +class TimeProjectWorkloads(OutputFolderStep): """Times the execution of all project example workloads.""" NAME = "TimeWorkloads" @@ -42,7 +43,7 @@ def __init__( self.__num = num self.__binary = binary - def __call__(self, tmp_dir: Path) -> actions.StepResult: + def call_with_output_folder(self, tmp_dir: Path) -> actions.StepResult: return self.analyze(tmp_dir) def analyze(self, tmp_dir: Path) -> actions.StepResult: From eb6479e24a3432d8e53734524578552153686ec7 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 17 Jul 2023 23:23:51 +0200 Subject: [PATCH 063/224] Adapts code to new output folder steps --- varats-core/varats/provider/patch/patch_provider.py | 6 ++---- .../experiments/vara/feature_perf_precision.py | 13 +++++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 5ae0e92b9..e313af240 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -46,8 +46,7 @@ def __init__(self, project: VProject, patch: 'Patch') -> None: super().__init__(project) self.__patch = patch - # TODO: discuss signature - def __call__(self, _: tp.Any) -> StepResult: + def __call__(self) -> StepResult: try: print( f"Applying {self.__patch.shortname} to " @@ -82,8 +81,7 @@ def __init__(self, project, patch): super().__init__(project) self.__patch = patch - # 
TODO: discuss signature - def __call__(self, _: tp.Any) -> StepResult: + def __call__(self) -> StepResult: try: print( f"Reverting {self.__patch.shortname} on " diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 0c24d9f2e..179770870 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -22,6 +22,7 @@ get_default_compile_error_wrapped, get_extra_config_options, ZippedExperimentSteps, + OutputFolderStep, ) from varats.experiment.steps.recompile import ReCompile from varats.experiment.workload_util import WorkloadCategory, workload_commands @@ -44,7 +45,7 @@ from varats.utils.git_util import ShortCommitHash -class AnalysisProjectStepBase(ProjectStep): +class AnalysisProjectStepBase(OutputFolderStep): project: VProject @@ -105,7 +106,7 @@ def __init__( ): super().__init__(project, binary, file_name, report_file_ending, reps) - def __call__(self, tmp_dir: Path) -> StepResult: + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) def __str__(self, indent: int = 0) -> str: @@ -273,7 +274,7 @@ def actions_for_project( ) -class RunBackBoxBaseline(ProjectStep): # type: ignore +class RunBackBoxBaseline(OutputFolderStep): # type: ignore """Executes the traced project binaries on the specified workloads.""" NAME = "VaRARunTracedBinaries" @@ -295,7 +296,7 @@ def __init__( self.__reps = reps self.__file_name = file_name - def __call__(self, tmp_dir: Path) -> StepResult: + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) def __str__(self, indent: int = 0) -> str: @@ -446,7 +447,7 @@ def __init__( ): super().__init__(project, binary, file_name, report_file_ending, reps) - def __call__(self, tmp_dir: Path) -> StepResult: + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) def __str__(self, indent: int = 0) -> str: @@ -615,7 +616,7 @@ def __init__( self.__report_file_ending = report_file_ending self.__reps = reps - def __call__(self, tmp_dir: Path) -> StepResult: + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) def __str__(self, indent: int = 0) -> str: From e80a7073a2135c326c970d001c73cf28474f05d3 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Wed, 19 Jul 2023 23:43:27 +0200 Subject: [PATCH 064/224] Implements plot and refactors data handling --- .../varats/report/multi_patch_report.py | 3 +- .../feature_perf_precision_database.py | 462 +++++++++++++++++ .../vara/feature_perf_precision.py | 2 +- varats/varats/plots/feature_perf_precision.py | 458 +++++++++++++++++ varats/varats/plots/scatter_plot_utils.py | 3 +- .../varats/tables/feature_perf_precision.py | 481 +----------------- 6 files changed, 940 insertions(+), 469 deletions(-) create mode 100644 varats/varats/data/databases/feature_perf_precision_database.py create mode 100644 varats/varats/plots/feature_perf_precision.py diff --git a/varats-core/varats/report/multi_patch_report.py b/varats-core/varats/report/multi_patch_report.py index e0f7a1d5b..74f861fce 100644 --- a/varats-core/varats/report/multi_patch_report.py +++ b/varats-core/varats/report/multi_patch_report.py @@ -18,6 +18,7 @@ class MultiPatchReport( def __init__(self, path: Path, report_type: tp.Type[ReportTy]) -> None: super().__init__(path) self.__patched_reports: tp.Dict[str, ReportTy] = {} + self.__base = None with 
tempfile.TemporaryDirectory() as tmp_result_dir: shutil.unpack_archive(path, extract_dir=tmp_result_dir) @@ -33,7 +34,7 @@ def __init__(self, path: Path, report_type: tp.Type[ReportTy]) -> None: if not self.__base or not self.__patched_reports: raise AssertionError( - "Reports where missing in the file {report_path=}" + f"Reports were missing in the file {path=}" ) def get_baseline_report(self) -> ReportTy: diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py new file mode 100644 index 000000000..cf5f9d4a4 --- /dev/null +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -0,0 +1,462 @@ +"""Shared data aggregation functions for analyzing feature performance.""" +import abc +import typing as tp +from collections import defaultdict + +import numpy as np +from scipy.stats import ttest_ind + +import varats.experiments.vara.feature_perf_precision as fpp +from varats.experiments.vara.feature_experiment import FeatureExperiment +from varats.paper.case_study import CaseStudy +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.report.gnu_time_report import TimeReportAggregate +from varats.report.report import BaseReport, ReportFilepath +from varats.report.tef_report import ( + TEFReport, + TraceEvent, + TraceEventType, + TEFReportAggregate, +) +from varats.revision.revisions import get_processed_revisions_files +from varats.utils.git_util import FullCommitHash + + +def get_interactions_from_fr_string(interactions: str) -> str: + """Convert the feature strings in a TEFReport from FR(x,y) to x*y, similar + to the format used by SPLConqueror.""" + interactions = ( + interactions.replace("FR", "").replace("(", "").replace(")", "") + ) + interactions_list = interactions.split(",") + # Ignore interactions with base, but do not remove base if it's the only + # feature + if "Base" in interactions_list and len(interactions_list) > 1: + interactions_list.remove("Base") + # A feature cannot interact with itself, so remove duplicates + interactions_list = list(set(interactions_list)) + + interactions_str = "*".join(interactions_list) + + return interactions_str + + +def get_feature_performance_from_tef_report( + tef_report: TEFReport, +) -> tp.Dict[str, int]: + """Extract feature performance from a TEFReport.""" + open_events: tp.List[TraceEvent] = [] + + feature_performances: tp.Dict[str, int] = {} + + for trace_event in tef_report.trace_events: + if trace_event.category == "Feature": + if (trace_event.event_type == TraceEventType.DURATION_EVENT_BEGIN): + open_events.append(trace_event) + elif (trace_event.event_type == TraceEventType.DURATION_EVENT_END): + opening_event = open_events.pop() + + end_timestamp = trace_event.timestamp + begin_timestamp = opening_event.timestamp + + # Subtract feature duration from parent duration such that + # it is not counted twice, similar to behavior in + # Performance-Influence models. + interactions = [event.name for event in open_events] + if open_events: + # Parent is equivalent to interaction of all open + # events. 
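+ # Example (editorial sketch, hypothetical feature names): with open + # events [FR(A), FR(B)] and a closing FR(C) event, the parent key is + # "A*B" and the child key is "A*B*C"; the child's duration is + # subtracted from "A*B" here and credited to "A*B*C" below.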
+ interaction_string = get_interactions_from_fr_string( + ",".join(interactions) + ) + if interaction_string in feature_performances: + feature_performances[interaction_string] -= ( + end_timestamp - begin_timestamp + ) + else: + feature_performances[interaction_string] = -( + end_timestamp - begin_timestamp + ) + + interaction_string = get_interactions_from_fr_string( + ",".join(interactions + [trace_event.name]) + ) + + current_performance = feature_performances.get( + interaction_string, 0 + ) + feature_performances[interaction_string] = ( + current_performance + end_timestamp - begin_timestamp + ) + + return feature_performances + + +class Profiler(): + """Profiler interface to add different profilers to the evaluation.""" + + def __init__( + self, name: str, experiment: tp.Type[FeatureExperiment], + overhead_experiment: tp.Type[FeatureExperiment], + report_type: tp.Type[BaseReport] + ) -> None: + self.__name = name + self.__experiment = experiment + self.__overhead_experiment = overhead_experiment + self.__report_type = report_type + + @property + def name(self) -> str: + """Name of the profiler used.""" + return self.__name + + @property + def experiment(self) -> tp.Type[FeatureExperiment]: + """Experiment used to produce this profiler's information.""" + return self.__experiment + + @property + def overhead_experiment(self) -> tp.Type[FeatureExperiment]: + """Experiment used to measure this profiler's overhead.""" + return self.__overhead_experiment + + @property + def report_type(self) -> tp.Type[BaseReport]: + """Report type used to load this profiler's information.""" + return self.__report_type + + @abc.abstractmethod + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + """Checks if there was a regression between the old and new data.""" + + +class VXray(Profiler): + """Profiler mapper implementation for the VaRA TEF tracer.""" + + def __init__(self) -> None: + super().__init__( + "WXray", fpp.TEFProfileRunner, fpp.TEFProfileOverheadRunner, + fpp.MPRTEFA + ) + + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + """Checks if there was a regression between the old and new data.""" + is_regression = False + + multi_report = fpp.MultiPatchReport( + report_path.full_path(), TEFReportAggregate + ) + + old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for old_tef_report in multi_report.get_baseline_report().reports(): + pim = get_feature_performance_from_tef_report(old_tef_report) + for feature, value in pim.items(): + old_acc_pim[feature].append(value) + + new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + opt_mr = multi_report.get_report_for_patch(patch_name) + if not opt_mr: + raise NotImplementedError() + + for new_tef_report in opt_mr.reports(): + pim = get_feature_performance_from_tef_report(new_tef_report) + for feature, value in pim.items(): + new_acc_pim[feature].append(value) + + for feature, old_values in old_acc_pim.items(): + if feature in new_acc_pim: + new_values = new_acc_pim[feature] + ttest_res = ttest_ind(old_values, new_values) + + # TODO: check, maybe we need a "very small value cut off" + if ttest_res.pvalue < 0.05: + print( + f"{self.name} found regression for feature {feature}." + ) + is_regression = True + else: + print(f"Could not find feature {feature} in new trace.") + # TODO: how to handle this? 
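+ # Note: a feature key that is present in the old trace but missing + # from the new one is conservatively flagged as a regression below; + # a renamed feature region would therefore also be reported.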
+ is_regression = True + + return is_regression + + +class PIMTracer(Profiler): + """Profiler mapper implementation for the VaRA performance-influence-model + tracer.""" + + def __init__(self) -> None: + super().__init__( + "PIMTracer", fpp.PIMProfileRunner, fpp.PIMProfileOverheadRunner, + fpp.MPRPIMA + ) + + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + """Checks if there was a regression between the old and new data.""" + is_regression = False + + multi_report = fpp.MultiPatchReport( + report_path.full_path(), fpp.PerfInfluenceTraceReportAggregate + ) + + old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for old_pim_report in multi_report.get_baseline_report().reports(): + for region_inter in old_pim_report.region_interaction_entries: + name = get_interactions_from_fr_string( + old_pim_report._translate_interaction( + region_inter.interaction + ) + ) + time = region_inter.time + old_acc_pim[name].append(time) + + new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + opt_mr = multi_report.get_report_for_patch(patch_name) + if not opt_mr: + raise NotImplementedError() + + for new_pim_report in opt_mr.reports(): + for region_inter in new_pim_report.region_interaction_entries: + name = get_interactions_from_fr_string( + new_pim_report._translate_interaction( + region_inter.interaction + ) + ) + time = region_inter.time + new_acc_pim[name].append(time) + + # TODO: same for TEF + for feature, old_values in old_acc_pim.items(): + if feature in new_acc_pim: + new_values = new_acc_pim[feature] + ttest_res = ttest_ind(old_values, new_values) + + # TODO: check, maybe we need a "very small value cut off" + if ttest_res.pvalue < 0.05: + print( + f"{self.name} found regression for feature {feature}." + ) + is_regression = True + else: + print(f"Could not find feature {feature} in new trace.") + # TODO: how to handle this? + is_regression = True + + return is_regression + + +def get_patch_names(case_study: CaseStudy) -> tp.List[str]: + """Returns the patch names found in the baseline profiling reports of a + case study.""" + report_files = get_processed_revisions_files( + case_study.project_name, + fpp.BlackBoxBaselineRunner, + fpp.MPRTRA, + get_case_study_file_name_filter(case_study), + config_id=0 + ) + + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print("Could not find profiling data. config_id=0, profiler=Baseline") + return [] + + # TODO: fix to prevent double loading + time_reports = fpp.MPRTRA(report_files[0].full_path()) + return time_reports.get_patch_names() + + +def get_regressing_config_ids_gt( + project_name: str, case_study: CaseStudy, rev: FullCommitHash, + report_name: str +) -> tp.Optional[tp.Dict[int, bool]]: + """Computes the baseline data, i.e., the config ids where a regression was + identified.""" + + gt: tp.Dict[int, bool] = {} + + for config_id in case_study.get_config_ids_for_revision(rev): + report_files = get_processed_revisions_files( + project_name, + fpp.BlackBoxBaselineRunner, + fpp.MPRTRA, + get_case_study_file_name_filter(case_study), + config_id=config_id + ) + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print( + f"Could not find profiling data. 
{config_id=}, " + f"profiler=Baseline" + ) + return None + + # TODO: fix to prevent double loading + time_reports = fpp.MPRTRA(report_files[0].full_path()) + + old_time = time_reports.get_baseline_report() + # new_time = time_reports.get_new_report() + new_time = time_reports.get_report_for_patch(report_name) + if not new_time: + return None + + if np.mean(old_time.measurements_wall_clock_time + ) == np.mean(new_time.measurements_wall_clock_time): + gt[config_id] = False + else: + # TODO: double check ttest handling + ttest_res = ttest_ind( + old_time.measurements_wall_clock_time, + new_time.measurements_wall_clock_time + ) + if ttest_res.pvalue < 0.05: + gt[config_id] = True + else: + gt[config_id] = False + + return gt + + +def map_to_positive_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: + return [config_id for config_id, value in reg_dict.items() if value is True] + + +def map_to_negative_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: + return [ + config_id for config_id, value in reg_dict.items() if value is False + ] + + +class Baseline(Profiler): + """Profiler mapper implementation for the black-box baseline.""" + + def __init__(self) -> None: + super().__init__( + "Base", fpp.BlackBoxBaselineRunner, fpp.BlackBoxOverheadBaseline, + fpp.TimeReportAggregate + ) + + def is_regression(self, report_path: ReportFilepath) -> bool: + raise NotImplementedError() + + +def compute_profiler_predictions( + profiler: Profiler, project_name: str, case_study: CaseStudy, + config_ids: tp.List[int], patch_name: str +) -> tp.Optional[tp.Dict[int, bool]]: + """Computes the regression predictions for a given profiler.""" + + result_dict: tp.Dict[int, bool] = {} + for config_id in config_ids: + report_files = get_processed_revisions_files( + project_name, + profiler.experiment, + profiler.report_type, + get_case_study_file_name_filter(case_study), + config_id=config_id + ) + + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print( + f"Could not find profiling data. 
{config_id=}, " + f"profiler={profiler.name}" + ) + return None + + result_dict[config_id] = profiler.is_regression( + report_files[0], patch_name + ) + + return result_dict + + +class OverheadData: + + def __init__( + self, profiler, mean_time: tp.Dict[int, float], + ctx_switches: tp.Dict[int, float] + ) -> None: + self.__profiler = profiler + self.__mean_time: tp.Dict[int, float] = mean_time + self.__mean_ctx_switches: tp.Dict[int, float] = ctx_switches + + def mean_time(self) -> float: + return float(np.mean(list(self.__mean_time.values()))) + + def mean_ctx(self) -> float: + return float(np.mean(list(self.__mean_ctx_switches.values()))) + + def config_wise_time_diff(self, + other: 'OverheadData') -> tp.Dict[int, float]: + return self.__config_wise(self.__mean_time, other.__mean_time) + + def config_wise_ctx_diff(self, + other: 'OverheadData') -> tp.Dict[int, float]: + return self.__config_wise( + self.__mean_ctx_switches, other.__mean_ctx_switches + ) + + @staticmethod + def __config_wise( + self_map: tp.Dict[int, float], other_map: tp.Dict[int, float] + ) -> tp.Dict[int, float]: + gen_diff: tp.Dict[int, float] = {} + for config_id, gen_value in self_map.items(): + if config_id not in other_map: + raise AssertionError("Could not find config id in other") + + gen_diff[config_id] = gen_value - other_map[config_id] + + return gen_diff + + @staticmethod + def compute_overhead_data( + profiler: Profiler, case_study: CaseStudy, rev: FullCommitHash + ) -> tp.Optional['OverheadData']: + + mean_time: tp.Dict[int, float] = {} + mean_cxt_switches: tp.Dict[int, float] = {} + + for config_id in case_study.get_config_ids_for_revision(rev): + report_files = get_processed_revisions_files( + case_study.project_name, + profiler.overhead_experiment, + TimeReportAggregate, + get_case_study_file_name_filter(case_study), + config_id=config_id + ) + + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print( + f"Could not find overhead data. {config_id=}, " + f"profiler={profiler.name}" + ) + return None + + time_report = TimeReportAggregate(report_files[0].full_path()) + mean_time[config_id] = float( + np.mean(time_report.measurements_wall_clock_time) + ) + mean_cxt_switches[config_id] = float( + np.mean(time_report.measurements_ctx_switches) + ) + if not mean_time: + print( + f"Case study for project {case_study.project_name} had " + "no configs, skipping..." 
+ ) + return None + + # print(f"{mean_time=}") + return OverheadData(profiler, mean_time, mean_cxt_switches) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 179770870..1f7e46cdb 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -596,7 +596,7 @@ def actions_for_project( ) -class RunBackBoxBaselineOverhead(ProjectStep): # type: ignore +class RunBackBoxBaselineOverhead(OutputFolderStep): # type: ignore """Executes the traced project binaries on the specified workloads.""" NAME = "VaRARunTracedBinaries" diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py new file mode 100644 index 000000000..1cd2813d7 --- /dev/null +++ b/varats/varats/plots/feature_perf_precision.py @@ -0,0 +1,458 @@ +"""Module for the FeaturePerfPrecision plots.""" +import random +import typing as tp + +import numpy as np +import pandas as pd +import seaborn as sns + +from varats.data.databases.feature_perf_precision_database import ( + Profiler, + get_regressing_config_ids_gt, + VXray, + PIMTracer, + get_patch_names, + map_to_positive_config_ids, + map_to_negative_config_ids, + compute_profiler_predictions, + Baseline, + OverheadData, +) +from varats.data.metrics import ClassificationResults +from varats.paper.case_study import CaseStudy +from varats.paper.paper_config import get_loaded_paper_config +from varats.plot.plot import Plot +from varats.plot.plots import PlotGenerator +from varats.plots.scatter_plot_utils import multivariate_grid +from varats.utils.exceptions import UnsupportedOperation +from varats.utils.git_util import FullCommitHash + + +def get_fake_prec_rows() -> tp.List[tp.Any]: + fake_rows = [] + fake_prof = [("prof1", 10), ("prof2", 42)] + for prof, seed in fake_prof: + random.seed(seed) + for _ in range(0, 3): + x = random.random() + y = random.random() + new_fake_row = { + 'CaseStudy': "fake", + 'Patch': "fpatch", + 'Configs': 42, + 'RegressedConfigs': 21, + 'precision': x, + 'recall': y, + 'profiler': prof + } + fake_rows.append(new_fake_row) + + return fake_rows + + +class PerfPrecisionPlot(Plot, plot_name='fperf_precision'): + + def plot(self, view_mode: bool) -> None: + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer()] + + # Data aggregation + df = pd.DataFrame() + table_rows_plot = [] + + for case_study in case_studies: + for patch_name in get_patch_names(case_study): + rev = case_study.revisions[0] + project_name = case_study.project_name + + ground_truth = get_regressing_config_ids_gt( + project_name, case_study, rev, patch_name + ) + + for profiler in profilers: + new_row = { + 'CaseStudy': + project_name, + 'Patch': + patch_name, + 'Configs': + len(case_study.get_config_ids_for_revision(rev)), + 'RegressedConfigs': + len(map_to_positive_config_ids(ground_truth)) + if ground_truth else -1 + } + + # TODO: multiple patch cycles + predicted = compute_profiler_predictions( + profiler, project_name, case_study, + case_study.get_config_ids_for_revision(rev), patch_name + ) + + if ground_truth and predicted: + results = ClassificationResults( + map_to_positive_config_ids(ground_truth), + map_to_negative_config_ids(ground_truth), + map_to_positive_config_ids(predicted), + map_to_negative_config_ids(predicted) + ) + + new_row['precision'] = results.precision() + new_row['recall'] = results.recall() + 
new_row['profiler'] = profiler.name + # new_row[f"{profiler.name}_precision" + # ] = results.precision() + # new_row[f"{profiler.name}_recall"] = results.recall() + # new_row[f"{profiler.name}_baccuracy" + # ] = results.balanced_accuracy() + else: + new_row['precision'] = np.nan + new_row['recall'] = np.nan + new_row['profiler'] = profiler.name + + print(f"{new_row=}") + table_rows_plot.append(new_row) + # df.append(new_row, ignore_index=True) + + df = pd.concat([df, pd.DataFrame(table_rows_plot)]) + df = pd.concat([df, pd.DataFrame(get_fake_prec_rows())]) + df.sort_values(["CaseStudy"], inplace=True) + print(f"{df=}") + + print(f"{df['profiler']=}") + grid = multivariate_grid( + df, + 'precision', + 'recall', + 'profiler', + global_kde=True, + alpha=0.8, + legend=False + ) + grid.ax_marg_x.set_xlim(0.0, 1.01) + grid.ax_marg_y.set_ylim(0.0, 1.01) + grid.ax_joint.legend([name for name, _ in df.groupby("profiler")]) + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise UnsupportedOperation + + +class PerfPrecisionPlotGenerator( + PlotGenerator, generator_name="fperf-precision", options=[] +): + + def generate(self) -> tp.List[Plot]: + + return [PerfPrecisionPlot(self.plot_config, **self.plot_kwargs)] + + +def get_fake_overhead_rows(): + fake_rows = [] + fake_prof = [("WXray", 10), ("PIMTracer", 42)] + + new_fake_row = { + 'CaseStudy': "fake", + # 'Patch': "fpatch", + 'WithoutProfiler_mean_time': 42, + 'WithoutProfiler_mean_ctx': 2, + } + + for prof, seed in fake_prof: + random.seed(seed) + # for _ in range(0, 3): + new_fake_row[f"{prof}_time_mean"] = random.randint(2, 230) + new_fake_row[f"{prof}_time_std"] = np.nan + new_fake_row[f"{prof}_time_max"] = np.nan + + new_fake_row[f"{prof}_ctx_mean"] = random.randint(2, 1230) + new_fake_row[f"{prof}_ctx_std"] = np.nan + new_fake_row[f"{prof}_ctx_max"] = np.nan + + fake_rows.append(new_fake_row) + + return fake_rows + + +def get_fake_prec_rows_overhead() -> tp.List[tp.Any]: + fake_rows = [] + fake_prof = [("WXray", 10), ("PIMTracer", 42)] + for prof, seed in fake_prof: + random.seed(seed) + for _ in range(0, 3): + n = 0.1 if prof == "PIMTracer" else 0.0 + x = random.random() + y = random.random() + new_fake_row = { + 'CaseStudy': "fake", + 'Patch': "fpatch", + 'Configs': 42, + 'RegressedConfigs': 21, + 'precision': x - n, + 'recall': y, + 'profiler': prof + } + fake_rows.append(new_fake_row) + + return fake_rows + + +def get_fake_overhead_better_rows(): + # case_study, profiler, overhead_time, overhead_ctx + fake_cs = ["SynthSAContextSensitivity", "fake"] + fake_prof = [("WXray", 10), ("PIMTracer", 12)] + fake_rows = [] + + for prof, seed in fake_prof: + random.seed(seed) + + for cs in fake_cs: + # extra = 1 if prof == 'PIMTracer' else 0 + + new_fake_row = { + 'CaseStudy': cs, + 'profiler': prof, + 'overhead_time': + (random.random() * 4) * 100, # random.randint(2, 230), + 'overhead_ctx': random.randint(2, 1230) + } + fake_rows.append(new_fake_row) + + return fake_rows + + +class PerfOverheadPlot(Plot, plot_name='fperf_overhead'): + + def other_frame(self): + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer()] + + # Data aggregation + df = pd.DataFrame() + table_rows = [] + + for case_study in case_studies: + rev = case_study.revisions[0] + project_name = case_study.project_name + + overhead_ground_truth = OverheadData.compute_overhead_data( + Baseline(), case_study, rev + ) + if not overhead_ground_truth: + print( + f"No baseline 
data for {case_study.project_name}, skipping" + ) + continue + + new_row = { + 'CaseStudy': project_name, + 'WithoutProfiler_mean_time': overhead_ground_truth.mean_time(), + 'WithoutProfiler_mean_ctx': overhead_ground_truth.mean_ctx() + } + + for profiler in profilers: + profiler_overhead = OverheadData.compute_overhead_data( + profiler, case_study, rev + ) + if profiler_overhead: + time_diff = profiler_overhead.config_wise_time_diff( + overhead_ground_truth + ) + ctx_diff = profiler_overhead.config_wise_ctx_diff( + overhead_ground_truth + ) + print(f"{time_diff=}") + new_row[f"{profiler.name}_time_mean"] = np.mean( + list(time_diff.values()) + ) + new_row[f"{profiler.name}_time_std"] = np.std( + list(time_diff.values()) + ) + new_row[f"{profiler.name}_time_max"] = np.max( + list(time_diff.values()) + ) + new_row[f"{profiler.name}_ctx_mean"] = np.mean( + list(ctx_diff.values()) + ) + new_row[f"{profiler.name}_ctx_std"] = np.std( + list(ctx_diff.values()) + ) + new_row[f"{profiler.name}_ctx_max"] = np.max( + list(ctx_diff.values()) + ) + else: + new_row[f"{profiler.name}_time_mean"] = np.nan + new_row[f"{profiler.name}_time_std"] = np.nan + new_row[f"{profiler.name}_time_max"] = np.nan + + new_row[f"{profiler.name}_ctx_mean"] = np.nan + new_row[f"{profiler.name}_ctx_std"] = np.nan + new_row[f"{profiler.name}_ctx_max"] = np.nan + + table_rows.append(new_row) + # df.append(new_row, ignore_index=True) + + df = pd.concat([df, pd.DataFrame(table_rows)]) + df.sort_values(["CaseStudy"], inplace=True) + # print(f"{df=}") + return df + + def plot(self, view_mode: bool) -> None: + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer()] + + # Data aggregation + df = pd.DataFrame() + table_rows_plot = [] + + for case_study in case_studies: + for patch_name in get_patch_names(case_study): + rev = case_study.revisions[0] + project_name = case_study.project_name + + ground_truth = get_regressing_config_ids_gt( + project_name, case_study, rev, patch_name + ) + + for profiler in profilers: + new_row = { + 'CaseStudy': + project_name, + 'Patch': + patch_name, + 'Configs': + len(case_study.get_config_ids_for_revision(rev)), + 'RegressedConfigs': + len(map_to_positive_config_ids(ground_truth)) + if ground_truth else -1 + } + + # TODO: multiple patch cycles + predicted = compute_profiler_predictions( + profiler, project_name, case_study, + case_study.get_config_ids_for_revision(rev), patch_name + ) + + if ground_truth and predicted: + results = ClassificationResults( + map_to_positive_config_ids(ground_truth), + map_to_negative_config_ids(ground_truth), + map_to_positive_config_ids(predicted), + map_to_negative_config_ids(predicted) + ) + + new_row['precision'] = results.precision() + new_row['recall'] = results.recall() + new_row['profiler'] = profiler.name + # new_row[f"{profiler.name}_precision" + # ] = results.precision() + # new_row[f"{profiler.name}_recall"] = results.recall() + # new_row[f"{profiler.name}_baccuracy" + # ] = results.balanced_accuracy() + else: + new_row['precision'] = np.nan + new_row['recall'] = np.nan + new_row['profiler'] = profiler.name + + print(f"{new_row=}") + table_rows_plot.append(new_row) + # df.append(new_row, ignore_index=True) + + df = pd.concat([df, pd.DataFrame(table_rows_plot)]) + df = pd.concat([df, pd.DataFrame(get_fake_prec_rows_overhead())]) + df.sort_values(["CaseStudy"], inplace=True) + print(f"{df=}") + + sub_df = df[["CaseStudy", "precision", "recall", "profiler"]] + sub_df = 
sub_df.groupby(['CaseStudy', "profiler"], as_index=False).agg({ + 'precision': 'mean', + 'recall': 'mean' + }) + + print(f"{sub_df=}") + + # other_df = self.other_frame() + other_df = pd.DataFrame() + other_df = pd.concat([ + other_df, pd.DataFrame(get_fake_overhead_better_rows()) + ]) + # other_df = other_df.groupby(['CaseStudy', 'profiler']) + print(f"{other_df=}") + + # final_df = sub_df.join(other_df, on=["CaseStudy", "profiler"]) + final_df = pd.merge(sub_df, other_df, on=["CaseStudy", "profiler"]) + print(f"{final_df=}") + + ax = sns.scatterplot( + final_df, + x="precision", + y='overhead_time', + hue="profiler", + style='CaseStudy', + alpha=0.5 + ) + # grid.ax_marg_x.set_xlim(0.0, 1.01) + ax.set_ylabel("Overhead in %") + # ax.set_ylim(np.max(final_df['overhead_time']) + 20, 0) + ax.set_ylim(0, np.max(final_df['overhead_time']) + 20) + ax.set_xlim(0.0, 1.01) + # ax.set_xlim(1.01, 0.0) + ax.xaxis.label.set_size(25) + ax.yaxis.label.set_size(25) + ax.tick_params(labelsize=15) + + prof_df = final_df[['profiler', 'precision', + 'overhead_time']].groupby('profiler').agg('mean') + print(f"{prof_df=}") + sns.scatterplot( + prof_df, + x="precision", + y='overhead_time', + hue="profiler", + color='grey', + ax=ax, + ) + + p = self.plot_pareto_frontier( + final_df['precision'], final_df['overhead_time'] + ) + p = self.plot_pareto_frontier( + prof_df['precision'], prof_df['overhead_time'] + ) + pf_x = [pair[0] for pair in p] + pf_y = [pair[1] for pair in p] + # plt.plot(pf_x, pf_y) + sns.lineplot(x=pf_x, y=pf_y, ax=ax, color='grey') + + # def_totals = pd.DataFrame() + # def_totals.loc['mean'] = [1, 2, 23] + # print(f"{def_totals=}") + + def plot_pareto_frontier(self, Xs, Ys, maxX=True, maxY=True): + """Pareto frontier selection process.""" + sorted_list = sorted([[Xs[i], Ys[i]] for i in range(len(Xs))], + reverse=maxY) + pareto_front = [sorted_list[0]] + for pair in sorted_list[1:]: + if maxY: + if pair[1] >= pareto_front[-1][1]: + pareto_front.append(pair) + else: + if pair[1] <= pareto_front[-1][1]: + pareto_front.append(pair) + + return pareto_front + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise UnsupportedOperation + + +class PerfOverheadPlotGenerator( + PlotGenerator, generator_name="fperf-overhead", options=[] +): + + def generate(self) -> tp.List[Plot]: + + return [PerfOverheadPlot(self.plot_config, **self.plot_kwargs)] diff --git a/varats/varats/plots/scatter_plot_utils.py b/varats/varats/plots/scatter_plot_utils.py index 3510d3da4..005dba2ed 100644 --- a/varats/varats/plots/scatter_plot_utils.py +++ b/varats/varats/plots/scatter_plot_utils.py @@ -13,6 +13,7 @@ def multivariate_grid( y: str, hue: str, global_kde: bool = True, + legend: bool = True, **kwargs: tp.Any ) -> sns.JointGrid: """ @@ -84,7 +85,7 @@ def multivariate_grid( color='grey', warn_singular=False ) - if len(grouped_data) > 1: + if len(grouped_data) > 1 and legend: plt.legend(legends) return grid diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 858c99f85..3fc3ede27 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -1,394 +1,28 @@ -"""Module for the FeaturePerfPrecision table.""" -import abc -import shutil -import tempfile +"""Module for the FeaturePerfPrecision tables.""" import typing as tp -from collections import defaultdict -from pathlib import Path import numpy as np import pandas as pd -from pylatex import Document, NoEscape, 
Package -from scipy.stats import ttest_ind - -import varats.experiments.vara.feature_perf_precision as fpp +from pylatex import Document, Package + +from varats.data.databases.feature_perf_precision_database import ( + get_patch_names, + get_regressing_config_ids_gt, + map_to_positive_config_ids, + map_to_negative_config_ids, + Profiler, + VXray, + PIMTracer, + Baseline, + compute_profiler_predictions, + OverheadData, +) from varats.data.metrics import ClassificationResults -from varats.experiments.vara.feature_experiment import FeatureExperiment -from varats.experiments.vara.feature_perf_runner import FeaturePerfRunner -from varats.jupyterhelper.file import load_tef_report from varats.paper.case_study import CaseStudy from varats.paper.paper_config import get_loaded_paper_config -from varats.paper_mgmt.case_study import get_case_study_file_name_filter -from varats.report.gnu_time_report import TimeReportAggregate -from varats.report.report import BaseReport, ReportFilepath -from varats.report.tef_report import ( - TEFReport, - TraceEvent, - TraceEventType, - TEFReportAggregate, -) -from varats.revision.revisions import get_processed_revisions_files from varats.table.table import Table from varats.table.table_utils import dataframe_to_table from varats.table.tables import TableFormat, TableGenerator -from varats.utils.git_util import FullCommitHash - - -def get_interactions_from_fr_string(interactions: str) -> str: - """Convert the feature strings in a TEFReport from FR(x,y) to x*y, similar - to the format used by SPLConqueror.""" - interactions = ( - interactions.replace("FR", "").replace("(", "").replace(")", "") - ) - interactions_list = interactions.split(",") - # Ignore interactions with base, but do not remove base if it's the only - # feature - if "Base" in interactions_list and len(interactions_list) > 1: - interactions_list.remove("Base") - # Features cannot interact with itself, so remove duplicastes - interactions_list = list(set(interactions_list)) - - interactions_str = "*".join(interactions_list) - - return interactions_str - - -def get_feature_performance_from_tef_report( - tef_report: TEFReport, -) -> tp.Dict[str, int]: - """Extract feature performance from a TEFReport.""" - open_events: tp.List[TraceEvent] = [] - - feature_performances: tp.Dict[str, int] = {} - - for trace_event in tef_report.trace_events: - if trace_event.category == "Feature": - if (trace_event.event_type == TraceEventType.DURATION_EVENT_BEGIN): - open_events.append(trace_event) - elif (trace_event.event_type == TraceEventType.DURATION_EVENT_END): - opening_event = open_events.pop() - - end_timestamp = trace_event.timestamp - begin_timestamp = opening_event.timestamp - - # Subtract feature duration from parent duration such that - # it is not counted twice, similar to behavior in - # Performance-Influence models. - interactions = [event.name for event in open_events] - if open_events: - # Parent is equivalent to interaction of all open - # events. 
- interaction_string = get_interactions_from_fr_string( - ",".join(interactions) - ) - if interaction_string in feature_performances: - feature_performances[interaction_string] -= ( - end_timestamp - begin_timestamp - ) - else: - feature_performances[interaction_string] = -( - end_timestamp - begin_timestamp - ) - - interaction_string = get_interactions_from_fr_string( - ",".join(interactions + [trace_event.name]) - ) - - current_performance = feature_performances.get( - interaction_string, 0 - ) - feature_performances[interaction_string] = ( - current_performance + end_timestamp - begin_timestamp - ) - - return feature_performances - - -def get_patch_names(case_study: CaseStudy) -> tp.List[str]: - report_files = get_processed_revisions_files( - case_study.project_name, - fpp.BlackBoxBaselineRunner, - fpp.MPRTRA, - get_case_study_file_name_filter(case_study), - config_id=0 - ) - - if len(report_files) > 1: - raise AssertionError("Should only be one") - if not report_files: - print("Could not find profiling data. config_id=0, profiler=Baseline") - return [] - - # TODO: fix to prevent double loading - time_reports = fpp.MPRTRA(report_files[0].full_path()) - return time_reports.get_patch_names() - - -def get_regressing_config_ids_gt( - project_name: str, case_study: CaseStudy, rev: FullCommitHash, - report_name: str -) -> tp.Optional[tp.Dict[int, bool]]: - """Computes the baseline data, i.e., the config ids where a regression was - identified.""" - - gt: tp.Dict[int, bool] = {} - - for config_id in case_study.get_config_ids_for_revision(rev): - report_files = get_processed_revisions_files( - project_name, - fpp.BlackBoxBaselineRunner, - fpp.MPRTRA, - get_case_study_file_name_filter(case_study), - config_id=config_id - ) - if len(report_files) > 1: - raise AssertionError("Should only be one") - if not report_files: - print( - f"Could not find profiling data. 
{config_id=}, " - f"profiler=Baseline" - ) - return None - - # TODO: fix to prevent double loading - time_reports = fpp.MPRTRA(report_files[0].full_path()) - - old_time = time_reports.get_baseline_report() - # new_time = time_reports.get_new_report() - new_time = time_reports.get_report_for_patch(report_name) - if not new_time: - return None - - if np.mean(old_time.measurements_wall_clock_time - ) == np.mean(new_time.measurements_wall_clock_time): - gt[config_id] = False - else: - # TODO: double check ttest handling - ttest_res = ttest_ind( - old_time.measurements_wall_clock_time, - new_time.measurements_wall_clock_time - ) - if ttest_res.pvalue < 0.05: - gt[config_id] = True - else: - gt[config_id] = False - - return gt - - -def map_to_positive_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: - return [config_id for config_id, value in reg_dict.items() if value is True] - - -def map_to_negative_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: - return [ - config_id for config_id, value in reg_dict.items() if value is False - ] - - -class Profiler(): - """Profiler interface to add different profilers to the evaluation.""" - - def __init__( - self, name: str, experiment: tp.Type[FeatureExperiment], - overhead_experiment: tp.Type[FeatureExperiment], - report_type: tp.Type[BaseReport] - ) -> None: - self.__name = name - self.__experiment = experiment - self.__overhead_experiment = overhead_experiment - self.__report_type = report_type - - @property - def name(self) -> str: - """Hame of the profiler used.""" - return self.__name - - @property - def experiment(self) -> tp.Type[FeatureExperiment]: - """Experiment used to produce this profilers information.""" - return self.__experiment - - @property - def overhead_experiment(self) -> tp.Type[FeatureExperiment]: - """Experiment used to produce this profilers information.""" - return self.__overhead_experiment - - @property - def report_type(self) -> tp.Type[BaseReport]: - """Report type used to load this profilers information.""" - return self.__report_type - - @abc.abstractmethod - def is_regression( - self, report_path: ReportFilepath, patch_name: str - ) -> bool: - """Checks if there was a regression between the old an new data.""" - - -class VXray(Profiler): - """Profiler mapper implementation for the vara tef tracer.""" - - def __init__(self) -> None: - super().__init__( - "WXray", fpp.TEFProfileRunner, fpp.TEFProfileOverheadRunner, - fpp.MPRTEFA - ) - - def is_regression( - self, report_path: ReportFilepath, patch_name: str - ) -> bool: - """Checks if there was a regression between the old an new data.""" - is_regression = False - - multi_report = fpp.MultiPatchReport( - report_path.full_path(), TEFReportAggregate - ) - - old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - for old_tef_report in multi_report.get_baseline_report().reports(): - pim = get_feature_performance_from_tef_report(old_tef_report) - for feature, value in pim.items(): - old_acc_pim[feature].append(value) - - new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - opt_mr = multi_report.get_report_for_patch(patch_name) - if not opt_mr: - raise NotImplementedError() - - for new_tef_report in opt_mr.reports(): - pim = get_feature_performance_from_tef_report(new_tef_report) - for feature, value in pim.items(): - new_acc_pim[feature].append(value) - - for feature, old_values in old_acc_pim.items(): - if feature in new_acc_pim: - new_values = new_acc_pim[feature] - ttest_res = ttest_ind(old_values, new_values) - - # TODO: check, 
maybe we need a "very small value cut off" - if ttest_res.pvalue < 0.05: - print( - f"{self.name} found regression for feature {feature}." - ) - is_regression = True - else: - print(f"Could not find feature {feature} in new trace.") - # TODO: how to handle this? - is_regression = True - - return is_regression - - -class PIMTracer(Profiler): - """Profiler mapper implementation for the vara performance-influence-model - tracer.""" - - def __init__(self) -> None: - super().__init__( - "PIM Tracer", fpp.PIMProfileRunner, fpp.PIMProfileOverheadRunner, - fpp.MPRPIMA - ) - - def is_regression( - self, report_path: ReportFilepath, patch_name: str - ) -> bool: - """Checks if there was a regression between the old an new data.""" - is_regression = False - - multi_report = fpp.MultiPatchReport( - report_path.full_path(), fpp.PerfInfluenceTraceReportAggregate - ) - - old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - for old_pim_report in multi_report.get_baseline_report().reports(): - for region_inter in old_pim_report.region_interaction_entries: - name = get_interactions_from_fr_string( - old_pim_report._translate_interaction( - region_inter.interaction - ) - ) - time = region_inter.time - old_acc_pim[name].append(time) - - new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - opt_mr = multi_report.get_report_for_patch(patch_name) - if not opt_mr: - raise NotImplementedError() - - for new_pim_report in opt_mr.reports(): - for region_inter in new_pim_report.region_interaction_entries: - name = get_interactions_from_fr_string( - new_pim_report._translate_interaction( - region_inter.interaction - ) - ) - time = region_inter.time - new_acc_pim[name].append(time) - - # TODO: same for TEF - for feature, old_values in old_acc_pim.items(): - if feature in new_acc_pim: - new_values = new_acc_pim[feature] - ttest_res = ttest_ind(old_values, new_values) - - # TODO: check, maybe we need a "very small value cut off" - if ttest_res.pvalue < 0.05: - print( - f"{self.name} found regression for feature {feature}." - ) - is_regression = True - else: - print(f"Could not find feature {feature} in new trace.") - # TODO: how to handle this? - is_regression = True - - return is_regression - - -class Baseline(Profiler): - """Profiler mapper implementation for the black-box baseline.""" - - def __init__(self) -> None: - super().__init__( - "Base", fpp.BlackBoxBaselineRunner, fpp.BlackBoxOverheadBaseline, - fpp.TimeReportAggregate - ) - - def is_regression(self, report_path: ReportFilepath) -> bool: - raise NotImplementedError() - - -def compute_profiler_predictions( - profiler: Profiler, project_name: str, case_study: CaseStudy, - config_ids: tp.List[int], patch_name: str -) -> tp.Optional[tp.Dict[int, bool]]: - """Computes the regression predictions for a given profiler.""" - - result_dict: tp.Dict[int, bool] = {} - for config_id in config_ids: - report_files = get_processed_revisions_files( - project_name, - profiler.experiment, - profiler.report_type, - get_case_study_file_name_filter(case_study), - config_id=config_id - ) - - if len(report_files) > 1: - raise AssertionError("Should only be one") - if not report_files: - print( - f"Could not find profiling data. 
{config_id=}, " - f"profiler={profiler.name}" - ) - return None - - result_dict[config_id] = profiler.is_regression( - report_files[0], patch_name - ) - - return result_dict class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): @@ -540,91 +174,6 @@ def generate(self) -> tp.List[Table]: ] -class OverheadData: - - def __init__( - self, profiler, mean_time: tp.Dict[int, float], - ctx_switches: tp.Dict[int, float] - ) -> None: - self.__profiler = profiler - self.__mean_time: tp.Dict[int, float] = mean_time - self.__mean_ctx_switches: tp.Dict[int, float] = ctx_switches - - def mean_time(self) -> float: - return np.mean(list(map(lambda x: float(x), self.__mean_time.values()))) - - def mean_ctx(self) -> float: - return np.mean( - list(map(lambda x: float(x), self.__mean_ctx_switches.values())) - ) - - def config_wise_time_diff(self, - other: 'OverheadData') -> tp.Dict[int, float]: - return self.__config_wise(self.__mean_time, other.__mean_time) - - def config_wise_ctx_diff(self, - other: 'OverheadData') -> tp.Dict[int, float]: - return self.__config_wise( - self.__mean_ctx_switches, other.__mean_ctx_switches - ) - - @staticmethod - def __config_wise( - self_map: tp.Dict[int, float], other_map: tp.Dict[int, float] - ) -> tp.Dict[int, float]: - gen_diff: tp.Dict[int, float] = {} - for config_id, gen_value in self_map.items(): - if config_id not in other_map: - raise AssertionError("Could not find config id in other") - - gen_diff[config_id] = gen_value - other_map[config_id] - - return gen_diff - - @staticmethod - def compute_overhead_data( - profiler: Profiler, case_study: CaseStudy, rev: FullCommitHash - ) -> tp.Optional['OverheadData']: - - mean_time: tp.Dict[int, float] = {} - mean_cxt_switches: tp.Dict[int, float] = {} - - for config_id in case_study.get_config_ids_for_revision(rev): - report_files = get_processed_revisions_files( - case_study.project_name, - profiler.overhead_experiment, - TimeReportAggregate, - get_case_study_file_name_filter(case_study), - config_id=config_id - ) - - if len(report_files) > 1: - raise AssertionError("Should only be one") - if not report_files: - print( - f"Could not find overhead data. {config_id=}, " - f"profiler={profiler.name}" - ) - return None - - time_report = TimeReportAggregate(report_files[0].full_path()) - mean_time[config_id] = float( - np.mean(time_report.measurements_wall_clock_time) - ) - mean_cxt_switches[config_id] = float( - np.mean(time_report.measurements_ctx_switches) - ) - if not mean_time: - print( - f"Case study for project {case_study.project_name} had " - "no configs, skipping..." 
- ) - return None - - # print(f"{mean_time=}") - return OverheadData(profiler, mean_time, mean_cxt_switches) - - class FeaturePerfOverheadTable(Table, table_name="fperf_overhead"): """Table that compares overhead of different feature performance measurement approaches.""" From 50e9fded6296753911d641a813848b0ca0c1c1bd Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 20 Jul 2023 00:21:32 +0200 Subject: [PATCH 065/224] Fixes example plotting and pareto bug --- varats/varats/plots/feature_perf_precision.py | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 1cd2813d7..c17a86a35 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -180,7 +180,7 @@ def get_fake_prec_rows_overhead() -> tp.List[tp.Any]: for prof, seed in fake_prof: random.seed(seed) for _ in range(0, 3): - n = 0.1 if prof == "PIMTracer" else 0.0 + n = -0.1 if prof == "PIMTracer" else 0.0 x = random.random() y = random.random() new_fake_row = { @@ -385,17 +385,18 @@ def plot(self, view_mode: bool) -> None: ax = sns.scatterplot( final_df, - x="precision", - y='overhead_time', + x='overhead_time', + y="precision", hue="profiler", style='CaseStudy', alpha=0.5 ) # grid.ax_marg_x.set_xlim(0.0, 1.01) - ax.set_ylabel("Overhead in %") + ax.set_xlabel("Overhead in %") # ax.set_ylim(np.max(final_df['overhead_time']) + 20, 0) - ax.set_ylim(0, np.max(final_df['overhead_time']) + 20) - ax.set_xlim(0.0, 1.01) + ax.set_ylim(0.0, 1.01) + # ax.set_xlim(0, np.max(final_df['overhead_time']) + 20) + ax.set_xlim(np.max(final_df['overhead_time']) + 20, 0) # ax.set_xlim(1.01, 0.0) ax.xaxis.label.set_size(25) ax.yaxis.label.set_size(25) @@ -406,23 +407,25 @@ def plot(self, view_mode: bool) -> None: print(f"{prof_df=}") sns.scatterplot( prof_df, - x="precision", - y='overhead_time', + x='overhead_time', + y="precision", hue="profiler", color='grey', ax=ax, + legend=False, ) + # p = self.plot_pareto_frontier( + # final_df['precision'], final_df['overhead_time'] + # ) p = self.plot_pareto_frontier( - final_df['precision'], final_df['overhead_time'] - ) - p = self.plot_pareto_frontier( - prof_df['precision'], prof_df['overhead_time'] + prof_df['overhead_time'], prof_df['precision'], maxX=False ) pf_x = [pair[0] for pair in p] pf_y = [pair[1] for pair in p] + print(f"{pf_x=}, {pf_y=}") # plt.plot(pf_x, pf_y) - sns.lineplot(x=pf_x, y=pf_y, ax=ax, color='grey') + sns.lineplot(x=pf_x, y=pf_y, ax=ax, color='grey', legend=False) # def_totals = pd.DataFrame() # def_totals.loc['mean'] = [1, 2, 23] @@ -431,7 +434,8 @@ def plot(self, view_mode: bool) -> None: def plot_pareto_frontier(self, Xs, Ys, maxX=True, maxY=True): """Pareto frontier selection process.""" sorted_list = sorted([[Xs[i], Ys[i]] for i in range(len(Xs))], - reverse=maxY) + reverse=maxX) + print(f"{sorted_list=}") pareto_front = [sorted_list[0]] for pair in sorted_list[1:]: if maxY: From 10e773c6912493e67a1c4b5f5ab6afb58d2f9c91 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 20 Jul 2023 13:34:11 +0200 Subject: [PATCH 066/224] Plot polishing --- varats/varats/plots/feature_perf_precision.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index c17a86a35..141432954 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ 
b/varats/varats/plots/feature_perf_precision.py @@ -124,14 +124,20 @@ def plot(self, view_mode: bool) -> None: 'precision', 'recall', 'profiler', - global_kde=True, + global_kde=False, alpha=0.8, - legend=False + legend=False, + s=100 ) - grid.ax_marg_x.set_xlim(0.0, 1.01) - grid.ax_marg_y.set_ylim(0.0, 1.01) + grid.ax_marg_x.set_xlim(0.0, 1.02) + grid.ax_marg_y.set_ylim(0.0, 1.02) grid.ax_joint.legend([name for name, _ in df.groupby("profiler")]) + grid.ax_joint.set_xlabel("Precision") + grid.ax_joint.set_ylabel("Recall") + grid.ax_joint.xaxis.label.set_size(20) + grid.ax_joint.yaxis.label.set_size(20) + def calc_missing_revisions( self, boundary_gradient: float ) -> tp.Set[FullCommitHash]: From f93fc734880adc940a5386841d50167edc06dae5 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 20 Jul 2023 14:09:33 +0200 Subject: [PATCH 067/224] More plot RQ2 tweaks --- varats/varats/plots/feature_perf_precision.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 141432954..0640dde6c 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -125,7 +125,7 @@ def plot(self, view_mode: bool) -> None: 'recall', 'profiler', global_kde=False, - alpha=0.8, + alpha=0.7, legend=False, s=100 ) @@ -395,10 +395,12 @@ def plot(self, view_mode: bool) -> None: y="precision", hue="profiler", style='CaseStudy', - alpha=0.5 + alpha=0.5, + s=100 ) # grid.ax_marg_x.set_xlim(0.0, 1.01) ax.set_xlabel("Overhead in %") + ax.set_ylabel("F1-Score") # ax.set_ylim(np.max(final_df['overhead_time']) + 20, 0) ax.set_ylim(0.0, 1.01) # ax.set_xlim(0, np.max(final_df['overhead_time']) + 20) @@ -416,9 +418,10 @@ def plot(self, view_mode: bool) -> None: x='overhead_time', y="precision", hue="profiler", - color='grey', + color='dimgrey', ax=ax, legend=False, + s=100 ) # p = self.plot_pareto_frontier( @@ -431,7 +434,9 @@ def plot(self, view_mode: bool) -> None: pf_y = [pair[1] for pair in p] print(f"{pf_x=}, {pf_y=}") # plt.plot(pf_x, pf_y) - sns.lineplot(x=pf_x, y=pf_y, ax=ax, color='grey', legend=False) + sns.lineplot( + x=pf_x, y=pf_y, ax=ax, color='grey', legend=False, linewidth=2.5 + ) # def_totals = pd.DataFrame() # def_totals.loc['mean'] = [1, 2, 23] From 2d3b3d14051707604b1d245f911e795f6300d7b8 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 20 Jul 2023 16:55:38 +0200 Subject: [PATCH 068/224] Strange pareto plot with a lot of extras :D --- varats/varats/plots/feature_perf_precision.py | 153 ++++++++++++++---- 1 file changed, 120 insertions(+), 33 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 0640dde6c..7fa157ad3 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -5,6 +5,8 @@ import numpy as np import pandas as pd import seaborn as sns +from matplotlib import pyplot as plt +from matplotlib.text import Text from varats.data.databases.feature_perf_precision_database import ( Profiler, @@ -43,7 +45,7 @@ def get_fake_prec_rows() -> tp.List[tp.Any]: 'RegressedConfigs': 21, 'precision': x, 'recall': y, - 'profiler': prof + 'Profiler': prof } fake_rows.append(new_fake_row) @@ -98,7 +100,7 @@ def plot(self, view_mode: bool) -> None: new_row['precision'] = results.precision() new_row['recall'] = results.recall() - new_row['profiler'] = profiler.name + new_row['Profiler'] = profiler.name # 
new_row[f"{profiler.name}_precision" # ] = results.precision() # new_row[f"{profiler.name}_recall"] = results.recall() @@ -107,7 +109,7 @@ def plot(self, view_mode: bool) -> None: else: new_row['precision'] = np.nan new_row['recall'] = np.nan - new_row['profiler'] = profiler.name + new_row['Profiler'] = profiler.name print(f"{new_row=}") table_rows_plot.append(new_row) @@ -118,12 +120,12 @@ def plot(self, view_mode: bool) -> None: df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") - print(f"{df['profiler']=}") + print(f"{df['Profiler']=}") grid = multivariate_grid( df, 'precision', 'recall', - 'profiler', + 'Profiler', global_kde=False, alpha=0.7, legend=False, @@ -131,7 +133,7 @@ def plot(self, view_mode: bool) -> None: ) grid.ax_marg_x.set_xlim(0.0, 1.02) grid.ax_marg_y.set_ylim(0.0, 1.02) - grid.ax_joint.legend([name for name, _ in df.groupby("profiler")]) + grid.ax_joint.legend([name for name, _ in df.groupby("Profiler")]) grid.ax_joint.set_xlabel("Precision") grid.ax_joint.set_ylabel("Recall") @@ -196,7 +198,7 @@ def get_fake_prec_rows_overhead() -> tp.List[tp.Any]: 'RegressedConfigs': 21, 'precision': x - n, 'recall': y, - 'profiler': prof + 'Profiler': prof } fake_rows.append(new_fake_row) @@ -217,7 +219,7 @@ def get_fake_overhead_better_rows(): new_fake_row = { 'CaseStudy': cs, - 'profiler': prof, + 'Profiler': prof, 'overhead_time': (random.random() * 4) * 100, # random.randint(2, 230), 'overhead_ctx': random.randint(2, 1230) @@ -349,7 +351,7 @@ def plot(self, view_mode: bool) -> None: new_row['precision'] = results.precision() new_row['recall'] = results.recall() - new_row['profiler'] = profiler.name + new_row['Profiler'] = profiler.name # new_row[f"{profiler.name}_precision" # ] = results.precision() # new_row[f"{profiler.name}_recall"] = results.recall() @@ -358,7 +360,7 @@ def plot(self, view_mode: bool) -> None: else: new_row['precision'] = np.nan new_row['recall'] = np.nan - new_row['profiler'] = profiler.name + new_row['Profiler'] = profiler.name print(f"{new_row=}") table_rows_plot.append(new_row) @@ -369,8 +371,8 @@ def plot(self, view_mode: bool) -> None: df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") - sub_df = df[["CaseStudy", "precision", "recall", "profiler"]] - sub_df = sub_df.groupby(['CaseStudy', "profiler"], as_index=False).agg({ + sub_df = df[["CaseStudy", "precision", "recall", "Profiler"]] + sub_df = sub_df.groupby(['CaseStudy', "Profiler"], as_index=False).agg({ 'precision': 'mean', 'recall': 'mean' }) @@ -382,60 +384,123 @@ def plot(self, view_mode: bool) -> None: other_df = pd.concat([ other_df, pd.DataFrame(get_fake_overhead_better_rows()) ]) - # other_df = other_df.groupby(['CaseStudy', 'profiler']) + # other_df = other_df.groupby(['CaseStudy', 'Profiler']) print(f"{other_df=}") - # final_df = sub_df.join(other_df, on=["CaseStudy", "profiler"]) - final_df = pd.merge(sub_df, other_df, on=["CaseStudy", "profiler"]) + # final_df = sub_df.join(other_df, on=["CaseStudy", "Profiler"]) + final_df = pd.merge(sub_df, other_df, on=["CaseStudy", "Profiler"]) print(f"{final_df=}") ax = sns.scatterplot( final_df, x='overhead_time', y="precision", - hue="profiler", + hue="Profiler", style='CaseStudy', alpha=0.5, s=100 ) + + print(f"{ax.legend()=}") + print(f"{type(ax.legend())=}") + print(f"{ax.legend().get_children()=}") + print(f"{ax.legend().prop=}") + print(f"{ax.legend().get_title()}") + print(f"{ax.legend().get_lines()}") + print(f"{ax.legend().get_patches()}") + print(f"{ax.legend().get_texts()}") + ax.legend().set_title("Walrus") + + for 
text_obj in ax.legend().get_texts(): + text_obj: Text + + text_obj.set_fontsize("small") + print(f"{text_obj=}") + if text_obj.get_text() == "Profiler": + text_obj.set_text("Profilers") + text_obj.set_fontweight("bold") + + if text_obj.get_text() == "CaseStudy": + text_obj.set_text("Subject Systems") + text_obj.set_fontweight("bold") + + # ax.legend().set_bbox_to_anchor((1, 0.5)) + # grid.ax_marg_x.set_xlim(0.0, 1.01) ax.set_xlabel("Overhead in %") ax.set_ylabel("F1-Score") # ax.set_ylim(np.max(final_df['overhead_time']) + 20, 0) - ax.set_ylim(0.0, 1.01) + ax.set_ylim(0.0, 1.02) # ax.set_xlim(0, np.max(final_df['overhead_time']) + 20) ax.set_xlim(np.max(final_df['overhead_time']) + 20, 0) # ax.set_xlim(1.01, 0.0) - ax.xaxis.label.set_size(25) - ax.yaxis.label.set_size(25) + ax.xaxis.label.set_size(20) + ax.yaxis.label.set_size(20) ax.tick_params(labelsize=15) - prof_df = final_df[['profiler', 'precision', - 'overhead_time']].groupby('profiler').agg('mean') + prof_df = final_df[['Profiler', 'precision', 'overhead_time' + ]].groupby('Profiler').agg(['mean', 'std']) print(f"{prof_df=}") + p = self.plot_pareto_frontier( + prof_df['overhead_time']['mean'], + prof_df['precision']['mean'], + maxX=False + ) + p = self.plot_pareto_frontier_std( + prof_df['overhead_time']['mean'], + prof_df['precision']['mean'], + prof_df['overhead_time']['std'], + prof_df['precision']['std'], + maxX=False + ) + + pf_x = [pair[0] for pair in p] + pf_y = [pair[1] for pair in p] + pf_x_error = [pair[2] for pair in p] + pf_y_error = [pair[3] for pair in p] + + ax.errorbar( + pf_x, + pf_y, + xerr=pf_x_error, + yerr=pf_y_error, + fmt='none', + color='grey', + zorder=0, + capsize=2, + capthick=0.6, + elinewidth=0.6 + ) + sns.scatterplot( prof_df, - x='overhead_time', - y="precision", - hue="profiler", - color='dimgrey', + x=('overhead_time', 'mean'), + y=("precision", 'mean'), + hue="Profiler", ax=ax, legend=False, - s=100 + s=100, + zorder=2 ) # p = self.plot_pareto_frontier( # final_df['precision'], final_df['overhead_time'] # ) - p = self.plot_pareto_frontier( - prof_df['overhead_time'], prof_df['precision'], maxX=False - ) - pf_x = [pair[0] for pair in p] - pf_y = [pair[1] for pair in p] - print(f"{pf_x=}, {pf_y=}") + + print(f"""{pf_x=} +{pf_y=} +{pf_x_error=} +{pf_y_error=} +""") # plt.plot(pf_x, pf_y) sns.lineplot( - x=pf_x, y=pf_y, ax=ax, color='grey', legend=False, linewidth=2.5 + x=pf_x, + y=pf_y, + ax=ax, + color='firebrick', + legend=False, + linewidth=2.5, + zorder=1 ) # def_totals = pd.DataFrame() @@ -449,6 +514,28 @@ def plot_pareto_frontier(self, Xs, Ys, maxX=True, maxY=True): print(f"{sorted_list=}") pareto_front = [sorted_list[0]] for pair in sorted_list[1:]: + print(f"{pair=}") + if maxY: + if pair[1] >= pareto_front[-1][1]: + pareto_front.append(pair) + else: + if pair[1] <= pareto_front[-1][1]: + pareto_front.append(pair) + + return pareto_front + + def plot_pareto_frontier_std( + self, Xs, Ys, Xstds, Ystds, maxX=True, maxY=True + ): + """Pareto frontier selection process.""" + sorted_list = sorted([ + [Xs[i], Ys[i], Xstds[i], Ystds[i]] for i in range(len(Xs)) + ], + reverse=maxX) + print(f"{sorted_list=}") + pareto_front = [sorted_list[0]] + for pair in sorted_list[1:]: + print(f"{pair=}") if maxY: if pair[1] >= pareto_front[-1][1]: pareto_front.append(pair) From 8a5b6a4aa46b6f702cc2420e513234cda49fb04b Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 22 Jul 2023 22:10:50 +0200 Subject: [PATCH 069/224] Refactor f1_score plotting --- varats/varats/plots/feature_perf_precision.py | 74 
++++++++++++------- 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 7fa157ad3..ee325cf37 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -191,6 +191,7 @@ def get_fake_prec_rows_overhead() -> tp.List[tp.Any]: n = -0.1 if prof == "PIMTracer" else 0.0 x = random.random() y = random.random() + z = random.random() new_fake_row = { 'CaseStudy': "fake", 'Patch': "fpatch", @@ -198,6 +199,7 @@ def get_fake_prec_rows_overhead() -> tp.List[tp.Any]: 'RegressedConfigs': 21, 'precision': x - n, 'recall': y, + 'f1_score': z, 'Profiler': prof } fake_rows.append(new_fake_row) @@ -351,6 +353,7 @@ def plot(self, view_mode: bool) -> None: new_row['precision'] = results.precision() new_row['recall'] = results.recall() + new_row['f1_score'] = results.f1_score() new_row['Profiler'] = profiler.name # new_row[f"{profiler.name}_precision" # ] = results.precision() @@ -360,6 +363,7 @@ def plot(self, view_mode: bool) -> None: else: new_row['precision'] = np.nan new_row['recall'] = np.nan + new_row['f1_score'] = np.nan new_row['Profiler'] = profiler.name print(f"{new_row=}") @@ -367,14 +371,17 @@ def plot(self, view_mode: bool) -> None: # df.append(new_row, ignore_index=True) df = pd.concat([df, pd.DataFrame(table_rows_plot)]) - df = pd.concat([df, pd.DataFrame(get_fake_prec_rows_overhead())]) + # df = pd.concat([df, pd.DataFrame(get_fake_prec_rows_overhead())]) df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") - sub_df = df[["CaseStudy", "precision", "recall", "Profiler"]] + sub_df = df[[ + "CaseStudy", "precision", "recall", "Profiler", "f1_score" + ]] sub_df = sub_df.groupby(['CaseStudy', "Profiler"], as_index=False).agg({ 'precision': 'mean', - 'recall': 'mean' + 'recall': 'mean', + 'f1_score': 'mean' }) print(f"{sub_df=}") @@ -387,6 +394,9 @@ def plot(self, view_mode: bool) -> None: # other_df = other_df.groupby(['CaseStudy', 'Profiler']) print(f"{other_df=}") + target_row = "f1_score" + # target_row = "precision" + # final_df = sub_df.join(other_df, on=["CaseStudy", "Profiler"]) final_df = pd.merge(sub_df, other_df, on=["CaseStudy", "Profiler"]) print(f"{final_df=}") @@ -394,7 +404,7 @@ def plot(self, view_mode: bool) -> None: ax = sns.scatterplot( final_df, x='overhead_time', - y="precision", + y=target_row, hue="Profiler", style='CaseStudy', alpha=0.5, @@ -428,7 +438,9 @@ def plot(self, view_mode: bool) -> None: # grid.ax_marg_x.set_xlim(0.0, 1.01) ax.set_xlabel("Overhead in %") - ax.set_ylabel("F1-Score") + if target_row == "f1_score": + ax.set_ylabel("F1-Score") + # ax.set_ylim(np.max(final_df['overhead_time']) + 20, 0) ax.set_ylim(0.0, 1.02) # ax.set_xlim(0, np.max(final_df['overhead_time']) + 20) @@ -438,32 +450,40 @@ def plot(self, view_mode: bool) -> None: ax.yaxis.label.set_size(20) ax.tick_params(labelsize=15) - prof_df = final_df[['Profiler', 'precision', 'overhead_time' - ]].groupby('Profiler').agg(['mean', 'std']) + prof_df = final_df[[ + 'Profiler', 'precision', 'overhead_time', 'f1_score' + ]].groupby('Profiler').agg(['mean', 'std']) + prof_df.fillna(0, inplace=True) + print(f"{prof_df=}") p = self.plot_pareto_frontier( prof_df['overhead_time']['mean'], - prof_df['precision']['mean'], - maxX=False - ) - p = self.plot_pareto_frontier_std( - prof_df['overhead_time']['mean'], - prof_df['precision']['mean'], - prof_df['overhead_time']['std'], - prof_df['precision']['std'], + prof_df[target_row]['mean'], maxX=False 
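+ # maxX=False: points are sorted by ascending overhead, so the + # frontier keeps a point only if its score at least matches the + # best score seen so far (maxY defaults to True).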
) + # p = self.plot_pareto_frontier_std( + # prof_df['overhead_time']['mean'], + # prof_df[target_row]['mean'], + # prof_df['overhead_time']['std'], + # prof_df[target_row]['std'], + # maxX=False + # ) pf_x = [pair[0] for pair in p] pf_y = [pair[1] for pair in p] - pf_x_error = [pair[2] for pair in p] - pf_y_error = [pair[3] for pair in p] + # pf_x_error = [pair[2] for pair in p] + # pf_y_error = [pair[3] for pair in p] + + x_loc = prof_df['overhead_time']['mean'] + y_loc = prof_df[target_row]['mean'] + x_error = prof_df['overhead_time']['std'] + y_error = prof_df[target_row]['std'] ax.errorbar( - pf_x, - pf_y, - xerr=pf_x_error, - yerr=pf_y_error, + x_loc, # pf_x, + y_loc, # pf_y, + xerr=x_error, # xerr=pf_x_error, + yerr=y_error, # yerr=pf_y_error, fmt='none', color='grey', zorder=0, @@ -475,7 +495,7 @@ def plot(self, view_mode: bool) -> None: sns.scatterplot( prof_df, x=('overhead_time', 'mean'), - y=("precision", 'mean'), + y=(target_row, 'mean'), hue="Profiler", ax=ax, legend=False, @@ -487,11 +507,11 @@ def plot(self, view_mode: bool) -> None: # final_df['precision'], final_df['overhead_time'] # ) - print(f"""{pf_x=} -{pf_y=} -{pf_x_error=} -{pf_y_error=} -""") + # print(f"""{pf_x=} + # {pf_y=} + # {pf_x_error=} + # {pf_y_error=} + # """) # plt.plot(pf_x, pf_y) sns.lineplot( x=pf_x, From 2b9c6b06f3aeb8d093bfb8a2739c436768f604d3 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 22 Jul 2023 22:23:40 +0200 Subject: [PATCH 070/224] Moves precision calc of plots to database --- .../feature_perf_precision_database.py | 63 ++++++++++ varats/varats/plots/feature_perf_precision.py | 118 +----------------- 2 files changed, 66 insertions(+), 115 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index cf5f9d4a4..e5e5a7c4a 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -4,9 +4,11 @@ from collections import defaultdict import numpy as np +import pandas as pd from scipy.stats import ttest_ind import varats.experiments.vara.feature_perf_precision as fpp +from varats.data.metrics import ClassificationResults from varats.experiments.vara.feature_experiment import FeatureExperiment from varats.paper.case_study import CaseStudy from varats.paper_mgmt.case_study import get_case_study_file_name_filter @@ -460,3 +462,64 @@ def compute_overhead_data( # print(f"{mean_time=}") return OverheadData(profiler, mean_time, mean_cxt_switches) + + +def load_precision_data(case_studies, profilers): + table_rows_plot = [] + for case_study in case_studies: + for patch_name in get_patch_names(case_study): + rev = case_study.revisions[0] + project_name = case_study.project_name + + ground_truth = get_regressing_config_ids_gt( + project_name, case_study, rev, patch_name + ) + + for profiler in profilers: + new_row = { + 'CaseStudy': + project_name, + 'Patch': + patch_name, + 'Configs': + len(case_study.get_config_ids_for_revision(rev)), + 'RegressedConfigs': + len(map_to_positive_config_ids(ground_truth)) + if ground_truth else -1 + } + + # TODO: multiple patch cycles + predicted = compute_profiler_predictions( + profiler, project_name, case_study, + case_study.get_config_ids_for_revision(rev), patch_name + ) + + if ground_truth and predicted: + results = ClassificationResults( + map_to_positive_config_ids(ground_truth), + map_to_negative_config_ids(ground_truth), + map_to_positive_config_ids(predicted), + 
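# For orientation: ClassificationResults receives the positive/negative
# config-ID sets of ground truth and prediction. Conceptually (a sketch, not
# the verbatim implementation from varats.data.metrics), the derived metrics
# reduce to set arithmetic:
def precision(gt_positive: set, predicted_positive: set) -> float:
    if not predicted_positive:
        return 0.0
    return len(gt_positive & predicted_positive) / len(predicted_positive)

def recall(gt_positive: set, predicted_positive: set) -> float:
    if not gt_positive:
        return 0.0
    return len(gt_positive & predicted_positive) / len(gt_positive)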
map_to_negative_config_ids(predicted) + ) + + new_row['precision'] = results.precision() + new_row['recall'] = results.recall() + new_row['f1_score'] = results.f1_score() + new_row['Profiler'] = profiler.name + # new_row[f"{profiler.name}_precision" + # ] = results.precision() + # new_row[f"{profiler.name}_recall"] = results.recall() + # new_row[f"{profiler.name}_baccuracy" + # ] = results.balanced_accuracy() + else: + new_row['precision'] = np.nan + new_row['recall'] = np.nan + new_row['f1_score'] = np.nan + new_row['Profiler'] = profiler.name + + print(f"{new_row=}") + table_rows_plot.append(new_row) + # df.append(new_row, ignore_index=True) + + df = pd.DataFrame() + return pd.concat([df, pd.DataFrame(table_rows_plot)]) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index ee325cf37..5df20ed7d 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -19,6 +19,7 @@ compute_profiler_predictions, Baseline, OverheadData, + load_precision_data, ) from varats.data.metrics import ClassificationResults from varats.paper.case_study import CaseStudy @@ -60,62 +61,7 @@ def plot(self, view_mode: bool) -> None: # Data aggregation df = pd.DataFrame() - table_rows_plot = [] - - for case_study in case_studies: - for patch_name in get_patch_names(case_study): - rev = case_study.revisions[0] - project_name = case_study.project_name - - ground_truth = get_regressing_config_ids_gt( - project_name, case_study, rev, patch_name - ) - - for profiler in profilers: - new_row = { - 'CaseStudy': - project_name, - 'Patch': - patch_name, - 'Configs': - len(case_study.get_config_ids_for_revision(rev)), - 'RegressedConfigs': - len(map_to_positive_config_ids(ground_truth)) - if ground_truth else -1 - } - - # TODO: multiple patch cycles - predicted = compute_profiler_predictions( - profiler, project_name, case_study, - case_study.get_config_ids_for_revision(rev), patch_name - ) - - if ground_truth and predicted: - results = ClassificationResults( - map_to_positive_config_ids(ground_truth), - map_to_negative_config_ids(ground_truth), - map_to_positive_config_ids(predicted), - map_to_negative_config_ids(predicted) - ) - - new_row['precision'] = results.precision() - new_row['recall'] = results.recall() - new_row['Profiler'] = profiler.name - # new_row[f"{profiler.name}_precision" - # ] = results.precision() - # new_row[f"{profiler.name}_recall"] = results.recall() - # new_row[f"{profiler.name}_baccuracy" - # ] = results.balanced_accuracy() - else: - new_row['precision'] = np.nan - new_row['recall'] = np.nan - new_row['Profiler'] = profiler.name - - print(f"{new_row=}") - table_rows_plot.append(new_row) - # df.append(new_row, ignore_index=True) - - df = pd.concat([df, pd.DataFrame(table_rows_plot)]) + df = load_precision_data(case_studies, profilers) df = pd.concat([df, pd.DataFrame(get_fake_prec_rows())]) df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") @@ -312,65 +258,7 @@ def plot(self, view_mode: bool) -> None: profilers: tp.List[Profiler] = [VXray(), PIMTracer()] # Data aggregation - df = pd.DataFrame() - table_rows_plot = [] - - for case_study in case_studies: - for patch_name in get_patch_names(case_study): - rev = case_study.revisions[0] - project_name = case_study.project_name - - ground_truth = get_regressing_config_ids_gt( - project_name, case_study, rev, patch_name - ) - - for profiler in profilers: - new_row = { - 'CaseStudy': - project_name, - 'Patch': - patch_name, - 'Configs': - 
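# Usage sketch: both the precision plot and the overhead plot can now share
# this loader instead of duplicating the aggregation loop. Note that
# concatenating with an empty frame is redundant; the follow-up commit below
# reduces it to `return pd.DataFrame(table_rows_plot)`.
df = load_precision_data(case_studies, profilers)
df.sort_values(["CaseStudy"], inplace=True)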
len(case_study.get_config_ids_for_revision(rev)), - 'RegressedConfigs': - len(map_to_positive_config_ids(ground_truth)) - if ground_truth else -1 - } - - # TODO: multiple patch cycles - predicted = compute_profiler_predictions( - profiler, project_name, case_study, - case_study.get_config_ids_for_revision(rev), patch_name - ) - - if ground_truth and predicted: - results = ClassificationResults( - map_to_positive_config_ids(ground_truth), - map_to_negative_config_ids(ground_truth), - map_to_positive_config_ids(predicted), - map_to_negative_config_ids(predicted) - ) - - new_row['precision'] = results.precision() - new_row['recall'] = results.recall() - new_row['f1_score'] = results.f1_score() - new_row['Profiler'] = profiler.name - # new_row[f"{profiler.name}_precision" - # ] = results.precision() - # new_row[f"{profiler.name}_recall"] = results.recall() - # new_row[f"{profiler.name}_baccuracy" - # ] = results.balanced_accuracy() - else: - new_row['precision'] = np.nan - new_row['recall'] = np.nan - new_row['f1_score'] = np.nan - new_row['Profiler'] = profiler.name - - print(f"{new_row=}") - table_rows_plot.append(new_row) - # df.append(new_row, ignore_index=True) - - df = pd.concat([df, pd.DataFrame(table_rows_plot)]) + df = load_precision_data(case_studies, profilers) # df = pd.concat([df, pd.DataFrame(get_fake_prec_rows_overhead())]) df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") From 4003e60c2c23c63b7c3d472657866543ad5f2c8b Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 22 Jul 2023 23:19:22 +0200 Subject: [PATCH 071/224] Implements correct file loading for overhead data --- .../feature_perf_precision_database.py | 63 +++++++++++++++++- varats/varats/plots/feature_perf_precision.py | 64 +++++++++++++------ 2 files changed, 105 insertions(+), 22 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index e5e5a7c4a..b7b70cfc7 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -464,7 +464,7 @@ def compute_overhead_data( return OverheadData(profiler, mean_time, mean_cxt_switches) -def load_precision_data(case_studies, profilers): +def load_precision_data(case_studies, profilers) -> pd.DataFrame: table_rows_plot = [] for case_study in case_studies: for patch_name in get_patch_names(case_study): @@ -521,5 +521,62 @@ def load_precision_data(case_studies, profilers): table_rows_plot.append(new_row) # df.append(new_row, ignore_index=True) - df = pd.DataFrame() - return pd.concat([df, pd.DataFrame(table_rows_plot)]) + return pd.DataFrame(table_rows_plot) + + +def load_overhead_data(case_studies, profilers) -> pd.DataFrame: + table_rows = [] + + for case_study in case_studies: + rev = case_study.revisions[0] + project_name = case_study.project_name + + overhead_ground_truth = OverheadData.compute_overhead_data( + Baseline(), case_study, rev + ) + if not overhead_ground_truth: + print(f"No baseline data for {case_study.project_name}, skipping") + continue + + new_row = { + 'CaseStudy': project_name, + 'Profiler': "Base", + 'time': + overhead_ground_truth.mean_time(), # random.randint(2, 230), + 'ctx': overhead_ground_truth.mean_ctx(), + 'overhead_time': 0, + 'overhead_ctx': 0 + } + + table_rows.append(new_row) + + for profiler in profilers: + profiler_overhead = OverheadData.compute_overhead_data( + profiler, case_study, rev + ) + + new_row = {'CaseStudy': project_name, 
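# Illustrative shape of the rows emitted by load_overhead_data (values and
# case-study name hypothetical):
#   {'CaseStudy': 'example', 'Profiler': 'Base',
#    'time': 100.0, 'ctx': 50.0, 'overhead_time': 0, 'overhead_ctx': 0}
#   {'CaseStudy': 'example', 'Profiler': 'PIMTracer',
#    'time': 120.0, 'ctx': 55.0, 'overhead_time': 20.0, 'overhead_ctx': 5.0}
# The baseline row carries zero overhead by definition; profiler rows store
# the mean config-wise difference against the baseline.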
'Profiler': profiler.name} + + if profiler_overhead: + time_diff = profiler_overhead.config_wise_time_diff( + overhead_ground_truth + ) + ctx_diff = profiler_overhead.config_wise_ctx_diff( + overhead_ground_truth + ) + + new_row['time'] = profiler_overhead.mean_time() + new_row['overhead_time'] = np.mean(list(time_diff.values())) + + new_row['ctx'] = profiler_overhead.mean_ctx() + new_row['overhead_ctx'] = np.mean(list(ctx_diff.values())) + else: + new_row['time'] = np.nan + new_row['overhead_time'] = np.nan + + new_row['ctx'] = np.nan + new_row['overhead_ctx'] = np.nan + + table_rows.append(new_row) + + return pd.DataFrame(table_rows) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 5df20ed7d..a6c4eab28 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -20,12 +20,13 @@ Baseline, OverheadData, load_precision_data, + load_overhead_data, ) from varats.data.metrics import ClassificationResults from varats.paper.case_study import CaseStudy from varats.paper.paper_config import get_loaded_paper_config from varats.plot.plot import Plot -from varats.plot.plots import PlotGenerator +from varats.plot.plots import PlotConfig, PlotGenerator from varats.plots.scatter_plot_utils import multivariate_grid from varats.utils.exceptions import UnsupportedOperation from varats.utils.git_util import FullCommitHash @@ -179,6 +180,12 @@ def get_fake_overhead_better_rows(): class PerfOverheadPlot(Plot, plot_name='fperf_overhead'): + def __init__( + self, target_metric, plot_config: PlotConfig, **kwargs: tp.Any + ) -> None: + super().__init__(plot_config, **kwargs) + self.__target_metric = target_metric + def other_frame(self): case_studies = get_loaded_paper_config().get_all_case_studies() profilers: tp.List[Profiler] = [VXray(), PIMTracer()] @@ -275,12 +282,22 @@ def plot(self, view_mode: bool) -> None: print(f"{sub_df=}") # other_df = self.other_frame() - other_df = pd.DataFrame() - other_df = pd.concat([ - other_df, pd.DataFrame(get_fake_overhead_better_rows()) - ]) + # other_df = pd.DataFrame() + # other_df = pd.concat([ + # other_df, pd.DataFrame(get_fake_overhead_better_rows()) + # ]) # other_df = other_df.groupby(['CaseStudy', 'Profiler']) - print(f"{other_df=}") + # print(f"other_df=\n{other_df}") + other_df = load_overhead_data(case_studies, profilers) + print(f"other_df=\n{other_df}") + other_df['overhead_time_rel'] = other_df['time'] / ( + other_df['time'] - other_df['overhead_time'] + ) * 100 + + other_df['overhead_ctx_rel'] = other_df['ctx'] / ( + other_df['ctx'] - other_df['overhead_ctx'] + ) * 100 + print(f"other_df=\n{other_df}") target_row = "f1_score" # target_row = "precision" @@ -289,9 +306,18 @@ def plot(self, view_mode: bool) -> None: final_df = pd.merge(sub_df, other_df, on=["CaseStudy", "Profiler"]) print(f"{final_df=}") + if self.__target_metric == "time": + plot_extra_name = "Time" + x_values = "overhead_time_rel" + elif self.__target_metric == "ctx": + plot_extra_name = "Ctx" + x_values = "overhead_ctx_rel" + else: + raise NotImplementedError() + ax = sns.scatterplot( final_df, - x='overhead_time', + x=x_values, y=target_row, hue="Profiler", style='CaseStudy', @@ -325,29 +351,26 @@ def plot(self, view_mode: bool) -> None: # ax.legend().set_bbox_to_anchor((1, 0.5)) # grid.ax_marg_x.set_xlim(0.0, 1.01) - ax.set_xlabel("Overhead in %") + ax.set_xlabel(f"{plot_extra_name} Overhead in %") if target_row == "f1_score": ax.set_ylabel("F1-Score") # 
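# Worked example for the relative-overhead formula introduced above
# (overhead_time_rel = time / (time - overhead_time) * 100):
time_profiled = 120.0                                    # with instrumentation
overhead_time = 20.0                                     # absolute overhead
baseline_time = time_profiled - overhead_time            # 100.0
overhead_time_rel = time_profiled / baseline_time * 100  # 120.0, i.e. 120 %
# A value of 100 % therefore means "no measurable overhead".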
ax.set_ylim(np.max(final_df['overhead_time']) + 20, 0) ax.set_ylim(0.0, 1.02) # ax.set_xlim(0, np.max(final_df['overhead_time']) + 20) - ax.set_xlim(np.max(final_df['overhead_time']) + 20, 0) + ax.set_xlim(np.max(final_df[x_values]) + 20, 0) # ax.set_xlim(1.01, 0.0) ax.xaxis.label.set_size(20) ax.yaxis.label.set_size(20) ax.tick_params(labelsize=15) - prof_df = final_df[[ - 'Profiler', 'precision', 'overhead_time', 'f1_score' - ]].groupby('Profiler').agg(['mean', 'std']) + prof_df = final_df[['Profiler', 'precision', x_values, 'f1_score' + ]].groupby('Profiler').agg(['mean', 'std']) prof_df.fillna(0, inplace=True) print(f"{prof_df=}") p = self.plot_pareto_frontier( - prof_df['overhead_time']['mean'], - prof_df[target_row]['mean'], - maxX=False + prof_df[x_values]['mean'], prof_df[target_row]['mean'], maxX=False ) # p = self.plot_pareto_frontier_std( # prof_df['overhead_time']['mean'], @@ -362,9 +385,9 @@ def plot(self, view_mode: bool) -> None: # pf_x_error = [pair[2] for pair in p] # pf_y_error = [pair[3] for pair in p] - x_loc = prof_df['overhead_time']['mean'] + x_loc = prof_df[x_values]['mean'] y_loc = prof_df[target_row]['mean'] - x_error = prof_df['overhead_time']['std'] + x_error = prof_df[x_values]['std'] y_error = prof_df[target_row]['std'] ax.errorbar( @@ -382,7 +405,7 @@ def plot(self, view_mode: bool) -> None: sns.scatterplot( prof_df, - x=('overhead_time', 'mean'), + x=(x_values, 'mean'), y=(target_row, 'mean'), hue="Profiler", ax=ax, @@ -465,4 +488,7 @@ class PerfOverheadPlotGenerator( def generate(self) -> tp.List[Plot]: - return [PerfOverheadPlot(self.plot_config, **self.plot_kwargs)] + return [ + PerfOverheadPlot(metric, self.plot_config, **self.plot_kwargs) + for metric in ["time", "ctx"] + ] From 7669a0989022f3c62f0a7ce8f5fcb2c600dbc39e Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 22 Jul 2023 23:28:22 +0200 Subject: [PATCH 072/224] Removes "stuff" --- varats/varats/plots/feature_perf_precision.py | 246 ++++-------------- 1 file changed, 47 insertions(+), 199 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index a6c4eab28..3ae353a30 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -5,25 +5,15 @@ import numpy as np import pandas as pd import seaborn as sns -from matplotlib import pyplot as plt from matplotlib.text import Text from varats.data.databases.feature_perf_precision_database import ( Profiler, - get_regressing_config_ids_gt, VXray, PIMTracer, - get_patch_names, - map_to_positive_config_ids, - map_to_negative_config_ids, - compute_profiler_predictions, - Baseline, - OverheadData, load_precision_data, load_overhead_data, ) -from varats.data.metrics import ClassificationResults -from varats.paper.case_study import CaseStudy from varats.paper.paper_config import get_loaded_paper_config from varats.plot.plot import Plot from varats.plot.plots import PlotConfig, PlotGenerator @@ -63,7 +53,7 @@ def plot(self, view_mode: bool) -> None: # Data aggregation df = pd.DataFrame() df = load_precision_data(case_studies, profilers) - df = pd.concat([df, pd.DataFrame(get_fake_prec_rows())]) + # df = pd.concat([df, pd.DataFrame(get_fake_prec_rows())]) df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") @@ -186,126 +176,11 @@ def __init__( super().__init__(plot_config, **kwargs) self.__target_metric = target_metric - def other_frame(self): - case_studies = get_loaded_paper_config().get_all_case_studies() - profilers: 
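# Recap of the per-profiler error bars above: mean plus/minus one standard
# deviation over all case studies, with NaN stds from single-sample groups
# zeroed beforehand via prof_df.fillna(0):
x_loc, x_err = prof_df[x_values]['mean'], prof_df[x_values]['std']
y_loc, y_err = prof_df[target_row]['mean'], prof_df[target_row]['std']
ax.errorbar(x_loc, y_loc, xerr=x_err, yerr=y_err, fmt='none', color='grey')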
tp.List[Profiler] = [VXray(), PIMTracer()] - - # Data aggregation - df = pd.DataFrame() - table_rows = [] - - for case_study in case_studies: - rev = case_study.revisions[0] - project_name = case_study.project_name - - overhead_ground_truth = OverheadData.compute_overhead_data( - Baseline(), case_study, rev - ) - if not overhead_ground_truth: - print( - f"No baseline data for {case_study.project_name}, skipping" - ) - continue - - new_row = { - 'CaseStudy': project_name, - 'WithoutProfiler_mean_time': overhead_ground_truth.mean_time(), - 'WithoutProfiler_mean_ctx': overhead_ground_truth.mean_ctx() - } - - for profiler in profilers: - profiler_overhead = OverheadData.compute_overhead_data( - profiler, case_study, rev - ) - if profiler_overhead: - time_diff = profiler_overhead.config_wise_time_diff( - overhead_ground_truth - ) - ctx_diff = profiler_overhead.config_wise_ctx_diff( - overhead_ground_truth - ) - print(f"{time_diff=}") - new_row[f"{profiler.name}_time_mean"] = np.mean( - list(time_diff.values()) - ) - new_row[f"{profiler.name}_time_std"] = np.std( - list(time_diff.values()) - ) - new_row[f"{profiler.name}_time_max"] = np.max( - list(time_diff.values()) - ) - new_row[f"{profiler.name}_ctx_mean"] = np.mean( - list(ctx_diff.values()) - ) - new_row[f"{profiler.name}_ctx_std"] = np.std( - list(ctx_diff.values()) - ) - new_row[f"{profiler.name}_ctx_max"] = np.max( - list(ctx_diff.values()) - ) - else: - new_row[f"{profiler.name}_time_mean"] = np.nan - new_row[f"{profiler.name}_time_std"] = np.nan - new_row[f"{profiler.name}_time_max"] = np.nan - - new_row[f"{profiler.name}_ctx_mean"] = np.nan - new_row[f"{profiler.name}_ctx_std"] = np.nan - new_row[f"{profiler.name}_ctx_max"] = np.nan - - table_rows.append(new_row) - # df.append(new_row, ignore_index=True) - - df = pd.concat([df, pd.DataFrame(table_rows)]) - df.sort_values(["CaseStudy"], inplace=True) - # print(f"{df=}") - return df - def plot(self, view_mode: bool) -> None: - case_studies = get_loaded_paper_config().get_all_case_studies() - profilers: tp.List[Profiler] = [VXray(), PIMTracer()] - - # Data aggregation - df = load_precision_data(case_studies, profilers) - # df = pd.concat([df, pd.DataFrame(get_fake_prec_rows_overhead())]) - df.sort_values(["CaseStudy"], inplace=True) - print(f"{df=}") - - sub_df = df[[ - "CaseStudy", "precision", "recall", "Profiler", "f1_score" - ]] - sub_df = sub_df.groupby(['CaseStudy', "Profiler"], as_index=False).agg({ - 'precision': 'mean', - 'recall': 'mean', - 'f1_score': 'mean' - }) - - print(f"{sub_df=}") - - # other_df = self.other_frame() - # other_df = pd.DataFrame() - # other_df = pd.concat([ - # other_df, pd.DataFrame(get_fake_overhead_better_rows()) - # ]) - # other_df = other_df.groupby(['CaseStudy', 'Profiler']) - # print(f"other_df=\n{other_df}") - other_df = load_overhead_data(case_studies, profilers) - print(f"other_df=\n{other_df}") - other_df['overhead_time_rel'] = other_df['time'] / ( - other_df['time'] - other_df['overhead_time'] - ) * 100 - - other_df['overhead_ctx_rel'] = other_df['ctx'] / ( - other_df['ctx'] - other_df['overhead_ctx'] - ) * 100 - print(f"other_df=\n{other_df}") - + # -- Configure plot -- target_row = "f1_score" # target_row = "precision" - # final_df = sub_df.join(other_df, on=["CaseStudy", "Profiler"]) - final_df = pd.merge(sub_df, other_df, on=["CaseStudy", "Profiler"]) - print(f"{final_df=}") - if self.__target_metric == "time": plot_extra_name = "Time" x_values = "overhead_time_rel" @@ -315,8 +190,41 @@ def plot(self, view_mode: bool) -> None: else: raise 
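# The removed `other_frame` built one *wide* row per case study with
# per-profiler columns (f"{profiler.name}_time_mean", ...). Its replacement,
# `load_overhead_data`, emits tidy rows keyed by (CaseStudy, Profiler), which
# merge directly with the precision frame:
merged_df = pd.merge(precision_df, overhead_df, on=["CaseStudy", "Profiler"])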
NotImplementedError() + # Load data + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer()] + + # Data aggregation + full_precision_df = load_precision_data(case_studies, profilers) + full_precision_df.sort_values(["CaseStudy"], inplace=True) + + precision_df = full_precision_df[[ + "CaseStudy", "precision", "recall", "Profiler", "f1_score" + ]] + precision_df = precision_df.groupby(['CaseStudy', "Profiler"], + as_index=False).agg({ + 'precision': 'mean', + 'recall': 'mean', + 'f1_score': 'mean' + }) + print(f"precision_df=\n{precision_df}") + + overhead_df = load_overhead_data(case_studies, profilers) + overhead_df['overhead_time_rel'] = overhead_df['time'] / ( + overhead_df['time'] - overhead_df['overhead_time'] + ) * 100 + overhead_df['overhead_ctx_rel'] = overhead_df['ctx'] / ( + overhead_df['ctx'] - overhead_df['overhead_ctx'] + ) * 100 + print(f"other_df=\n{overhead_df}") + + merged_df = pd.merge( + precision_df, overhead_df, on=["CaseStudy", "Profiler"] + ) + print(f"{merged_df=}") + ax = sns.scatterplot( - final_df, + merged_df, x=x_values, y=target_row, hue="Profiler", @@ -325,16 +233,6 @@ def plot(self, view_mode: bool) -> None: s=100 ) - print(f"{ax.legend()=}") - print(f"{type(ax.legend())=}") - print(f"{ax.legend().get_children()=}") - print(f"{ax.legend().prop=}") - print(f"{ax.legend().get_title()}") - print(f"{ax.legend().get_lines()}") - print(f"{ax.legend().get_patches()}") - print(f"{ax.legend().get_texts()}") - ax.legend().set_title("Walrus") - for text_obj in ax.legend().get_texts(): text_obj: Text @@ -348,42 +246,27 @@ def plot(self, view_mode: bool) -> None: text_obj.set_text("Subject Systems") text_obj.set_fontweight("bold") - # ax.legend().set_bbox_to_anchor((1, 0.5)) - - # grid.ax_marg_x.set_xlim(0.0, 1.01) ax.set_xlabel(f"{plot_extra_name} Overhead in %") if target_row == "f1_score": ax.set_ylabel("F1-Score") - # ax.set_ylim(np.max(final_df['overhead_time']) + 20, 0) ax.set_ylim(0.0, 1.02) - # ax.set_xlim(0, np.max(final_df['overhead_time']) + 20) - ax.set_xlim(np.max(final_df[x_values]) + 20, 0) - # ax.set_xlim(1.01, 0.0) + ax.set_xlim(np.max(merged_df[x_values]) + 20, 100) ax.xaxis.label.set_size(20) ax.yaxis.label.set_size(20) ax.tick_params(labelsize=15) - prof_df = final_df[['Profiler', 'precision', x_values, 'f1_score' - ]].groupby('Profiler').agg(['mean', 'std']) + prof_df = merged_df[['Profiler', 'precision', x_values, 'f1_score' + ]].groupby('Profiler').agg(['mean', 'std']) prof_df.fillna(0, inplace=True) print(f"{prof_df=}") - p = self.plot_pareto_frontier( + pareto_front = self.plot_pareto_frontier( prof_df[x_values]['mean'], prof_df[target_row]['mean'], maxX=False ) - # p = self.plot_pareto_frontier_std( - # prof_df['overhead_time']['mean'], - # prof_df[target_row]['mean'], - # prof_df['overhead_time']['std'], - # prof_df[target_row]['std'], - # maxX=False - # ) - - pf_x = [pair[0] for pair in p] - pf_y = [pair[1] for pair in p] - # pf_x_error = [pair[2] for pair in p] - # pf_y_error = [pair[3] for pair in p] + + pf_x = [pair[0] for pair in pareto_front] + pf_y = [pair[1] for pair in pareto_front] x_loc = prof_df[x_values]['mean'] y_loc = prof_df[target_row]['mean'] @@ -391,10 +274,10 @@ def plot(self, view_mode: bool) -> None: y_error = prof_df[target_row]['std'] ax.errorbar( - x_loc, # pf_x, - y_loc, # pf_y, - xerr=x_error, # xerr=pf_x_error, - yerr=y_error, # yerr=pf_y_error, + x_loc, + y_loc, + xerr=x_error, + yerr=y_error, fmt='none', color='grey', zorder=0, @@ -414,16 +297,6 @@ 
def plot(self, view_mode: bool) -> None: zorder=2 ) - # p = self.plot_pareto_frontier( - # final_df['precision'], final_df['overhead_time'] - # ) - - # print(f"""{pf_x=} - # {pf_y=} - # {pf_x_error=} - # {pf_y_error=} - # """) - # plt.plot(pf_x, pf_y) sns.lineplot( x=pf_x, y=pf_y, @@ -434,10 +307,6 @@ def plot(self, view_mode: bool) -> None: zorder=1 ) - # def_totals = pd.DataFrame() - # def_totals.loc['mean'] = [1, 2, 23] - # print(f"{def_totals=}") - def plot_pareto_frontier(self, Xs, Ys, maxX=True, maxY=True): """Pareto frontier selection process.""" sorted_list = sorted([[Xs[i], Ys[i]] for i in range(len(Xs))], @@ -455,27 +324,6 @@ def plot_pareto_frontier(self, Xs, Ys, maxX=True, maxY=True): return pareto_front - def plot_pareto_frontier_std( - self, Xs, Ys, Xstds, Ystds, maxX=True, maxY=True - ): - """Pareto frontier selection process.""" - sorted_list = sorted([ - [Xs[i], Ys[i], Xstds[i], Ystds[i]] for i in range(len(Xs)) - ], - reverse=maxX) - print(f"{sorted_list=}") - pareto_front = [sorted_list[0]] - for pair in sorted_list[1:]: - print(f"{pair=}") - if maxY: - if pair[1] >= pareto_front[-1][1]: - pareto_front.append(pair) - else: - if pair[1] <= pareto_front[-1][1]: - pareto_front.append(pair) - - return pareto_front - def calc_missing_revisions( self, boundary_gradient: float ) -> tp.Set[FullCommitHash]: From 65c368b2156dc50dd3acb3ba9b74a2b0a38dc9d5 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 22 Jul 2023 23:33:28 +0200 Subject: [PATCH 073/224] Removes debug prints and updates error messages --- .../feature_perf_precision_database.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index b7b70cfc7..a366b332d 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -176,9 +176,9 @@ def is_regression( # TODO: check, maybe we need a "very small value cut off" if ttest_res.pvalue < 0.05: - print( - f"{self.name} found regression for feature {feature}." - ) + # print( + # f"{self.name} found regression for feature {feature}." + # ) is_regression = True else: print(f"Could not find feature {feature} in new trace.") @@ -242,9 +242,9 @@ def is_regression( # TODO: check, maybe we need a "very small value cut off" if ttest_res.pvalue < 0.05: - print( - f"{self.name} found regression for feature {feature}." - ) + # print( + # f"{self.name} found regression for feature {feature}." + # ) is_regression = True else: print(f"Could not find feature {feature} in new trace.") @@ -266,7 +266,10 @@ def get_patch_names(case_study: CaseStudy) -> tp.List[str]: if len(report_files) > 1: raise AssertionError("Should only be one") if not report_files: - print("Could not find profiling data. config_id=0, profiler=Baseline") + print( + f"Could not find profiling data for {case_study.project_name}" + ". config_id=0, profiler=Baseline" + ) return [] # TODO: fix to prevent double loading @@ -295,8 +298,8 @@ def get_regressing_config_ids_gt( raise AssertionError("Should only be one") if not report_files: print( - f"Could not find profiling data. {config_id=}, " - f"profiler=Baseline" + f"Could not find profiling data for {case_study.project_name}." 
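# The regression oracle whose debug prints are silenced above is unchanged:
# per-feature timing samples are compared with an independent two-sample
# t-test and flagged at p < 0.05. Minimal illustration (hypothetical
# samples):
from scipy.stats import ttest_ind
old_values = [100.2, 99.8, 100.5]   # baseline timings
new_values = [110.1, 109.7, 110.4]  # patched timings
is_regression = ttest_ind(old_values, new_values).pvalue < 0.05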
+ f" {config_id=}, profiler=Baseline" ) return None @@ -517,9 +520,7 @@ def load_precision_data(case_studies, profilers) -> pd.DataFrame: new_row['f1_score'] = np.nan new_row['Profiler'] = profiler.name - print(f"{new_row=}") table_rows_plot.append(new_row) - # df.append(new_row, ignore_index=True) return pd.DataFrame(table_rows_plot) From 163cac94df0bd3bcde37a1e62c609da92e3d1ad1 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 23 Jul 2023 00:07:11 +0200 Subject: [PATCH 074/224] Convert to multiplot --- varats/varats/plots/feature_perf_precision.py | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 3ae353a30..1fef3ae92 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -2,6 +2,7 @@ import random import typing as tp +import matplotlib.pyplot as plt import numpy as np import pandas as pd import seaborn as sns @@ -170,26 +171,13 @@ def get_fake_overhead_better_rows(): class PerfOverheadPlot(Plot, plot_name='fperf_overhead'): - def __init__( - self, target_metric, plot_config: PlotConfig, **kwargs: tp.Any - ) -> None: - super().__init__(plot_config, **kwargs) - self.__target_metric = target_metric - def plot(self, view_mode: bool) -> None: # -- Configure plot -- + plot_metric = [("Time", "overhead_time_rel"), + ("Ctx", "overhead_ctx_rel")] target_row = "f1_score" # target_row = "precision" - if self.__target_metric == "time": - plot_extra_name = "Time" - x_values = "overhead_time_rel" - elif self.__target_metric == "ctx": - plot_extra_name = "Ctx" - x_values = "overhead_ctx_rel" - else: - raise NotImplementedError() - # Load data case_studies = get_loaded_paper_config().get_all_case_studies() profilers: tp.List[Profiler] = [VXray(), PIMTracer()] @@ -223,14 +211,37 @@ def plot(self, view_mode: bool) -> None: ) print(f"{merged_df=}") - ax = sns.scatterplot( + # print(f"{self.plot_config.width()}") + + _, axes = plt.subplots( + ncols=len(plot_metric), nrows=1, figsize=(20, 10) + ) + + if len(plot_metric) == 1: + self.do_single_plot( + plot_metric[0][1], target_row, merged_df, plot_metric[0][0], + axes + ) + else: + for idx, ax in enumerate(axes): + self.do_single_plot( + plot_metric[idx][1], target_row, merged_df, + plot_metric[idx][0], ax + ) + + def do_single_plot( + self, x_values, target_row, merged_df, plot_extra_name, ax + ) -> None: + # ax = + sns.scatterplot( merged_df, x=x_values, y=target_row, hue="Profiler", style='CaseStudy', alpha=0.5, - s=100 + s=100, + ax=ax ) for text_obj in ax.legend().get_texts(): @@ -335,8 +346,4 @@ class PerfOverheadPlotGenerator( ): def generate(self) -> tp.List[Plot]: - - return [ - PerfOverheadPlot(metric, self.plot_config, **self.plot_kwargs) - for metric in ["time", "ctx"] - ] + return [PerfOverheadPlot(self.plot_config, **self.plot_kwargs)] From 05b02ed59fc280660521f352ba81b5c19d376930 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 23 Jul 2023 09:23:36 +0200 Subject: [PATCH 075/224] Fixes typing issue --- varats-core/varats/experiment/experiment_util.py | 9 +++++++-- varats/varats/experiments/vara/feature_perf_precision.py | 5 +++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index dfc628e83..eb20d92a3 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ 
-521,7 +521,12 @@ def call_with_output_folder(self, tmp_dir: Path) -> StepResult: """Actual call implementation that gets a path to tmp_folder.""" -class ZippedExperimentSteps(MultiStep[tp.Union[OutputFolderStep, ProjectStep]]): +ZippedStepTy = tp.TypeVar( + "ZippedStepTy", bound=tp.Union[OutputFolderStep, ProjectStep] +) + + +class ZippedExperimentSteps(MultiStep[ZippedStepTy]): """Runs multiple actions, providing them a shared tmp folder that afterwards is zipped into an archive.""" @@ -530,7 +535,7 @@ class ZippedExperimentSteps(MultiStep[tp.Union[OutputFolderStep, ProjectStep]]): def __init__( self, output_filepath: ReportFilepath, - actions: tp.Optional[tp.List[tp.Union[OutputFolderStep, ProjectStep]]] + actions: tp.Optional[tp.List[ZippedStepTy]] ) -> None: super().__init__(actions) self.__output_filepath = output_filepath diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 1f7e46cdb..58105387e 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -2,6 +2,7 @@ measurement support of vara.""" import textwrap import typing as tp +from abc import abstractmethod from pathlib import Path import benchbuild.extensions as bb_ext @@ -63,6 +64,10 @@ def __init__( self._file_name = file_name self._reps = reps + @abstractmethod + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + """Actual call implementation that gets a path to tmp_folder.""" + class MPRTRA( MultiPatchReport[TimeReportAggregate], shorthand="MPRTRA", file_type=".zip" From 59b63083c3aff70810f961492fdd8e27efdf3a87 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 23 Jul 2023 13:01:10 +0200 Subject: [PATCH 076/224] Fixes pareto for equal x-values --- varats/varats/plots/feature_perf_precision.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 1fef3ae92..2e9723b7a 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -325,13 +325,22 @@ def plot_pareto_frontier(self, Xs, Ys, maxX=True, maxY=True): print(f"{sorted_list=}") pareto_front = [sorted_list[0]] for pair in sorted_list[1:]: - print(f"{pair=}") if maxY: if pair[1] >= pareto_front[-1][1]: - pareto_front.append(pair) + if pair[0] == pareto_front[-1][0]: + # If both points, have the same x-values, we should + # only keep the larger one + pareto_front[-1][1] = pair[1] + else: + pareto_front.append(pair) else: if pair[1] <= pareto_front[-1][1]: - pareto_front.append(pair) + if pair[0] == pareto_front[-1][0]: + # If both points, have the same x-values, we should + # only keep the smaller one + pareto_front[-1][1] = pair[1] + else: + pareto_front.append(pair) return pareto_front From 3843b5945ea8bff304381e60903c41b965d92daf Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 23 Jul 2023 13:16:04 +0200 Subject: [PATCH 077/224] Implements bpftrace experiment --- .../feature_perf_precision_database.py | 66 ++++- .../vara/feature_perf_precision.py | 255 ++++++++++++++++-- varats/varats/plots/feature_perf_precision.py | 5 +- 3 files changed, 298 insertions(+), 28 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index a366b332d..8d68770bc 100644 --- 
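# Sanity check for the equal-x pareto fix above (hypothetical numbers):
# called with maxX=False and the default maxY=True, duplicate x-values now
# collapse to the point with the larger y instead of both surviving on the
# frontier.
xs = [50.0, 50.0, 120.0]
ys = [0.6, 0.8, 0.9]
# plot_pareto_frontier(xs, ys, maxX=False) -> [[50.0, 0.8], [120.0, 0.9]]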
a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -140,7 +140,7 @@ class VXray(Profiler): def __init__(self) -> None: super().__init__( "WXray", fpp.TEFProfileRunner, fpp.TEFProfileOverheadRunner, - fpp.MPRTEFA + fpp.MPRTEFAggregate ) def is_regression( @@ -195,7 +195,7 @@ class PIMTracer(Profiler): def __init__(self) -> None: super().__init__( "PIMTracer", fpp.PIMProfileRunner, fpp.PIMProfileOverheadRunner, - fpp.MPRPIMA + fpp.MPRPIMAggregate ) def is_regression( @@ -254,11 +254,65 @@ def is_regression( return is_regression +class EbpfTraceTEF(Profiler): + """Profiler mapper implementation for the vara tef tracer.""" + + def __init__(self) -> None: + super().__init__( + "eBPFTrace", fpp.EbpfTraceTEFProfileRunner, + fpp.TEFProfileOverheadRunner, fpp.MPRTEFAggregate + ) + + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + """Checks if there was a regression between the old an new data.""" + is_regression = False + + multi_report = fpp.MultiPatchReport( + report_path.full_path(), TEFReportAggregate + ) + + old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for old_tef_report in multi_report.get_baseline_report().reports(): + pim = get_feature_performance_from_tef_report(old_tef_report) + for feature, value in pim.items(): + old_acc_pim[feature].append(value) + + new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + opt_mr = multi_report.get_report_for_patch(patch_name) + if not opt_mr: + raise NotImplementedError() + + for new_tef_report in opt_mr.reports(): + pim = get_feature_performance_from_tef_report(new_tef_report) + for feature, value in pim.items(): + new_acc_pim[feature].append(value) + + for feature, old_values in old_acc_pim.items(): + if feature in new_acc_pim: + new_values = new_acc_pim[feature] + ttest_res = ttest_ind(old_values, new_values) + + # TODO: check, maybe we need a "very small value cut off" + if ttest_res.pvalue < 0.05: + # print( + # f"{self.name} found regression for feature {feature}." + # ) + is_regression = True + else: + print(f"Could not find feature {feature} in new trace.") + # TODO: how to handle this? 
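# Adapter pattern shared by the profiler front-ends in this module (a
# sketch; argument order mirrors the super().__init__ calls above): display
# name, precision experiment, overhead experiment, and the report type used
# for loading. All names below are hypothetical.
class SomeProfiler(Profiler):
    def __init__(self) -> None:
        super().__init__(
            "SomeProf", SomeProfileRunner, SomeOverheadRunner, MPRTEFAggregate
        )

    def is_regression(self, report_path, patch_name) -> bool:
        ...  # compare baseline vs. patched reports, per-feature t-test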
+ is_regression = True + + return is_regression + + def get_patch_names(case_study: CaseStudy) -> tp.List[str]: report_files = get_processed_revisions_files( case_study.project_name, fpp.BlackBoxBaselineRunner, - fpp.MPRTRA, + fpp.MPRTimeReportAggregate, get_case_study_file_name_filter(case_study), config_id=0 ) @@ -273,7 +327,7 @@ def get_patch_names(case_study: CaseStudy) -> tp.List[str]: return [] # TODO: fix to prevent double loading - time_reports = fpp.MPRTRA(report_files[0].full_path()) + time_reports = fpp.MPRTimeReportAggregate(report_files[0].full_path()) return time_reports.get_patch_names() @@ -290,7 +344,7 @@ def get_regressing_config_ids_gt( report_files = get_processed_revisions_files( project_name, fpp.BlackBoxBaselineRunner, - fpp.MPRTRA, + fpp.MPRTimeReportAggregate, get_case_study_file_name_filter(case_study), config_id=config_id ) @@ -304,7 +358,7 @@ def get_regressing_config_ids_gt( return None # TODO: fix to prevent double loading - time_reports = fpp.MPRTRA(report_files[0].full_path()) + time_reports = fpp.MPRTimeReportAggregate(report_files[0].full_path()) old_time = time_reports.get_baseline_report() # new_time = time_reports.get_new_report() diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 58105387e..d96b69ddc 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -4,13 +4,15 @@ import typing as tp from abc import abstractmethod from pathlib import Path +from time import sleep import benchbuild.extensions as bb_ext from benchbuild.command import cleanup from benchbuild.utils import actions -from benchbuild.utils.actions import ProjectStep, StepResult, Compile, Clean -from benchbuild.utils.cmd import time -from plumbum import local, ProcessExecutionError +from benchbuild.utils.actions import StepResult, Clean +from benchbuild.utils.cmd import time, rm, cp, numactl, sudo, bpftrace +from plumbum import local, BG +from plumbum.commands.modifiers import Future from varats.data.reports.performance_influence_trace_report import ( PerfInfluenceTraceReportAggregate, @@ -41,10 +43,13 @@ ) from varats.report.gnu_time_report import TimeReportAggregate from varats.report.multi_patch_report import MultiPatchReport -from varats.report.report import ReportSpecification, ReportTy, BaseReport -from varats.report.tef_report import TEFReport, TEFReportAggregate +from varats.report.report import ReportSpecification +from varats.report.tef_report import TEFReportAggregate +from varats.tools.research_tools.vara import VaRA from varats.utils.git_util import ShortCommitHash +REPS = 3 + class AnalysisProjectStepBase(OutputFolderStep): @@ -56,7 +61,7 @@ def __init__( binary: ProjectBinaryWrapper, file_name: str, report_file_ending: str = "json", - reps=2 + reps=REPS ): super().__init__(project=project) self._binary = binary @@ -69,7 +74,7 @@ def call_with_output_folder(self, tmp_dir: Path) -> StepResult: """Actual call implementation that gets a path to tmp_folder.""" -class MPRTRA( +class MPRTimeReportAggregate( MultiPatchReport[TimeReportAggregate], shorthand="MPRTRA", file_type=".zip" ): @@ -77,7 +82,7 @@ def __init__(self, path: Path) -> None: super().__init__(path, TimeReportAggregate) -class MPRTEFA( +class MPRTEFAggregate( MultiPatchReport[TEFReportAggregate], shorthand="MPRTEFA", file_type=".zip" ): @@ -85,11 +90,13 @@ def __init__(self, path: Path) -> None: super().__init__(path, TEFReportAggregate) -class MPRPIMA( 
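# Every measurement step in this experiment file now defaults to REPS = 3
# repetitions per workload; each repetition is written to its own file
# inside the zipped report folder (e.g. trace_<label>_0.json through
# trace_<label>_2.json, following the naming used above).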
+class MPRPIMAggregate( MultiPatchReport[TEFReportAggregate], shorthand="MPRPIMA", file_type=".zip" ): def __init__(self, path: Path) -> None: + # TODO: clean up report handling, we currently parse it as a TEFReport + # as the file looks similar super().__init__(path, PerfInfluenceTraceReportAggregate) @@ -107,7 +114,7 @@ def __init__( binary: ProjectBinaryWrapper, file_name: str, report_file_ending: str = "json", - reps=2 + reps=REPS ): super().__init__(project, binary, file_name, report_file_ending, reps) @@ -152,6 +159,98 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: return StepResult.OK +class RunBPFTracedWorkloads(AnalysisProjectStepBase): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunBPFTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." + + project: VProject + + def __init__( + self, + project: VProject, + binary: ProjectBinaryWrapper, + file_name: str, + report_file_ending: str = "json", + reps=REPS + ): + super().__init__(project, binary, file_name, report_file_ending, reps) + + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Run instrumented code", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + zip_tmp_dir = tmp_dir / self._file_name + with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: + for rep in range(0, self._reps): + for prj_command in workload_commands( + self.project, self._binary, [WorkloadCategory.EXAMPLE] + ): + local_tracefile_path = Path(reps_tmp_dir) / ( + f"trace_{prj_command.command.label}_{rep}" + f".{self._report_file_ending}" + ) + + with local.env(VARA_TRACE_FILE=local_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print( + f"Running example {prj_command.command.label}" + ) + + extra_options = get_extra_config_options( + self.project + ) + + bpf_runner = self.attach_usdt_raw_tracing( + local_tracefile_path, + self.project.source_of_primary / + self._binary.path + ) + + with cleanup(prj_command): + pb_cmd( + *extra_options, + retcode=self._binary.valid_exit_codes + ) + + # wait for bpf script to exit + if bpf_runner: + bpf_runner.wait() + + return StepResult.OK + + @staticmethod + def attach_usdt_raw_tracing(report_file: Path, binary: Path) -> Future: + """Attach bpftrace script to binary to activate raw USDT probes.""" + bpftrace_script_location = Path( + VaRA.install_location(), + "share/vara/perf_bpf_tracing/RawUsdtTefMarker.bt" + ) + bpftrace_script = bpftrace["-o", report_file, "-q", + bpftrace_script_location, binary] + bpftrace_script = bpftrace_script.with_env(BPFTRACE_PERF_RB_PAGES=4096) + + # Assertion: Can be run without sudo password prompt. 
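# The USDT attach helpers here build on plumbum's background execution:
# `& BG` returns a Future that the measurement step later joins via wait().
# Minimal pattern (hypothetical command):
from plumbum import local, BG
tracer = local["sleep"]["5"] & BG  # start the tracer in the background
# ... run the instrumented workload here ...
tracer.wait()                      # join before collecting the trace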
+ bpftrace_cmd = sudo[bpftrace_script] + # bpftrace_cmd = numactl["--cpunodebind=0", "--membind=0", bpftrace_cmd] + + bpftrace_runner = bpftrace_cmd & BG + # give bpftrace time to start up, requires more time than regular USDT + # script because a large number of probes increases the startup time + sleep(10) + return bpftrace_runner + + def setup_actions_for_vara_experiment( experiment: FeatureExperiment, project: VProject, instr_type: FeatureInstrType, @@ -239,7 +338,7 @@ class TEFProfileRunner(FeatureExperiment, shorthand="TEFp"): NAME = "RunTEFProfiler" - REPORT_SPEC = ReportSpecification(MPRTEFA) + REPORT_SPEC = ReportSpecification(MPRTEFAggregate) def actions_for_project( self, project: VProject @@ -261,7 +360,7 @@ class PIMProfileRunner(FeatureExperiment, shorthand="PIMp"): NAME = "RunPIMProfiler" - REPORT_SPEC = ReportSpecification(MPRPIMA) + REPORT_SPEC = ReportSpecification(MPRPIMAggregate) def actions_for_project( self, project: VProject @@ -279,6 +378,28 @@ def actions_for_project( ) +class EbpfTraceTEFProfileRunner(FeatureExperiment, shorthand="ETEFp"): + """Test runner for feature performance.""" + + NAME = "RunEBPFTraceTEFProfiler" + + REPORT_SPEC = ReportSpecification(MPRTEFAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + return setup_actions_for_vara_experiment( + self, project, FeatureInstrType.USDT_RAW, RunBPFTracedWorkloads + ) + + class RunBackBoxBaseline(OutputFolderStep): # type: ignore """Executes the traced project binaries on the specified workloads.""" @@ -293,7 +414,7 @@ def __init__( binary: ProjectBinaryWrapper, file_name: str, report_file_ending: str = "txt", - reps=2 + reps=REPS ): super().__init__(project=project) self.__binary = binary @@ -346,7 +467,7 @@ class BlackBoxBaselineRunner(FeatureExperiment, shorthand="BBBase"): NAME = "GenBBBaseline" - REPORT_SPEC = ReportSpecification(MPRTRA) + REPORT_SPEC = ReportSpecification(MPRTimeReportAggregate) def actions_for_project( self, project: VProject @@ -402,7 +523,7 @@ def actions_for_project( RunBackBoxBaseline( project, binary, - file_name=MPRTRA.create_patched_report_name( + file_name=MPRTimeReportAggregate.create_patched_report_name( patch, "rep_measurements" ) ) @@ -418,7 +539,7 @@ def actions_for_project( RunBackBoxBaseline( project, binary, - file_name=MPRTRA. + file_name=MPRTimeReportAggregate. create_baseline_report_name("rep_measurements") ) ] + patch_steps @@ -448,7 +569,7 @@ def __init__( binary: ProjectBinaryWrapper, file_name: str, report_file_ending: str = "txt", - reps=2 + reps=REPS ): super().__init__(project, binary, file_name, report_file_ending, reps) @@ -497,13 +618,84 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: return StepResult.OK +class RunBPFTracedWorkloadsOverhead(AnalysisProjectStepBase): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." 
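# The MultiPatchReport name helpers used above keep baseline and per-patch
# measurements apart within a single result zip. Roughly (a sketch; the
# exact scheme lives in varats.report.multi_patch_report):
#   create_baseline_report_name("rep_measurements")
#       -> a file name marking the un-patched baseline runs
#   create_patched_report_name(patch, "rep_measurements")
#       -> a file name embedding the patch's shortname for later lookup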
+ + project: VProject + + def __init__( + self, + project: VProject, + binary: ProjectBinaryWrapper, + file_name: str, + report_file_ending: str = "txt", + reps=REPS + ): + super().__init__(project, binary, file_name, report_file_ending, reps) + + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Run instrumented code", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + for rep in range(0, self._reps): + for prj_command in workload_commands( + self.project, self._binary, [WorkloadCategory.EXAMPLE] + ): + base = Path("/tmp/") + fake_tracefile_path = base / ( + f"trace_{prj_command.command.label}_{rep}" + f".json" + ) + + time_report_file = tmp_dir / ( + f"overhead_{prj_command.command.label}_{rep}" + f".{self._report_file_ending}" + ) + + with local.env(VARA_TRACE_FILE=fake_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print(f"Running example {prj_command.command.label}") + + timed_pb_cmd = time["-v", "-o", time_report_file, + pb_cmd] + + extra_options = get_extra_config_options(self.project) + + bpf_runner = RunBPFTracedWorkloads.attach_usdt_raw_tracing( + fake_tracefile_path, + self.project.source_of_primary / self._binary.path + ) + + with cleanup(prj_command): + timed_pb_cmd( + *extra_options, + retcode=self._binary.valid_exit_codes + ) + + # wait for bpf script to exit + if bpf_runner: + bpf_runner.wait() + + return StepResult.OK + + def setup_actions_for_vara_overhead_experiment( experiment: FeatureExperiment, project: VProject, instr_type: FeatureInstrType, analysis_step: tp.Type[AnalysisProjectStepBase] ) -> tp.MutableSequence[actions.Step]: - instr_type = FeatureInstrType.TEF - project.cflags += experiment.get_vara_feature_cflags(project) threshold = 0 if project.DOMAIN.value is ProjectDomains.TEST else 100 @@ -601,6 +793,29 @@ def actions_for_project( ) +class EbpfTraceTEFOverheadRunner(FeatureExperiment, shorthand="ETEFo"): + """Test runner for feature performance.""" + + NAME = "RunEBPFTraceTEFProfilerO" + + REPORT_SPEC = ReportSpecification(TimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. 
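# Overhead runs still export VARA_TRACE_FILE (the instrumented binary
# expects it) but point it at a throwaway file under /tmp; only the GNU time
# report is collected. Measurement pattern, as used above:
time_report_file = tmp_dir / "overhead_example_0.txt"  # hypothetical label
timed_pb_cmd = time["-v", "-o", time_report_file, pb_cmd]
timed_pb_cmd(*extra_options, retcode=self._binary.valid_exit_codes)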
+ + Args: + project: to analyze + """ + return setup_actions_for_vara_overhead_experiment( + self, project, FeatureInstrType.USDT_RAW, + RunBPFTracedWorkloadsOverhead + ) + + class RunBackBoxBaselineOverhead(OutputFolderStep): # type: ignore """Executes the traced project binaries on the specified workloads.""" @@ -614,7 +829,7 @@ def __init__( project: VProject, binary: ProjectBinaryWrapper, report_file_ending: str = "txt", - reps=2 + reps=REPS ): super().__init__(project=project) self.__binary = binary diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 2e9723b7a..b54cf231b 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -12,6 +12,7 @@ Profiler, VXray, PIMTracer, + EbpfTraceTEF, load_precision_data, load_overhead_data, ) @@ -49,7 +50,7 @@ class PerfPrecisionPlot(Plot, plot_name='fperf_precision'): def plot(self, view_mode: bool) -> None: case_studies = get_loaded_paper_config().get_all_case_studies() - profilers: tp.List[Profiler] = [VXray(), PIMTracer()] + profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] # Data aggregation df = pd.DataFrame() @@ -180,7 +181,7 @@ def plot(self, view_mode: bool) -> None: # Load data case_studies = get_loaded_paper_config().get_all_case_studies() - profilers: tp.List[Profiler] = [VXray(), PIMTracer()] + profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] # Data aggregation full_precision_df = load_precision_data(case_studies, profilers) From 4db2211dd5501b73184415a6217902ac4e27532f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 23 Jul 2023 13:51:31 +0200 Subject: [PATCH 078/224] Test experiment for BCC --- .../vara/feature_perf_precision.py | 210 +++++++++++++++++- 1 file changed, 209 insertions(+), 1 deletion(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index d96b69ddc..26498ae38 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -211,7 +211,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: self.project ) - bpf_runner = self.attach_usdt_raw_tracing( + bpf_runner = bpf_runner = self.attach_usdt_raw_tracing( local_tracefile_path, self.project.source_of_primary / self._binary.path @@ -251,6 +251,97 @@ def attach_usdt_raw_tracing(report_file: Path, binary: Path) -> Future: return bpftrace_runner +class RunBCCTracedWorkloads(AnalysisProjectStepBase): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunBCCTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." 
+ + project: VProject + + def __init__( + self, + project: VProject, + binary: ProjectBinaryWrapper, + file_name: str, + report_file_ending: str = "json", + reps=REPS + ): + super().__init__(project, binary, file_name, report_file_ending, reps) + + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Run instrumented code", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + zip_tmp_dir = tmp_dir / self._file_name + with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: + for rep in range(0, self._reps): + for prj_command in workload_commands( + self.project, self._binary, [WorkloadCategory.EXAMPLE] + ): + local_tracefile_path = Path(reps_tmp_dir) / ( + f"trace_{prj_command.command.label}_{rep}" + f".{self._report_file_ending}" + ) + + with local.env(VARA_TRACE_FILE=local_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print( + f"Running example {prj_command.command.label}" + ) + + extra_options = get_extra_config_options( + self.project + ) + + bpf_runner = bpf_runner = self.attach_usdt_bcc( + local_tracefile_path, + self.project.source_of_primary / + self._binary.path + ) + + with cleanup(prj_command): + pb_cmd( + *extra_options, + retcode=self._binary.valid_exit_codes + ) + + # wait for bpf script to exit + if bpf_runner: + bpf_runner.wait() + + return StepResult.OK + + @staticmethod + def attach_usdt_bcc(report_file: Path, binary: Path) -> Future: + """Attach bcc script to binary to activate USDT probes.""" + bcc_script_location = Path( + VaRA.install_location(), + "share/vara/perf_bpf_tracing/UsdtTefMarker.py" + ) + bcc_script = local[str(bcc_script_location)] + + # Assertion: Can be run without sudo password prompt. + bcc_cmd = bcc_script["--output_file", report_file, "--no_poll", + "--executable", binary] + print(f"{bcc_cmd=}") + bcc_cmd = sudo[bcc_cmd] + # bcc_cmd = numactl["--cpunodebind=0", "--membind=0", bcc_cmd] + + bcc_runner = bcc_cmd & BG + sleep(3) # give bcc script time to start up + return bcc_runner + + def setup_actions_for_vara_experiment( experiment: FeatureExperiment, project: VProject, instr_type: FeatureInstrType, @@ -400,6 +491,28 @@ def actions_for_project( ) +class BCCTEFProfileRunner(FeatureExperiment, shorthand="BCCp"): + """Test runner for feature performance.""" + + NAME = "RunBCCTEFProfiler" + + REPORT_SPEC = ReportSpecification(MPRTEFAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + return setup_actions_for_vara_experiment( + self, project, FeatureInstrType.USDT, RunBCCTracedWorkloads + ) + + class RunBackBoxBaseline(OutputFolderStep): # type: ignore """Executes the traced project binaries on the specified workloads.""" @@ -691,6 +804,79 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: return StepResult.OK +class RunBCCTracedWorkloadsOverhead(AnalysisProjectStepBase): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." 
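# Both USDT attach helpers assume password-less sudo for the tracing
# front-end (see the assertions above). Their startup grace periods differ:
# the raw-USDT bpftrace script sleeps ~10 s before the workload starts (a
# large probe count slows startup), while the BCC script needs only ~3 s.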
+ + project: VProject + + def __init__( + self, + project: VProject, + binary: ProjectBinaryWrapper, + file_name: str, + report_file_ending: str = "txt", + reps=REPS + ): + super().__init__(project, binary, file_name, report_file_ending, reps) + + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Run instrumented code", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + for rep in range(0, self._reps): + for prj_command in workload_commands( + self.project, self._binary, [WorkloadCategory.EXAMPLE] + ): + base = Path("/tmp/") + fake_tracefile_path = base / ( + f"trace_{prj_command.command.label}_{rep}" + f".json" + ) + + time_report_file = tmp_dir / ( + f"overhead_{prj_command.command.label}_{rep}" + f".{self._report_file_ending}" + ) + + with local.env(VARA_TRACE_FILE=fake_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print(f"Running example {prj_command.command.label}") + + timed_pb_cmd = time["-v", "-o", time_report_file, + pb_cmd] + + extra_options = get_extra_config_options(self.project) + + bpf_runner = RunBCCTracedWorkloads.attach_usdt_bcc( + fake_tracefile_path, + self.project.source_of_primary / self._binary.path + ) + + with cleanup(prj_command): + timed_pb_cmd( + *extra_options, + retcode=self._binary.valid_exit_codes + ) + + # wait for bpf script to exit + if bpf_runner: + bpf_runner.wait() + + return StepResult.OK + + def setup_actions_for_vara_overhead_experiment( experiment: FeatureExperiment, project: VProject, instr_type: FeatureInstrType, @@ -816,6 +1002,28 @@ def actions_for_project( ) +class BccTraceTEFOverheadRunner(FeatureExperiment, shorthand="BCCo"): + """Test runner for feature performance.""" + + NAME = "RunBCCTEFProfilerO" + + REPORT_SPEC = ReportSpecification(TimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. 
+ + Args: + project: to analyze + """ + return setup_actions_for_vara_overhead_experiment( + self, project, FeatureInstrType.USDT, RunBCCTracedWorkloadsOverhead + ) + + class RunBackBoxBaselineOverhead(OutputFolderStep): # type: ignore """Executes the traced project binaries on the specified workloads.""" From dd1c8a66a2ab72d32c35e009570e2356c743b9cf Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 23 Jul 2023 21:16:44 +0200 Subject: [PATCH 079/224] Implements basic perf report --- tests/report/test_linux_perf_report.py | 81 ++++++++++++ .../varats/report/linux_perf_report.py | 115 ++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 tests/report/test_linux_perf_report.py create mode 100644 varats-core/varats/report/linux_perf_report.py diff --git a/tests/report/test_linux_perf_report.py b/tests/report/test_linux_perf_report.py new file mode 100644 index 000000000..df397276b --- /dev/null +++ b/tests/report/test_linux_perf_report.py @@ -0,0 +1,81 @@ +"""Test LinuxPerfReport.""" + +import unittest +import unittest.mock as mock +from datetime import timedelta +from pathlib import Path + +from varats.report.linux_perf_report import LinuxPerfReport + +PERF_REPORT_1 = """# started on Sun Jul 23 16:33:56 2023 + +0.28;msec;task-clock:u;281620;100.00;0.398;CPUs utilized +0;;context-switches:u;281620;100.00;0.000;/sec +0;;cpu-migrations:u;281620;100.00;0.000;/sec +63;;page-faults:u;281620;100.00;223.706;K/sec +297468;;cycles:u;282100;100.00;1.056;GHz +21086;;stalled-cycles-frontend:u;282100;100.00;7.09;frontend cycles idle +84315;;stalled-cycles-backend:u;282100;100.00;28.34;backend cycles idle +200506;;instructions:u;282100;100.00;0.67;insn per cycle +;;;;;0.42;stalled cycles per insn +48602;;branches:u;282100;100.00;172.580;M/sec +2946;;branch-misses:u;282100;100.00;6.06;of all branches +;;L1-dcache-loads:u;0;0.00;; +;;L1-dcache-load-misses:u;0;0.00;; +;;LLC-loads:u;0;100.00;; +;;LLC-load-misses:u;0;100.00;; +""" + +PERF_REPORT_2 = """# started on Sun Jul 23 16:36:38 2023 + +689.70;msec;task-clock:u;689702567;100.00;0.158;CPUs utilized +0;;context-switches:u;689702567;100.00;0.000;/sec +0;;cpu-migrations:u;689702567;100.00;0.000;/sec +2924;;page-faults:u;689702567;100.00;4.240;K/sec +442557352;;cycles:u;513385825;74.00;0.642;GHz +6447861;;stalled-cycles-frontend:u;513968009;74.00;1.46;frontend cycles idle +120234822;;stalled-cycles-backend:u;517763201;75.00;27.17;backend cycles idle +944044714;;instructions:u;519151351;75.00;2.13;insn per cycle +;;;;;0.13;stalled cycles per insn +216559082;;branches:u;517782741;75.00;313.989;M/sec +1542284;;branch-misses:u;517881196;75.00;0.71;of all branches +286757265;;L1-dcache-loads:u;517504374;75.00;415.769;M/sec +9357536;;L1-dcache-load-misses:u;515435585;74.00;3.26;of all L1-dcache accesses +;;LLC-loads:u;0;100.00;; +;;LLC-load-misses:u;0;100.00;; +""" + + +class TestLinuxPerfReport(unittest.TestCase): + """Tests if the Linux perf report can be loaded correctly.""" + + report_1: LinuxPerfReport + report_2: LinuxPerfReport + + @classmethod + def setUpClass(cls) -> None: + """Load Linux perf report.""" + with mock.patch( + "builtins.open", new=mock.mock_open(read_data=PERF_REPORT_1) + ): + cls.report_1 = LinuxPerfReport(Path("fake_file_path")) + + with mock.patch( + "builtins.open", new=mock.mock_open(read_data=PERF_REPORT_2) + ): + cls.report_2 = LinuxPerfReport(Path("fake_file_path")) + + def test_task_clock_parsing(self) -> None: + """Checks if we correctly parsed the value for task clock.""" + 
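+        # Expected values are the first field of the task-clock:u rows in
+        # the PERF_REPORT_1/PERF_REPORT_2 fixtures above.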
self.assertEqual(self.report_1.task_clock, 0.28) + self.assertEqual(self.report_2.task_clock, 689.70) + + def test_context_switches_parsing(self) -> None: + """Checks if we correctly parsed the value for context switches.""" + self.assertEqual(self.report_1.ctx_switches, 0) + self.assertEqual(self.report_2.ctx_switches, 0) + + def test_branch_misses_parsing(self) -> None: + """Checks if we correctly parsed the value for branch misses.""" + self.assertEqual(self.report_1.branch_misses, 2946) + self.assertEqual(self.report_2.branch_misses, 1542284) diff --git a/varats-core/varats/report/linux_perf_report.py b/varats-core/varats/report/linux_perf_report.py new file mode 100644 index 000000000..cdcec0c3c --- /dev/null +++ b/varats-core/varats/report/linux_perf_report.py @@ -0,0 +1,115 @@ +""" +Simple report module to create and handle the standard timing output of perf +stat. + +Examples to produce a ``LinuxPerfReport``: + + Commandline usage: + .. code-block:: bash + + export REPORT_FILE="Path/To/MyFile" + perf stat -x ";" -o $REPORT_FILE -- sleep 2 + + Experiment code: + .. code-block:: python + + from benchbuild.utils.cmd import time, sleep + report_file = "Path/To/MyFile" + command = sleep["2"] + perf("stat", "-x", "';'", "-o", f"{report_file}", "--", command) +""" +import csv +import math +import typing as tp +from pathlib import Path + +from varats.report.report import BaseReport, ReportAggregate + + +class LinuxPerfReport(BaseReport, shorthand="LPR", file_type="txt"): + """Report class to access perf stat output.""" + + def __init__(self, path: Path) -> None: + super().__init__(path) + self.__task_clock = math.nan + self.__ctx_switches: int = -1 + self.__branch_misses: int = -1 + + with open(self.path, 'r', newline="") as stream: + reader = csv.reader(stream, delimiter=';') + print(f"{reader=}") + + for row in reader: + print(f"{row=}") + + if len(row) == 0 or row[0].startswith("#"): + continue + + metric_name = self.__metric_name(row) + if not metric_name: + continue + + if metric_name == "task-clock:u": + self.__task_clock = float(self.__metric_value(row)) + elif metric_name == "context-switches:u": + self.__ctx_switches = int(self.__metric_value(row)) + elif metric_name == "branch-misses:u": + self.__branch_misses = int(self.__metric_value(row)) + + @staticmethod + def __metric_value(row: tp.List[tp.Any]) -> tp.Any: + return row[0] + + @staticmethod + def __metric_unit(row: tp.List[tp.Any]) -> tp.Any: + return row[1] + + @staticmethod + def __metric_name(row: tp.List[tp.Any]) -> str: + return row[2] + + @property + def task_clock(self) -> float: + return self.__task_clock + + @property + def ctx_switches(self) -> int: + return self.__ctx_switches + + @property + def branch_misses(self) -> int: + return self.__branch_misses + + def __repr__(self) -> str: + return str(self) + + def __str__(self) -> str: + return f"""LPR ({self.path}) + ├─ TaskClock: {self.task_clock} + ├─ CtxSwitches: {self.ctx_switches} + └─ BranchMisses: {self.branch_misses} +""" + + +class LinuxPerfReportAggregate( + ReportAggregate[LinuxPerfReport], + shorthand=LinuxPerfReport.SHORTHAND + ReportAggregate.SHORTHAND, + file_type=ReportAggregate.FILE_TYPE +): + """Meta report for parsing multiple Linux perf reports stored inside a zip + file.""" + + def __init__(self, path: Path) -> None: + super().__init__(path, LinuxPerfReport) + + @property + def clock_times(self) -> tp.List[float]: + return [report.task_clock for report in self.reports()] + + @property + def ctx_switches(self) -> tp.List[int]: + return 
[report.ctx_switches for report in self.reports()] + + @property + def branch_misses(self) -> tp.List[int]: + return [report.branch_misses for report in self.reports()] From 3fa09b5b72a568d4c30fafaaebdf1552fd5b45ba Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 23 Jul 2023 23:37:43 +0200 Subject: [PATCH 080/224] Ports overhead measurements to perf for more metrics --- tests/report/test_linux_perf_report.py | 98 ++++++++++++------- .../varats/report/linux_perf_report.py | 60 +++++++----- .../feature_perf_precision_database.py | 47 ++++++--- .../vara/feature_perf_precision.py | 32 +++--- varats/varats/plots/feature_perf_precision.py | 11 ++- 5 files changed, 152 insertions(+), 96 deletions(-) diff --git a/tests/report/test_linux_perf_report.py b/tests/report/test_linux_perf_report.py index df397276b..2480d531a 100644 --- a/tests/report/test_linux_perf_report.py +++ b/tests/report/test_linux_perf_report.py @@ -7,42 +7,64 @@ from varats.report.linux_perf_report import LinuxPerfReport -PERF_REPORT_1 = """# started on Sun Jul 23 16:33:56 2023 - -0.28;msec;task-clock:u;281620;100.00;0.398;CPUs utilized -0;;context-switches:u;281620;100.00;0.000;/sec -0;;cpu-migrations:u;281620;100.00;0.000;/sec -63;;page-faults:u;281620;100.00;223.706;K/sec -297468;;cycles:u;282100;100.00;1.056;GHz -21086;;stalled-cycles-frontend:u;282100;100.00;7.09;frontend cycles idle -84315;;stalled-cycles-backend:u;282100;100.00;28.34;backend cycles idle -200506;;instructions:u;282100;100.00;0.67;insn per cycle -;;;;;0.42;stalled cycles per insn -48602;;branches:u;282100;100.00;172.580;M/sec -2946;;branch-misses:u;282100;100.00;6.06;of all branches -;;L1-dcache-loads:u;0;0.00;; -;;L1-dcache-load-misses:u;0;0.00;; -;;LLC-loads:u;0;100.00;; -;;LLC-load-misses:u;0;100.00;; +PERF_REPORT_1 = """# started on Sun Jul 23 22:51:54 2023 + + + Performance counter stats for 'echo foo:bar': + + 0.30 msec task-clock:u # 0.406 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 64 page-faults:u # 212.723 K/sec + 360,721 cycles:u # 1.199 GHz + 26,199 stalled-cycles-frontend:u # 7.26% frontend cycles idle + 111,008 stalled-cycles-backend:u # 30.77% backend cycles idle + 200,655 instructions:u # 0.56 insn per cycle + # 0.55 stalled cycles per insn + 48,631 branches:u # 161.639 M/sec + 3,012 branch-misses:u # 6.19% of all branches + L1-dcache-loads:u (0.00%) + L1-dcache-load-misses:u (0.00%) + LLC-loads:u + LLC-load-misses:u + + 0.000741511 seconds time elapsed + + 0.000000000 seconds user + 0.000822000 seconds sys + + + """ -PERF_REPORT_2 = """# started on Sun Jul 23 16:36:38 2023 - -689.70;msec;task-clock:u;689702567;100.00;0.158;CPUs utilized -0;;context-switches:u;689702567;100.00;0.000;/sec -0;;cpu-migrations:u;689702567;100.00;0.000;/sec -2924;;page-faults:u;689702567;100.00;4.240;K/sec -442557352;;cycles:u;513385825;74.00;0.642;GHz -6447861;;stalled-cycles-frontend:u;513968009;74.00;1.46;frontend cycles idle -120234822;;stalled-cycles-backend:u;517763201;75.00;27.17;backend cycles idle -944044714;;instructions:u;519151351;75.00;2.13;insn per cycle -;;;;;0.13;stalled cycles per insn -216559082;;branches:u;517782741;75.00;313.989;M/sec -1542284;;branch-misses:u;517881196;75.00;0.71;of all branches -286757265;;L1-dcache-loads:u;517504374;75.00;415.769;M/sec -9357536;;L1-dcache-load-misses:u;515435585;74.00;3.26;of all L1-dcache accesses -;;LLC-loads:u;0;100.00;; -;;LLC-load-misses:u;0;100.00;; +PERF_REPORT_2 = """# started on Sun Jul 23 22:44:31 2023 + + + Performance counter stats for 
'/home/vulder/vara-root/benchbuild/results/GenBBBaselineO/SynthSAContextSensitivity-perf_tests@a8c3a8722f,0/SynthSAContextSensitivity/build/bin/ContextSense --compress --mem 10 8': + + 1.23 msec task-clock:u # 0.000 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 132 page-faults:u # 107.572 K/sec + 850,975 cycles:u # 0.693 GHz (12.81%) + 140,154 stalled-cycles-frontend:u # 16.47% frontend cycles idle + 1,012,322 stalled-cycles-backend:u # 118.96% backend cycles idle + 1,785,912 instructions:u # 2.10 insn per cycle + # 0.57 stalled cycles per insn + 325,708 branches:u # 265.433 M/sec + 11,160 branch-misses:u # 3.43% of all branches + 840,918 L1-dcache-loads:u # 685.298 M/sec (87.19%) + L1-dcache-load-misses:u (0.00%) + LLC-loads:u + LLC-load-misses:u + + 5.945920439 seconds time elapsed + + 0.000376000 seconds user + 0.001390000 seconds sys + + + """ @@ -67,8 +89,8 @@ def setUpClass(cls) -> None: def test_task_clock_parsing(self) -> None: """Checks if we correctly parsed the value for task clock.""" - self.assertEqual(self.report_1.task_clock, 0.28) - self.assertEqual(self.report_2.task_clock, 689.70) + self.assertEqual(self.report_1.elapsed_time, 0.000741511) + self.assertEqual(self.report_2.elapsed_time, 5.945920439) def test_context_switches_parsing(self) -> None: """Checks if we correctly parsed the value for context switches.""" @@ -77,5 +99,5 @@ def test_context_switches_parsing(self) -> None: def test_branch_misses_parsing(self) -> None: """Checks if we correctly parsed the value for branch misses.""" - self.assertEqual(self.report_1.branch_misses, 2946) - self.assertEqual(self.report_2.branch_misses, 1542284) + self.assertEqual(self.report_1.branch_misses, 3012) + self.assertEqual(self.report_2.branch_misses, 11160) diff --git a/varats-core/varats/report/linux_perf_report.py b/varats-core/varats/report/linux_perf_report.py index cdcec0c3c..c4c9a15b5 100644 --- a/varats-core/varats/report/linux_perf_report.py +++ b/varats-core/varats/report/linux_perf_report.py @@ -31,46 +31,52 @@ class LinuxPerfReport(BaseReport, shorthand="LPR", file_type="txt"): def __init__(self, path: Path) -> None: super().__init__(path) - self.__task_clock = math.nan + self.__elapsed_time = math.nan self.__ctx_switches: int = -1 self.__branch_misses: int = -1 with open(self.path, 'r', newline="") as stream: - reader = csv.reader(stream, delimiter=';') - print(f"{reader=}") + for line in stream: + line = line.strip("\n ") + print(f"{line=}") - for row in reader: - print(f"{row=}") - - if len(row) == 0 or row[0].startswith("#"): + if line == "" or line.startswith("#"): continue - metric_name = self.__metric_name(row) - if not metric_name: - continue + # TODO: impl cmd + # if line.startswith("Performance counter"): + # print(f"CMD: {line}") + + if "time elapsed" in line: + print("time line") + self.__elapsed_time = self.__parse_elapsed_time(line) + + if "context-switches:u" in line: + print("branchi line") + self.__ctx_switches = self.__parse_ctx_switches(line) + + if "branch-misses:u" in line: + print("branchi line") + self.__branch_misses = self.__parse_branch_misses(line) - if metric_name == "task-clock:u": - self.__task_clock = float(self.__metric_value(row)) - elif metric_name == "context-switches:u": - self.__ctx_switches = int(self.__metric_value(row)) - elif metric_name == "branch-misses:u": - self.__branch_misses = int(self.__metric_value(row)) + if self.__branch_misses == math.nan: + raise AssertionError() @staticmethod - def __metric_value(row: tp.List[tp.Any]) -> 
tp.Any: - return row[0] + def __parse_elapsed_time(line: str) -> float: + return float(line.split(" ")[0].replace(",", "")) @staticmethod - def __metric_unit(row: tp.List[tp.Any]) -> tp.Any: - return row[1] + def __parse_ctx_switches(line: str) -> int: + return int(line.split(" ")[0].replace(",", "")) @staticmethod - def __metric_name(row: tp.List[tp.Any]) -> str: - return row[2] + def __parse_branch_misses(line: str) -> int: + return int(line.split(" ")[0].replace(",", "")) @property - def task_clock(self) -> float: - return self.__task_clock + def elapsed_time(self) -> float: + return self.__elapsed_time @property def ctx_switches(self) -> int: @@ -85,7 +91,7 @@ def __repr__(self) -> str: def __str__(self) -> str: return f"""LPR ({self.path}) - ├─ TaskClock: {self.task_clock} + ├─ ElapsedTime: {self.elapsed_time} ├─ CtxSwitches: {self.ctx_switches} └─ BranchMisses: {self.branch_misses} """ @@ -103,8 +109,8 @@ def __init__(self, path: Path) -> None: super().__init__(path, LinuxPerfReport) @property - def clock_times(self) -> tp.List[float]: - return [report.task_clock for report in self.reports()] + def elapsed_time(self) -> tp.List[float]: + return [report.elapsed_time for report in self.reports()] @property def ctx_switches(self) -> tp.List[int]: diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 8d68770bc..76f4d4374 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -13,6 +13,7 @@ from varats.paper.case_study import CaseStudy from varats.paper_mgmt.case_study import get_case_study_file_name_filter from varats.report.gnu_time_report import TimeReportAggregate +from varats.report.linux_perf_report import LinuxPerfReportAggregate from varats.report.report import BaseReport, ReportFilepath from varats.report.tef_report import ( TEFReport, @@ -399,7 +400,7 @@ class Baseline(Profiler): def __init__(self) -> None: super().__init__( "Base", fpp.BlackBoxBaselineRunner, fpp.BlackBoxOverheadBaseline, - fpp.TimeReportAggregate + fpp.LinuxPerfReportAggregate ) def is_regression(self, report_path: ReportFilepath) -> bool: @@ -442,15 +443,20 @@ class OverheadData: def __init__( self, profiler, mean_time: tp.Dict[int, float], - ctx_switches: tp.Dict[int, float] + mean_bmiss: tp.Dict[int, float], ctx_switches: tp.Dict[int, float] ) -> None: self.__profiler = profiler self.__mean_time: tp.Dict[int, float] = mean_time + self.__mean_bmiss: tp.Dict[int, float] = mean_bmiss self.__mean_ctx_switches: tp.Dict[int, float] = ctx_switches def mean_time(self) -> float: return float(np.mean(list(self.__mean_time.values()))) + def mean_bmiss(self) -> float: + print(f"----> here {float(np.mean(list(self.__mean_bmiss.values())))}") + return float(np.mean(list(self.__mean_bmiss.values()))) + def mean_ctx(self) -> float: return float(np.mean(list(self.__mean_ctx_switches.values()))) @@ -458,6 +464,10 @@ def config_wise_time_diff(self, other: 'OverheadData') -> tp.Dict[int, float]: return self.__config_wise(self.__mean_time, other.__mean_time) + def config_wise_bmiss_diff(self, + other: 'OverheadData') -> tp.Dict[int, float]: + return self.__config_wise(self.__mean_bmiss, other.__mean_bmiss) + def config_wise_ctx_diff(self, other: 'OverheadData') -> tp.Dict[int, float]: return self.__config_wise( @@ -483,13 +493,14 @@ def compute_overhead_data( ) -> tp.Optional['OverheadData']: mean_time: tp.Dict[int, float] = {} + mean_bmiss: 
tp.Dict[int, float] = {} mean_cxt_switches: tp.Dict[int, float] = {} for config_id in case_study.get_config_ids_for_revision(rev): report_files = get_processed_revisions_files( case_study.project_name, profiler.overhead_experiment, - TimeReportAggregate, + LinuxPerfReportAggregate, get_case_study_file_name_filter(case_study), config_id=config_id ) @@ -503,12 +514,11 @@ def compute_overhead_data( ) return None - time_report = TimeReportAggregate(report_files[0].full_path()) - mean_time[config_id] = float( - np.mean(time_report.measurements_wall_clock_time) - ) + time_report = LinuxPerfReportAggregate(report_files[0].full_path()) + mean_time[config_id] = float(np.mean(time_report.elapsed_time)) + mean_bmiss[config_id] = float(np.mean(time_report.branch_misses)) mean_cxt_switches[config_id] = float( - np.mean(time_report.measurements_ctx_switches) + np.mean(time_report.ctx_switches) ) if not mean_time: print( @@ -518,7 +528,7 @@ def compute_overhead_data( return None # print(f"{mean_time=}") - return OverheadData(profiler, mean_time, mean_cxt_switches) + return OverheadData(profiler, mean_time, mean_bmiss, mean_cxt_switches) def load_precision_data(case_studies, profilers) -> pd.DataFrame: @@ -563,11 +573,6 @@ def load_precision_data(case_studies, profilers) -> pd.DataFrame: new_row['recall'] = results.recall() new_row['f1_score'] = results.f1_score() new_row['Profiler'] = profiler.name - # new_row[f"{profiler.name}_precision" - # ] = results.precision() - # new_row[f"{profiler.name}_recall"] = results.recall() - # new_row[f"{profiler.name}_baccuracy" - # ] = results.balanced_accuracy() else: new_row['precision'] = np.nan new_row['recall'] = np.nan @@ -596,10 +601,11 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame: new_row = { 'CaseStudy': project_name, 'Profiler': "Base", - 'time': - overhead_ground_truth.mean_time(), # random.randint(2, 230), + 'time': overhead_ground_truth.mean_time(), + 'bmiss': overhead_ground_truth.mean_bmiss(), 'ctx': overhead_ground_truth.mean_ctx(), 'overhead_time': 0, + 'overhead_bmiss': 0, 'overhead_ctx': 0 } @@ -616,6 +622,9 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame: time_diff = profiler_overhead.config_wise_time_diff( overhead_ground_truth ) + bmiss_diff = profiler_overhead.config_wise_bmiss_diff( + overhead_ground_truth + ) ctx_diff = profiler_overhead.config_wise_ctx_diff( overhead_ground_truth ) @@ -623,12 +632,18 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame: new_row['time'] = profiler_overhead.mean_time() new_row['overhead_time'] = np.mean(list(time_diff.values())) + new_row['bmiss'] = profiler_overhead.mean_bmiss() + new_row['overhead_bmiss'] = np.mean(list(bmiss_diff.values())) + new_row['ctx'] = profiler_overhead.mean_ctx() new_row['overhead_ctx'] = np.mean(list(ctx_diff.values())) else: new_row['time'] = np.nan new_row['overhead_time'] = np.nan + new_row['bmiss'] = np.nan + new_row['overhead_bmiss'] = np.nan + new_row['ctx'] = np.nan new_row['overhead_ctx'] = np.nan diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 26498ae38..04173bcc1 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -10,7 +10,7 @@ from benchbuild.command import cleanup from benchbuild.utils import actions from benchbuild.utils.actions import StepResult, Clean -from benchbuild.utils.cmd import time, rm, cp, numactl, sudo, bpftrace +from 
benchbuild.utils.cmd import time, rm, cp, numactl, sudo, bpftrace, perf from plumbum import local, BG from plumbum.commands.modifiers import Future @@ -42,6 +42,7 @@ RevertPatch, ) from varats.report.gnu_time_report import TimeReportAggregate +from varats.report.linux_perf_report import LinuxPerfReportAggregate from varats.report.multi_patch_report import MultiPatchReport from varats.report.report import ReportSpecification from varats.report.tef_report import TEFReportAggregate @@ -718,11 +719,13 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) print(f"Running example {prj_command.command.label}") - timed_pb_cmd = time["-v", "-o", time_report_file, - pb_cmd] + timed_pb_cmd = perf["stat", "-o", time_report_file, + "--", pb_cmd] extra_options = get_extra_config_options(self.project) with cleanup(prj_command): + # print("timed_pb_cmd=", str(timed_pb_cmd[*extra_options])) + timed_pb_cmd( *extra_options, retcode=self._binary.valid_exit_codes @@ -781,8 +784,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) print(f"Running example {prj_command.command.label}") - timed_pb_cmd = time["-v", "-o", time_report_file, - pb_cmd] + timed_pb_cmd = perf["stat", "-o", time_report_file, + "--", pb_cmd] extra_options = get_extra_config_options(self.project) @@ -854,8 +857,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) print(f"Running example {prj_command.command.label}") - timed_pb_cmd = time["-v", "-o", time_report_file, - pb_cmd] + timed_pb_cmd = perf["stat", "-o", time_report_file, + "--", pb_cmd] extra_options = get_extra_config_options(self.project) @@ -939,7 +942,7 @@ class TEFProfileOverheadRunner(FeatureExperiment, shorthand="TEFo"): NAME = "RunTEFProfilerO" - REPORT_SPEC = ReportSpecification(TimeReportAggregate) + REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) def actions_for_project( self, project: VProject @@ -961,7 +964,7 @@ class PIMProfileOverheadRunner(FeatureExperiment, shorthand="PIMo"): NAME = "RunPIMProfilerO" - REPORT_SPEC = ReportSpecification(TimeReportAggregate) + REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) def actions_for_project( self, project: VProject @@ -984,7 +987,7 @@ class EbpfTraceTEFOverheadRunner(FeatureExperiment, shorthand="ETEFo"): NAME = "RunEBPFTraceTEFProfilerO" - REPORT_SPEC = ReportSpecification(TimeReportAggregate) + REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) def actions_for_project( self, project: VProject @@ -1007,7 +1010,7 @@ class BccTraceTEFOverheadRunner(FeatureExperiment, shorthand="BCCo"): NAME = "RunBCCTEFProfilerO" - REPORT_SPEC = ReportSpecification(TimeReportAggregate) + REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) def actions_for_project( self, project: VProject @@ -1069,10 +1072,13 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) print(f"Running example {prj_command.command.label}") - timed_pb_cmd = time["-v", "-o", time_report_file, pb_cmd] + # timed_pb_cmd = time["-v", "-o", time_report_file, pb_cmd] + timed_pb_cmd = perf["stat", "-o", time_report_file, "--", + pb_cmd] extra_options = get_extra_config_options(self.project) with cleanup(prj_command): + # print("timed_pb_cmd=", str(timed_pb_cmd[*extra_options])) timed_pb_cmd( *extra_options, retcode=self.__binary.valid_exit_codes @@ -1086,7 +1092,7 @@ class BlackBoxOverheadBaseline(FeatureExperiment, shorthand="BBBaseO"): NAME = "GenBBBaselineO" - REPORT_SPEC = ReportSpecification(TimeReportAggregate) + REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) def actions_for_project( 
self, project: VProject diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index b54cf231b..ac3204f1d 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -175,6 +175,7 @@ class PerfOverheadPlot(Plot, plot_name='fperf_overhead'): def plot(self, view_mode: bool) -> None: # -- Configure plot -- plot_metric = [("Time", "overhead_time_rel"), + ("Branch Misses", "overhead_bmiss_rel"), ("Ctx", "overhead_ctx_rel")] target_row = "f1_score" # target_row = "precision" @@ -202,20 +203,26 @@ def plot(self, view_mode: bool) -> None: overhead_df['overhead_time_rel'] = overhead_df['time'] / ( overhead_df['time'] - overhead_df['overhead_time'] ) * 100 + overhead_df['overhead_ctx_rel'] = overhead_df['ctx'] / ( overhead_df['ctx'] - overhead_df['overhead_ctx'] ) * 100 + overhead_df["overhead_ctx_rel"].fillna(100, inplace=True) + + overhead_df['overhead_bmiss_rel'] = overhead_df['bmiss'] / ( + overhead_df['bmiss'] - overhead_df['overhead_bmiss'] + ) * 100 print(f"other_df=\n{overhead_df}") merged_df = pd.merge( precision_df, overhead_df, on=["CaseStudy", "Profiler"] ) - print(f"{merged_df=}") + print(f"merged_df=\n{merged_df}") # print(f"{self.plot_config.width()}") _, axes = plt.subplots( - ncols=len(plot_metric), nrows=1, figsize=(20, 10) + ncols=len(plot_metric), nrows=1, figsize=(30, 10) ) if len(plot_metric) == 1: From 8d4c3849fb0bcc1753892514c206e406667c0511 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 23 Jul 2023 23:40:36 +0200 Subject: [PATCH 081/224] Removes debug print --- varats-core/varats/report/linux_perf_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats-core/varats/report/linux_perf_report.py b/varats-core/varats/report/linux_perf_report.py index c4c9a15b5..a9ab3dd87 100644 --- a/varats-core/varats/report/linux_perf_report.py +++ b/varats-core/varats/report/linux_perf_report.py @@ -38,7 +38,7 @@ def __init__(self, path: Path) -> None: with open(self.path, 'r', newline="") as stream: for line in stream: line = line.strip("\n ") - print(f"{line=}") + # print(f"{line=}") if line == "" or line.startswith("#"): continue From 0ada9d81b955814cdd46e99fb7a9f98acb2f5cb4 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 24 Jul 2023 12:11:05 +0200 Subject: [PATCH 082/224] Adapts remote refs --- .../varats/provider/patch/patch_provider.py | 2 +- .../perf_tests/feature_perf_cs_collection.py | 24 +++++++------------ 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index e313af240..b02fc1c9c 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -347,7 +347,7 @@ def _get_patches_repository_path() -> Path: patches_source = bb.source.Git( remote=PatchProvider.patches_repository, local="patch-configurations", - refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", + refspec="origin/master", limit=None, shallow=False ) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index a4a4571be..f2de7d550 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -171,7 +171,7 @@ class SynthSAFieldSensitivity(VProject): bb.source.Git( 
remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthSAFieldSensitivity", - refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", + refspec="origin/master", limit=None, shallow=False, version_filter=project_filter_generator("SynthSAFieldSensitivity") @@ -201,9 +201,7 @@ def binaries_for_revision( BinaryType.EXECUTABLE, # TODO: fix with commit after merge # only_valid_in=RevisionRange("162db88346", "master") - only_valid_in=RevisionRange( - "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarksImpl" - ) + only_valid_in=RevisionRange("162db88346", "master") ) return binary_map[revision] @@ -233,7 +231,7 @@ class SynthSAFlowSensitivity(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthSAFlowSensitivity", - refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", + refspec="origin/master", limit=None, shallow=False, version_filter=project_filter_generator("SynthSAFlowSensitivity") @@ -263,9 +261,7 @@ def binaries_for_revision( BinaryType.EXECUTABLE, # TODO: fix with commit after merge # only_valid_in=RevisionRange("162db88346", "master") - only_valid_in=RevisionRange( - "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarksImpl" - ) + only_valid_in=RevisionRange("162db88346", "master") ) return binary_map[revision] @@ -295,7 +291,7 @@ class SynthSAContextSensitivity(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthSAContextSensitivity", - refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", + refspec="origin/master", limit=None, shallow=False, version_filter=project_filter_generator( @@ -328,9 +324,7 @@ def binaries_for_revision( BinaryType.EXECUTABLE, # TODO: fix with commit after merge # only_valid_in=RevisionRange("162db88346", "master") - only_valid_in=RevisionRange( - "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarksImpl" - ) + only_valid_in=RevisionRange("162db88346", "master") ) return binary_map[revision] @@ -360,7 +354,7 @@ class SynthSAWholeProgram(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthSAWholeProgram", - refspec="origin/f-StaticAnalysisMotivatedSynthBenchmarksImpl", + refspec="origin/master", limit=None, shallow=False, version_filter=project_filter_generator("SynthSAWholeProgram") @@ -390,9 +384,7 @@ def binaries_for_revision( BinaryType.EXECUTABLE, # TODO: fix with commit after merge # only_valid_in=RevisionRange("162db88346", "master") - only_valid_in=RevisionRange( - "162db88346", "f-StaticAnalysisMotivatedSynthBenchmarksImpl" - ) + only_valid_in=RevisionRange("162db88346", "master") ) return binary_map[revision] From 570713ad0869ab0e10e7593c5c04a4c997642dc3 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 24 Jul 2023 12:37:48 +0200 Subject: [PATCH 083/224] Fixes commit lookup --- varats-core/varats/provider/patch/patch_provider.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index b02fc1c9c..79407b4c4 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -173,12 +173,13 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: ) ) else: - res.update({ - ShortCommitHash(h) for h in _get_all_revisions_between( + res.update( + get_all_revisions_between( rev_dict["revision_range"]["start"], - rev_dict["revision_range"]["end"], main_repo_git + 
rev_dict["revision_range"]["end"], ShortCommitHash, + get_local_project_git_path(project_name) ) - }) + ) return res From 32dc069dcd55f4864033d8e9c4f376970742a76e Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 24 Jul 2023 12:37:59 +0200 Subject: [PATCH 084/224] Correctly sets provider remote --- varats-core/varats/provider/patch/patch_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 79407b4c4..06f28c2a4 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -348,7 +348,7 @@ def _get_patches_repository_path() -> Path: patches_source = bb.source.Git( remote=PatchProvider.patches_repository, local="patch-configurations", - refspec="origin/master", + refspec="origin/main", limit=None, shallow=False ) From ec079f5548964141a4f9b05ccc1779d71ff1157a Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 24 Jul 2023 12:48:03 +0200 Subject: [PATCH 085/224] Removes comments --- varats-core/varats/report/linux_perf_report.py | 3 --- .../varats/data/databases/feature_perf_precision_database.py | 1 - varats/varats/plots/feature_perf_precision.py | 1 - 3 files changed, 5 deletions(-) diff --git a/varats-core/varats/report/linux_perf_report.py b/varats-core/varats/report/linux_perf_report.py index a9ab3dd87..a4f68f4fc 100644 --- a/varats-core/varats/report/linux_perf_report.py +++ b/varats-core/varats/report/linux_perf_report.py @@ -48,15 +48,12 @@ def __init__(self, path: Path) -> None: # print(f"CMD: {line}") if "time elapsed" in line: - print("time line") self.__elapsed_time = self.__parse_elapsed_time(line) if "context-switches:u" in line: - print("branchi line") self.__ctx_switches = self.__parse_ctx_switches(line) if "branch-misses:u" in line: - print("branchi line") self.__branch_misses = self.__parse_branch_misses(line) if self.__branch_misses == math.nan: diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 76f4d4374..737112326 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -454,7 +454,6 @@ def mean_time(self) -> float: return float(np.mean(list(self.__mean_time.values()))) def mean_bmiss(self) -> float: - print(f"----> here {float(np.mean(list(self.__mean_bmiss.values())))}") return float(np.mean(list(self.__mean_bmiss.values()))) def mean_ctx(self) -> float: diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index ac3204f1d..d8bf37178 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -256,7 +256,6 @@ def do_single_plot( text_obj: Text text_obj.set_fontsize("small") - print(f"{text_obj=}") if text_obj.get_text() == "Profiler": text_obj.set_text("Profilers") text_obj.set_fontweight("bold") From f609f07f0974272ea5cdde85719a9a5f3939fc7e Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 24 Jul 2023 13:05:44 +0200 Subject: [PATCH 086/224] Fixes packaging tool --- varats/varats/paper_mgmt/paper_config_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varats/varats/paper_mgmt/paper_config_manager.py b/varats/varats/paper_mgmt/paper_config_manager.py index 542f0c86d..4a418fd44 100644 --- 
a/varats/varats/paper_mgmt/paper_config_manager.py +++ b/varats/varats/paper_mgmt/paper_config_manager.py @@ -406,10 +406,10 @@ def package_paper_config( output_file, "w", compression=ZIP_DEFLATED, compresslevel=9 ) as pc_zip: for file_path in files_to_store: - pc_zip.write(file_path.relative_to(vara_root)) + pc_zip.write(file_path.resolve().relative_to(vara_root)) for case_study_file in case_study_files_to_include: - pc_zip.write(case_study_file.relative_to(vara_root)) + pc_zip.write(case_study_file.resolve().relative_to(vara_root)) def _combine_tagged_revs_for_experiment( From 7d4ed1e64a67ef28db264fd827429372d5d27e1e Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 25 Jul 2023 12:44:19 +0200 Subject: [PATCH 087/224] Adds experiment to bb config gen --- varats/varats/tools/bb_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/varats/varats/tools/bb_config.py b/varats/varats/tools/bb_config.py index 294856346..ab9212210 100644 --- a/varats/varats/tools/bb_config.py +++ b/varats/varats/tools/bb_config.py @@ -112,6 +112,7 @@ def update_experiments(bb_cfg: s.Configuration) -> None: 'varats.experiments.vara.feature_perf_runner', 'varats.experiments.vara.feature_perf_sampling', 'varats.experiments.vara.feature_perf_tracing', + 'varats.experiments.vara.feature_perf_precision', 'varats.experiments.vara.feature_tracing_stats', 'varats.experiments.vara.feature_instrumentation_points', 'varats.experiments.vara.instrumentation_verifier', From ae7d67af5cc94858002ca4ac6d656ff2f0329e1a Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 25 Jul 2023 14:26:29 +0200 Subject: [PATCH 088/224] Fixes packaging tool to work with config ids --- .../SynthSAContextSensitivity_0.case_study | 19 +++++++++ ...4c6-8ce0-08d0a29c677b_config-1_success.zip | Bin 0 -> 4970 bytes ...7ba-abbd-90c98e88a37c_config-0_success.zip | Bin 0 -> 5145 bytes tests/paper_mgmt/test_case_study.py | 40 ++++++++++++++++++ varats/varats/paper_mgmt/case_study.py | 22 +++++++--- .../varats/paper_mgmt/paper_config_manager.py | 4 +- 6 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 tests/TEST_INPUTS/paper_configs/test_config_ids/SynthSAContextSensitivity_0.case_study create mode 100644 tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBase-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/8380144f-9a25-44c6-8ce0-08d0a29c677b_config-1_success.zip create mode 100644 tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBase-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/b24ee2c1-fc85-47ba-abbd-90c98e88a37c_config-0_success.zip diff --git a/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthSAContextSensitivity_0.case_study b/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthSAContextSensitivity_0.case_study new file mode 100644 index 000000000..2a872480b --- /dev/null +++ b/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthSAContextSensitivity_0.case_study @@ -0,0 +1,19 @@ +--- +DocType: CaseStudy +Version: 1 +... +--- +project_name: SynthSAContextSensitivity +stages: +- revisions: + - commit_hash: 06eac0edb6886a7e487867c8d5629cb2409b54fd + commit_id: 57 + config_ids: + - 0 + - 1 +version: 0 +... +--- +0: '["--compress", "--mem", "10", "8"]' +1: '["--compress", "--mem", "300", "8"]' +... 
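For orientation, the new case-study fixture above is a multi-document YAML
file: the first document is a DocType header, the second lists the selected
revision together with its config_ids, and the third maps each config ID to a
JSON-encoded list of command-line options. A minimal sketch of reading such a
file with PyYAML follows; the helper name and return shape are illustrative
assumptions, VaRA's actual loader lives in varats.paper.case_study.

.. code-block:: python

    import json
    import typing as tp
    from pathlib import Path

    import yaml


    def load_config_options(
        case_study_file: Path
    ) -> tp.Dict[int, tp.List[str]]:
        """Map each config ID to its options (hypothetical helper)."""
        with open(case_study_file) as stream:
            # Documents: 0 = DocType header, 1 = stages, 2 = config-ID map.
            docs = list(yaml.safe_load_all(stream))
        # Option lists are stored as JSON strings,
        # e.g. '["--compress", "--mem", "10", "8"]'.
        return {
            config_id: json.loads(options)
            for config_id, options in docs[2].items()
        }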
diff --git a/tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBase-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/8380144f-9a25-44c6-8ce0-08d0a29c677b_config-1_success.zip b/tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBase-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/8380144f-9a25-44c6-8ce0-08d0a29c677b_config-1_success.zip
new file mode 100644
index 0000000000000000000000000000000000000000..df6194dc1212a7b0d6ff924bace3515c4cd713d2
GIT binary patch
literal 4970
[base85-encoded binary payload elided]

literal 0
HcmV?d00001

diff --git a/tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBase-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/b24ee2c1-fc85-47ba-abbd-90c98e88a37c_config-0_success.zip b/tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBase-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/b24ee2c1-fc85-47ba-abbd-90c98e88a37c_config-0_success.zip
new file mode 100644
index 0000000000000000000000000000000000000000..02e15588784a5e567cd9ddedaddada2d65970d3c
GIT binary patch
literal 5145
[base85-encoded binary payload elided]

literal 0
HcmV?d00001

diff --git a/tests/paper_mgmt/test_case_study.py b/tests/paper_mgmt/test_case_study.py
index 61aba4718..43041c192 100644
--- a/tests/paper_mgmt/test_case_study.py
+++ b/tests/paper_mgmt/test_case_study.py
@@ -235,6 +235,46 @@ def test_get_newest_result_files_for_case_study_with_empty_res_dir(
             ), []
         )
 
+    @run_in_test_environment(
+        UnitTestFixtures.PAPER_CONFIGS,
UnitTestFixtures.RESULT_FILES
+    )
+    def test_get_newest_result_files_for_case_study_with_config(self) -> None:
+        """Check that when we have two files, the newest one gets selected."""
+        vara_cfg()['paper_config']['current_config'] = "test_config_ids"
+        load_paper_config()
+
+        config_0_file = ReportFilename(
+            "BBBase-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/"
+            "b24ee2c1-fc85-47ba-abbd-90c98e88a37c_config-0_success.zip"
+        )
+        config_1_file = ReportFilename(
+            "BBBase-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/"
+            "8380144f-9a25-44c6-8ce0-08d0a29c677b_config-1_success.zip"
+        )
+
+        now = datetime.now().timestamp()
+        file_path_0 = Path(
+            str(vara_cfg()['result_dir'])
+        ) / 'SynthSAContextSensitivity' / config_0_file.filename
+        os.utime(file_path_0, (now, now))
+
+        file_path_1 = Path(
+            str(vara_cfg()['result_dir'])
+        ) / 'SynthSAContextSensitivity' / config_1_file.filename
+        os.utime(file_path_1, (now, now))
+
+        newest_res_files = MCS.get_newest_result_files_for_case_study(
+            get_paper_config().get_case_studies('SynthSAContextSensitivity')[0],
+            Path(vara_cfg()['result_dir'].value), CR
+        )
+
+        newest_res_files.sort(reverse=True)
+        newest_res_filenames = [ReportFilename(x) for x in newest_res_files]
+
+        self.assertEqual(newest_res_filenames[0].config_id, 0)
+        self.assertEqual(newest_res_filenames[1].config_id, 1)
+        self.assertEqual(len(newest_res_filenames), 2)
+
     def test_get_case_study_file_name_filter_empty(self) -> None:
         """Check that we correctly handle case study filter generation even if
         no case study was provided."""
diff --git a/varats/varats/paper_mgmt/case_study.py b/varats/varats/paper_mgmt/case_study.py
index b064c29b2..a0280e949 100644
--- a/varats/varats/paper_mgmt/case_study.py
+++ b/varats/varats/paper_mgmt/case_study.py
@@ -301,26 +301,34 @@ def get_newest_result_files_for_case_study(
     Returns:
         list of result file paths
     """
-    files_to_store: tp.Dict[ShortCommitHash, Path] = {}
+    files_to_store: tp.Dict[tp.Tuple[ShortCommitHash, tp.Optional[int]],
+                            Path] = {}
 
     result_dir /= case_study.project_name
     if not result_dir.exists():
         return []
 
-    for opt_res_file in result_dir.iterdir():
-        report_file = ReportFilename(opt_res_file.name)
+    for opt_res_file in result_dir.rglob("*"):
+        report_file = ReportFilename.construct(opt_res_file, result_dir)
         if report_type.is_correct_report_type(report_file.filename):
             commit_hash = report_file.commit_hash
-            if case_study.has_revision(commit_hash):
-                current_file = files_to_store.get(commit_hash, None)
+            config_id = report_file.config_id
+            config_id_matches = (
+                config_id is None or config_id
+                in case_study.get_config_ids_for_revision(commit_hash)
+            )
+
+            if case_study.has_revision(commit_hash) and config_id_matches:
+                current_file = files_to_store.get((commit_hash, config_id),
+                                                  None)
                 if current_file is None:
-                    files_to_store[commit_hash] = opt_res_file
+                    files_to_store[(commit_hash, config_id)] = opt_res_file
                 else:
                     if (
                         current_file.stat().st_mtime <
                         opt_res_file.stat().st_mtime
                     ):
-                        files_to_store[commit_hash] = opt_res_file
+                        files_to_store[(commit_hash, config_id)] = opt_res_file
 
     return list(files_to_store.values())
 
diff --git a/varats/varats/paper_mgmt/paper_config_manager.py b/varats/varats/paper_mgmt/paper_config_manager.py
index 4a418fd44..14c6d0d21 100644
--- a/varats/varats/paper_mgmt/paper_config_manager.py
+++ b/varats/varats/paper_mgmt/paper_config_manager.py
@@ -377,10 +377,10 @@ def package_paper_config(
     result_dir = Path(str(vara_cfg()['result_dir']))
     report_types: tp.List[tp.Type[BaseReport]] = []
     if
experiment_types: - report_types = list(BaseReport.REPORT_TYPES.values()) - else: for experiment_type in experiment_types: report_types.extend(experiment_type.report_spec().report_types) + else: + report_types = list(BaseReport.REPORT_TYPES.values()) files_to_store: tp.Set[Path] = set() for case_study in current_config.get_all_case_studies(): From 4bad911b88c03ffdac3c0b784d2ace9428f7e687 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 24 Aug 2023 09:29:22 +0200 Subject: [PATCH 089/224] Cleans up patch provider --- .../varats/provider/patch/patch_provider.py | 63 +++++++++---------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index e979eee8b..fb90b4896 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -1,5 +1,11 @@ +""" +Module for the :class:`PatchProvider`. + +The patch provider enables users to query patches for project, which can be +applied during an experiment to alter the state of the project. +""" + import os -import textwrap import typing as tp import warnings from pathlib import Path @@ -8,27 +14,16 @@ import yaml from benchbuild.project import Project from benchbuild.source.base import target_prefix -from benchbuild.utils import actions -from benchbuild.utils.actions import StepResult -from benchbuild.utils.revision_ranges import ( - _get_all_revisions_between, - _get_git_for_path, -) -from plumbum import local, ProcessExecutionError from yaml import YAMLError from varats.project.project_util import get_local_project_git_path -from varats.project.varats_project import VProject from varats.provider.provider import Provider, ProviderType -from varats.utils.git_commands import ( - pull_current_branch, - apply_patch, - revert_patch, -) +from varats.utils.git_commands import pull_current_branch, fetch_repository from varats.utils.git_util import ( CommitHash, ShortCommitHash, get_all_revisions_between, + get_initial_commit, ) @@ -69,9 +64,10 @@ def from_yaml(yaml_path: Path): if "tags" in yaml_dict: tags = yaml_dict["tags"] - main_repo_git = _get_git_for_path( - get_local_project_git_path(project_name) - ) + project_git_path = get_local_project_git_path(project_name) + + # Update repository to have all upstream changes + fetch_repository(project_git_path) def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: res: tp.Set[CommitHash] = set() @@ -90,8 +86,7 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: res.update( get_all_revisions_between( rev_range["start"], rev_range["end"], - ShortCommitHash, - get_local_project_git_path(project_name) + ShortCommitHash, project_git_path ) ) else: @@ -99,7 +94,7 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: get_all_revisions_between( rev_dict["revision_range"]["start"], rev_dict["revision_range"]["end"], ShortCommitHash, - get_local_project_git_path(project_name) + project_git_path ) ) @@ -108,11 +103,12 @@ def parse_revisions(rev_dict: tp.Dict) -> tp.Set[CommitHash]: if "include_revisions" in yaml_dict: include_revisions = parse_revisions(yaml_dict["include_revisions"]) else: - include_revisions = { - ShortCommitHash(h) - for h in main_repo_git('log', '--pretty=%H', '--first-parent' - ).strip().split() - } + include_revisions: tp.Set[CommitHash] = set( + get_all_revisions_between( + get_initial_commit(project_git_path).hash, "", + ShortCommitHash, project_git_path + ) + ) if "exclude_revisions" in 
yaml_dict: include_revisions.difference_update( @@ -168,9 +164,10 @@ def __getitem__(self, tags: tp.Union[str, tp.Iterable[str]]): Returns a PatchSet, such that all patches include all the tags given """ - # TODO: Discuss if we really want this. Currently this is an "all_of" access - # We could consider to remove the bracket operator and only provide the all_of/any_of accessors as it - # would be clearer what the exact behaviour is + # TODO: Discuss if we really want this. Currently this is an "all_of" + # access We could consider to remove the bracket operator and only + # provide the all_of/any_of accessors as it would be clearer what the + # exact behavior is # Trick to handle correct set construction if just a single tag is given if isinstance(tags, str): @@ -199,9 +196,9 @@ def any_of(self, tags: tp.Union[str, tp.Iterable[str]]) -> "PatchSet": if isinstance(tags, str): tags = [tags] - result = set() + result: tp.Set[Patch] = set() for patch in self: - if patch.tags and any([tag in patch.tags for tag in tags]): + if patch.tags and any(tag in patch.tags for tag in tags): result.add(patch) return PatchSet(result) @@ -248,7 +245,8 @@ def __init__(self, project: tp.Type[Project]): if not patches_project_dir.is_dir(): warnings.warn( - f"Could not find patches directory for project '{self.project.NAME}'." + "Could not find patches directory for project " + f"'{self.project.NAME}'." ) patches = set() @@ -295,7 +293,8 @@ def create_provider_for_project( Creates a provider instance for the given project. Note: - A provider may not contain any patches at all if there are no existing patches for a project + A provider may not contain any patches at all if there are no + existing patches for a project Returns: a provider instance for the given project From 16f7fe56e87afb86de5a093c6bcb7e768ce5049a Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 24 Aug 2023 09:30:01 +0200 Subject: [PATCH 090/224] Updates code to merged changes --- .../data/databases/feature_perf_precision_database.py | 4 ++-- varats/varats/experiments/vara/feature_perf_precision.py | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 737112326..d162271b9 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -8,7 +8,7 @@ from scipy.stats import ttest_ind import varats.experiments.vara.feature_perf_precision as fpp -from varats.data.metrics import ClassificationResults +from varats.data.metrics import ConfusionMatrix from varats.experiments.vara.feature_experiment import FeatureExperiment from varats.paper.case_study import CaseStudy from varats.paper_mgmt.case_study import get_case_study_file_name_filter @@ -561,7 +561,7 @@ def load_precision_data(case_studies, profilers) -> pd.DataFrame: ) if ground_truth and predicted: - results = ClassificationResults( + results = ConfusionMatrix( map_to_positive_config_ids(ground_truth), map_to_negative_config_ids(ground_truth), map_to_positive_config_ids(predicted), diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 04173bcc1..32d5436d9 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -27,6 +27,7 @@ ZippedExperimentSteps, OutputFolderStep, ) +from 
varats.experiment.steps.patch import ApplyPatch, RevertPatch from varats.experiment.steps.recompile import ReCompile from varats.experiment.workload_util import WorkloadCategory, workload_commands from varats.experiments.vara.feature_experiment import ( @@ -36,11 +37,7 @@ from varats.project.project_domain import ProjectDomains from varats.project.project_util import BinaryType, ProjectBinaryWrapper from varats.project.varats_project import VProject -from varats.provider.patch.patch_provider import ( - PatchProvider, - ApplyPatch, - RevertPatch, -) +from varats.provider.patch.patch_provider import PatchProvider from varats.report.gnu_time_report import TimeReportAggregate from varats.report.linux_perf_report import LinuxPerfReportAggregate from varats.report.multi_patch_report import MultiPatchReport From cc8955f4f8a3f34bfc3bcbb0d5db8a0deaa53802 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 24 Aug 2023 09:54:42 +0200 Subject: [PATCH 091/224] More clean up for patch provider --- .../varats/provider/patch/patch_provider.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index fb90b4896..b370df208 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -43,7 +43,8 @@ def __init__( self.shortname: str = shortname self.description: str = description self.path: Path = path - self.valid_revisions: tp.Optional[tp.Set[CommitHash]] = valid_revisions + self.valid_revisions: tp.Set[ + CommitHash] = valid_revisions if valid_revisions else set() self.tags: tp.Optional[tp.Set[str]] = tags @staticmethod @@ -152,8 +153,8 @@ def __init__(self, patches: tp.Set[Patch]): def __iter__(self) -> tp.Iterator[Patch]: return self.__patches.__iter__() - def __contains__(self, v: tp.Any) -> bool: - return self.__patches.__contains__(v) + def __contains__(self, value: tp.Any) -> bool: + return self.__patches.__contains__(value) def __len__(self) -> int: return len(self.__patches) @@ -209,7 +210,7 @@ def all_of(self, tags: tp.Union[str, tp.Iterable[str]]) -> "PatchSet": Equivalent to bracket operator (__getitem__) """ - return self.__getitem__(tags) + return self[tags] def __hash__(self) -> int: return hash(self.__patches) @@ -249,9 +250,9 @@ def __init__(self, project: tp.Type[Project]): f"'{self.project.NAME}'." ) - patches = set() + self.__patches: tp.Set[Patch] = set() - for root, dirs, files in os.walk(patches_project_dir): + for root, _, files in os.walk(patches_project_dir): for filename in files: if not filename.endswith(".info"): continue @@ -259,14 +260,12 @@ def __init__(self, project: tp.Type[Project]): info_path = Path(os.path.join(root, filename)) try: current_patch = Patch.from_yaml(info_path) - patches.add(current_patch) + self.__patches.add(current_patch) except YAMLError: warnings.warn( f"Unable to parse patch info in: '{filename}'" ) - self.__patches: tp.Set[Patch] = patches - def get_by_shortname(self, shortname: str) -> tp.Optional[Patch]: """ Returns a patch with a specific shortname, if such a patch exists. 
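To make the provider changes in the patches above concrete, here is a sketch of a patch ".info" file in the shape that Patch.from_yaml() parses, together with the PatchSet tag queries cleaned up in this commit. Everything concrete is invented for illustration: the project_name key, the hash values, and the tag names do not come from the patches; only the key structure (tags, include_revisions, exclude_revisions, single_revision, revision_range with start/end) is visible in the diffs.

    import typing as tp

    import yaml

    from varats.provider.patch.patch_provider import PatchSet

    # Hypothetical ".info" content; the keys mirror the parsing code in
    # Patch.from_yaml(), all values are invented placeholders.
    EXAMPLE_PATCH_INFO = """
    project_name: FeaturePerfCSCollection
    shortname: example-regression
    description: Hypothetical patch used to illustrate the config layout.
    path: example-regression.patch
    tags:
      - regression
      - perf
    include_revisions:
      single_revision: 0a1b2c3d          # one hash or a list of hashes
      revision_range:                    # one mapping or a list of mappings
        start: 4e5f6a7b
        end: 8c9d0e1f
    exclude_revisions:
      single_revision:
        - deadbeef
    """

    yaml_dict = yaml.safe_load(EXAMPLE_PATCH_INFO)
    assert "include_revisions" in yaml_dict
    assert "exclude_revisions" in yaml_dict


    def query_patch_set(patch_set: PatchSet) -> tp.Tuple[PatchSet, PatchSet]:
        # all_of() and bracket access keep patches that carry *all* given
        # tags; any_of() keeps patches that carry at least one. A bare
        # string counts as a single tag in every accessor.
        strict = patch_set.all_of(["regression", "perf"])
        loose = patch_set.any_of(["regression", "perf"])
        return strict, loose

Bracket access keeps all_of semantics, which is exactly the behavior the TODO in this commit debates.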
From 649449a0e566e0e3dd1bfb3eb4572cce2019d5d7 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 31 Aug 2023 10:18:15 +0200 Subject: [PATCH 092/224] Fixes PIM computation --- .../feature_perf_precision_database.py | 77 +++-- .../perf_tests/feature_perf_cs_collection.py | 300 ++++++++++++++++++ 2 files changed, 351 insertions(+), 26 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index d162271b9..335432666 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -171,6 +171,10 @@ def is_regression( new_acc_pim[feature].append(value) for feature, old_values in old_acc_pim.items(): + if feature == "Base": + # The regression should be identified in actual feature code + continue + if feature in new_acc_pim: new_values = new_acc_pim[feature] ttest_res = ttest_ind(old_values, new_values) @@ -199,6 +203,24 @@ def __init__(self) -> None: fpp.MPRPIMAggregate ) + @staticmethod + def __aggregate_pim_data(reports) -> tp.DefaultDict[str, tp.List[int]]: + acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for old_pim_report in reports: + per_report_acc_pim: tp.DefaultDict[str, int] = defaultdict(int) + for region_inter in old_pim_report.region_interaction_entries: + name = get_interactions_from_fr_string( + old_pim_report._translate_interaction( + region_inter.interaction + ) + ) + per_report_acc_pim[name] += region_inter.time + + for name, time_value in per_report_acc_pim.items(): + acc_pim[name].append(time_value) + + return acc_pim + def is_regression( self, report_path: ReportFilepath, patch_name: str ) -> bool: @@ -209,35 +231,28 @@ def is_regression( report_path.full_path(), fpp.PerfInfluenceTraceReportAggregate ) - old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - for old_pim_report in multi_report.get_baseline_report().reports(): - for region_inter in old_pim_report.region_interaction_entries: - name = get_interactions_from_fr_string( - old_pim_report._translate_interaction( - region_inter.interaction - ) - ) - time = region_inter.time - old_acc_pim[name].append(time) + try: + old_acc_pim = self.__aggregate_pim_data( + multi_report.get_baseline_report().reports() + ) - new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) - opt_mr = multi_report.get_report_for_patch(patch_name) - if not opt_mr: - raise NotImplementedError() + opt_mr = multi_report.get_report_for_patch(patch_name) + if not opt_mr: + raise NotImplementedError() - for new_pim_report in opt_mr.reports(): - for region_inter in new_pim_report.region_interaction_entries: - name = get_interactions_from_fr_string( - new_pim_report._translate_interaction( - region_inter.interaction - ) - ) - time = region_inter.time - new_acc_pim[name].append(time) + new_acc_pim = self.__aggregate_pim_data(opt_mr.reports()) + except Exception as e: + print(f"FAILURE: Report parsing failed: {report_path}") + print(e) + return False # TODO: same for TEF for feature, old_values in old_acc_pim.items(): if feature in new_acc_pim: + if feature == "Base": + # The regression should be identified in actual feature code + continue + new_values = new_acc_pim[feature] ttest_res = ttest_ind(old_values, new_values) @@ -291,6 +306,10 @@ def is_regression( new_acc_pim[feature].append(value) for feature, old_values in old_acc_pim.items(): + if feature == "Base": + # The regression should be identified in actual feature 
code + continue + if feature in new_acc_pim: new_values = new_acc_pim[feature] ttest_res = ttest_ind(old_values, new_values) @@ -432,9 +451,15 @@ def compute_profiler_predictions( ) return None - result_dict[config_id] = profiler.is_regression( - report_files[0], patch_name - ) + try: + result_dict[config_id] = profiler.is_regression( + report_files[0], patch_name + ) + except Exception: + print( + f"FAILURE: Skipping {config_id=} of {project_name=}, " + f"profiler={profiler.name}" + ) return result_dict diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 9d5ffa0cc..10fa7c275 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -393,3 +393,303 @@ def compile(self) -> None: def recompile(self) -> None: """Recompile the project.""" _do_feature_perf_cs_collection_recompile(self) + + +class SynthDAVirtualInheritance(VProject): + """Synthetic case-study project for testing detection of virtual + inheritance.""" + + NAME = 'SynthDAVirtualInheritance' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthDAVirtualInheritance", + refspec="origin/master", + limit=None, + shallow=False, + version_filter=project_filter_generator( + "SynthDAVirtualInheritance" + ) + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("SynthDAVirtualInheritance") / + RSBinary("VirtualInheritance"), + label="VirtualInheritance-no-input" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthDAVirtualInheritance.NAME) + ) + + binary_map.specify_binary( + "build/bin/VirtualInheritance", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("96848fadf1", "master") + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHDAVIRTUALINHERITANCE" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + + +class SynthDARecursion(VProject): + """Synthetic case-study project for testing detection of recursion.""" + + NAME = 'SynthDARecursion' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthDARecursion", + refspec="origin/master", + limit=None, + shallow=False, + version_filter=project_filter_generator("SynthDARecursion") + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("SynthDARecursion") / RSBinary("Recursion"), + label="Recursion-no-input" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthDARecursion.NAME) + ) + + binary_map.specify_binary( + "build/bin/Recursion", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("96848fadf1", "master") + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + 
"""Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHDARECURSION" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + + +class SynthOVInsideLoop(VProject): + """Synthetic case-study project for testing detection of hot loop codes.""" + + NAME = 'SynthOVInsideLoop' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthOVInsideLoop", + refspec="origin/master", + limit=None, + shallow=False, + version_filter=project_filter_generator("SynthOVInsideLoop") + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("SynthOVInsideLoop") / RSBinary("InsideLoop"), + label="InsideLoop-no-input" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthOVInsideLoop.NAME) + ) + + binary_map.specify_binary( + "build/bin/InsideLoop", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("96848fadf1", "master") + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHOVINSIDELOOP" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + + +class SynthFeatureInteraction(VProject): + """Synthetic case-study project for testing detection of feature + interactions.""" + + NAME = 'SynthFeatureInteraction' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthFeatureInteraction", + refspec="origin/master", + limit=None, + shallow=False, + version_filter=project_filter_generator("SynthFeatureInteraction") + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("SynthFeatureInteraction") / + RSBinary("FeatureInteraction"), + label="FeatureInteraction-no-input" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthFeatureInteraction.NAME) + ) + + binary_map.specify_binary( + "build/bin/FeatureInteraction", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("96848fadf1", "master") + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHFEATUREINTERACTION" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + + +class SynthFeatureHigherOrderInteraction(VProject): + """Synthetic case-study project for testing detection of higher-order + feature interactions.""" + + NAME = 'SynthFeatureHigherOrderInteraction' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthFeatureHigherOrderInteraction", + refspec="origin/master", + limit=None, + shallow=False, + version_filter=project_filter_generator( + 
"SynthFeatureHigherOrderInteraction" + ) + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("SynthFeatureHigherOrderInteraction") / + RSBinary("HigherOrderInteraction"), + label="HigherOrderInteraction-no-input" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthFeatureHigherOrderInteraction.NAME) + ) + + binary_map.specify_binary( + "build/bin/HigherOrderInteraction", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("daf81de073", "master") + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHFEATUREHIGHERORDERINTERACTION" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) From f64598a7bdf47bd6cf4cf2cdbd9428bed134bdeb Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 31 Aug 2023 10:19:11 +0200 Subject: [PATCH 093/224] Fixes multi step error tracking --- varats-core/varats/experiment/experiment_util.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index 99c6b35bd..a81dcc5d4 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -554,11 +554,17 @@ def __run_children(self, tmp_folder: Path) -> tp.List[StepResult]: def __call__(self) -> StepResult: results: tp.List[StepResult] = [] + exception_raised_during_exec = False with ZippedReportFolder(self.__output_filepath.full_path()) as tmp_dir: - results = self.__run_children(Path(tmp_dir)) + try: + results = self.__run_children(Path(tmp_dir)) + except: # noqa: E722 + exception_raised_during_exec = True + raise overall_step_result = max(results) if results else StepResult.OK - if overall_step_result is not StepResult.OK: + if overall_step_result is not StepResult.OK \ + or exception_raised_during_exec: error_filepath = self.__output_filepath.with_status( FileStatusExtension.FAILED ) From b1a26d1e81447b33ddc017c54cf65a9c7eab2322 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 31 Aug 2023 10:20:20 +0200 Subject: [PATCH 094/224] Fixes missing folder creation --- varats-core/varats/utils/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varats-core/varats/utils/settings.py b/varats-core/varats/utils/settings.py index 19940a7ac..bac6860ad 100644 --- a/varats-core/varats/utils/settings.py +++ b/varats-core/varats/utils/settings.py @@ -316,7 +316,7 @@ def create_missing_folder_for_cfg( if config_node.has_value() and\ config_node.value is not None and\ not path.isdir(config_node.value): - makedirs(config_node.value) + makedirs(config_node.value, exist_ok=True) create_missing_folder_for_cfg("benchbuild_root") create_missing_folder_for_cfg("result_dir") @@ -339,7 +339,7 @@ def create_missing_folder_for_cfg( if config_node.has_value() and\ config_node.value is not None and\ not path.isdir(str(config_node.value)): - makedirs(str(config_node.value)) + makedirs(str(config_node.value), exist_ok=True) create_missing_folder_for_cfg("outfile", bb_cfg()["varats"]) create_missing_folder_for_cfg("result", bb_cfg()["varats"]) From d53aecffed9e32327da99e3ff1f4ba4fd339126d Mon 
Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 31 Aug 2023 10:27:54 +0200 Subject: [PATCH 095/224] Adapts interfaces --- varats/varats/plots/feature_perf_precision.py | 1 - varats/varats/tables/feature_perf_precision.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index d8bf37178..eea4cb207 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -59,7 +59,6 @@ def plot(self, view_mode: bool) -> None: df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") - print(f"{df['Profiler']=}") grid = multivariate_grid( df, 'precision', diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 3fc3ede27..357d9fad3 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -17,7 +17,7 @@ compute_profiler_predictions, OverheadData, ) -from varats.data.metrics import ClassificationResults +from varats.data.metrics import ConfusionMatrix from varats.paper.case_study import CaseStudy from varats.paper.paper_config import get_loaded_paper_config from varats.table.table import Table @@ -66,7 +66,7 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: ) if ground_truth and predicted: - results = ClassificationResults( + results = ConfusionMatrix( map_to_positive_config_ids(ground_truth), map_to_negative_config_ids(ground_truth), map_to_positive_config_ids(predicted), From a7cfb7c9b76f5b0ec4647c34047143dab3252105 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 31 Aug 2023 10:29:43 +0200 Subject: [PATCH 096/224] Fixes renaming --- .../perf_tests/feature_perf_cs_collection.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 10fa7c275..3e3ffb73d 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -395,24 +395,22 @@ def recompile(self) -> None: _do_feature_perf_cs_collection_recompile(self) -class SynthDAVirtualInheritance(VProject): +class SynthDADynamicDispatch(VProject): """Synthetic case-study project for testing detection of virtual inheritance.""" - NAME = 'SynthDAVirtualInheritance' + NAME = 'SynthDADynamicDispatch' GROUP = 'perf_tests' DOMAIN = ProjectDomains.TEST SOURCE = [ bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", - local="SynthDAVirtualInheritance", + local="SynthDADynamicDispatch", refspec="origin/master", limit=None, shallow=False, - version_filter=project_filter_generator( - "SynthDAVirtualInheritance" - ) + version_filter=project_filter_generator("SynthDADynamicDispatch") ), FeatureSource() ] @@ -420,9 +418,9 @@ class SynthDAVirtualInheritance(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ Command( - SourceRoot("SynthDAVirtualInheritance") / - RSBinary("VirtualInheritance"), - label="VirtualInheritance-no-input" + SourceRoot("SynthDADynamicDispatch") / + RSBinary("DynamicDispatch"), + label="DynamicDispatch-no-input" ) ] } @@ -432,11 +430,11 @@ def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 ) -> tp.List[ProjectBinaryWrapper]: binary_map = RevisionBinaryMap( - get_local_project_git_path(SynthDAVirtualInheritance.NAME) 
+ get_local_project_git_path(SynthDADynamicDispatch.NAME) ) binary_map.specify_binary( - "build/bin/VirtualInheritance", + "build/bin/DynamicDispatch", BinaryType.EXECUTABLE, only_valid_in=RevisionRange("96848fadf1", "master") ) @@ -449,7 +447,7 @@ def run_tests(self) -> None: def compile(self) -> None: """Compile the project.""" _do_feature_perf_cs_collection_compile( - self, "FPCSC_ENABLE_PROJECT_SYNTHDAVIRTUALINHERITANCE" + self, "FPCSC_ENABLE_PROJECT_SYNTHDADYNAMICDISPATCH" ) def recompile(self) -> None: From a22a17b362f82b8166a9b627207eb83d980a200e Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 2 Sep 2023 15:34:47 +0200 Subject: [PATCH 097/224] Implements special handling for ebpftrace to correctly handle nfs shares --- .../vara/feature_perf_precision.py | 163 +++++++++++------- 1 file changed, 98 insertions(+), 65 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 32d5436d9..a52c949db 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -1,5 +1,6 @@ """Module for feature performance precision experiments that evaluate measurement support of vara.""" +import tempfile import textwrap import typing as tp from abc import abstractmethod @@ -187,53 +188,72 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): zip_tmp_dir = tmp_dir / self._file_name - with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: - for rep in range(0, self._reps): - for prj_command in workload_commands( - self.project, self._binary, [WorkloadCategory.EXAMPLE] - ): - local_tracefile_path = Path(reps_tmp_dir) / ( - f"trace_{prj_command.command.label}_{rep}" - f".{self._report_file_ending}" - ) - - with local.env(VARA_TRACE_FILE=local_tracefile_path): - pb_cmd = prj_command.command.as_plumbum( - project=self.project - ) - print( - f"Running example {prj_command.command.label}" + with tempfile.TemporaryDirectory() as non_nfs_tmp_dir: + with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: + for rep in range(0, self._reps): + for prj_command in workload_commands( + self.project, self._binary, + [WorkloadCategory.EXAMPLE] + ): + local_tracefile_path = Path(reps_tmp_dir) / ( + f"trace_{prj_command.command.label}_{rep}" + f".{self._report_file_ending}" ) - extra_options = get_extra_config_options( - self.project - ) + with local.env( + VARA_TRACE_FILE=local_tracefile_path + ): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print( + f"Running example {prj_command.command.label}" + ) - bpf_runner = bpf_runner = self.attach_usdt_raw_tracing( - local_tracefile_path, - self.project.source_of_primary / - self._binary.path - ) + adapted_binary_location = Path( + non_nfs_tmp_dir + ) / self._binary.name - with cleanup(prj_command): - pb_cmd( - *extra_options, - retcode=self._binary.valid_exit_codes + # Store binary in a local tmp dir that is not on nfs + pb_cmd.executable = pb_cmd.executable.copy( + adapted_binary_location, override=True ) - # wait for bpf script to exit - if bpf_runner: - bpf_runner.wait() + extra_options = get_extra_config_options( + self.project + ) + + bpf_runner = bpf_runner = self.attach_usdt_raw_tracing( + local_tracefile_path, + adapted_binary_location, + Path(non_nfs_tmp_dir) + ) + + with cleanup(prj_command): + pb_cmd( + *extra_options, + retcode=self._binary.valid_exit_codes + ) + + # 
wait for bpf script to exit + if bpf_runner: + bpf_runner.wait() return StepResult.OK @staticmethod - def attach_usdt_raw_tracing(report_file: Path, binary: Path) -> Future: + def attach_usdt_raw_tracing( + report_file: Path, binary: Path, non_nfs_tmp_dir: Path + ) -> Future: """Attach bpftrace script to binary to activate raw USDT probes.""" - bpftrace_script_location = Path( + orig_bpftrace_script_location = Path( VaRA.install_location(), "share/vara/perf_bpf_tracing/RawUsdtTefMarker.bt" ) + # Store bpftrace script in a local tmp dir that is not on nfs + bpftrace_script_location = non_nfs_tmp_dir / "RawUsdtTefMarker.bt" + cp(orig_bpftrace_script_location, bpftrace_script_location) + bpftrace_script = bpftrace["-o", report_file, "-q", bpftrace_script_location, binary] bpftrace_script = bpftrace_script.with_env(BPFTRACE_PERF_RB_PAGES=4096) @@ -760,46 +780,59 @@ def __str__(self, indent: int = 0) -> str: def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): - for rep in range(0, self._reps): - for prj_command in workload_commands( - self.project, self._binary, [WorkloadCategory.EXAMPLE] - ): - base = Path("/tmp/") - fake_tracefile_path = base / ( - f"trace_{prj_command.command.label}_{rep}" - f".json" - ) - - time_report_file = tmp_dir / ( - f"overhead_{prj_command.command.label}_{rep}" - f".{self._report_file_ending}" - ) + with tempfile.TemporaryDirectory() as non_nfs_tmp_dir: + for rep in range(0, self._reps): + for prj_command in workload_commands( + self.project, self._binary, [WorkloadCategory.EXAMPLE] + ): + base = Path("/tmp/") + fake_tracefile_path = base / ( + f"trace_{prj_command.command.label}_{rep}" + f".json" + ) - with local.env(VARA_TRACE_FILE=fake_tracefile_path): - pb_cmd = prj_command.command.as_plumbum( - project=self.project + time_report_file = tmp_dir / ( + f"overhead_{prj_command.command.label}_{rep}" + f".{self._report_file_ending}" ) - print(f"Running example {prj_command.command.label}") - timed_pb_cmd = perf["stat", "-o", time_report_file, - "--", pb_cmd] + with local.env(VARA_TRACE_FILE=fake_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print( + f"Running example {prj_command.command.label}" + ) + adapted_binary_location = Path( + non_nfs_tmp_dir + ) / self._binary.name - extra_options = get_extra_config_options(self.project) + # Store binary in a local tmp dir that is not on nfs + pb_cmd.executable = pb_cmd.executable.copy( + adapted_binary_location, override=True + ) - bpf_runner = RunBPFTracedWorkloads.attach_usdt_raw_tracing( - fake_tracefile_path, - self.project.source_of_primary / self._binary.path - ) + timed_pb_cmd = perf["stat", "-o", time_report_file, + "--", pb_cmd] - with cleanup(prj_command): - timed_pb_cmd( - *extra_options, - retcode=self._binary.valid_exit_codes + extra_options = get_extra_config_options( + self.project ) - # wait for bpf script to exit - if bpf_runner: - bpf_runner.wait() + bpf_runner = RunBPFTracedWorkloads.attach_usdt_raw_tracing( + fake_tracefile_path, adapted_binary_location, + Path(non_nfs_tmp_dir) + ) + + with cleanup(prj_command): + timed_pb_cmd( + *extra_options, + retcode=self._binary.valid_exit_codes + ) + + # wait for bpf script to exit + if bpf_runner: + bpf_runner.wait() return StepResult.OK From f0c9295fa724134677f361e21c372c144414f7da Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 2 Sep 2023 22:13:32 +0200 Subject: [PATCH 098/224] Fixes packaging bug with 
multiple exp that have the same report type --- tests/data/test_report.py | 8 ++++ tests/paper_mgmt/test_case_study.py | 54 +++++++++++++++++++++++++- varats-core/varats/report/report.py | 2 +- varats/varats/paper_mgmt/case_study.py | 17 +++++--- 4 files changed, 73 insertions(+), 8 deletions(-) diff --git a/tests/data/test_report.py b/tests/data/test_report.py index fdb8d1196..f9e5cf9ab 100644 --- a/tests/data/test_report.py +++ b/tests/data/test_report.py @@ -156,6 +156,14 @@ def test_get_uuid(self): self.assertEqual(self.report_filename.uuid, self.correct_UUID) self.assertRaises(ValueError, lambda: self.broken_report_filename.uuid) + def test_experiment_shorthand_parsing_with_path_in_name(self) -> None: + """Checks that we correctly parse the experiment shorthand also in cases + where we have a path as part of the filename.""" + prefixed = ReportFilename( + "/tmp/foobar/" + self.report_filename.filename + ) + self.assertEqual(prefixed.experiment_shorthand, "CRE") + class TestConfigReportFilename(unittest.TestCase): """Test configuration specific ReportFilename functionality.""" diff --git a/tests/paper_mgmt/test_case_study.py b/tests/paper_mgmt/test_case_study.py index 01e22ab56..7a70b3325 100644 --- a/tests/paper_mgmt/test_case_study.py +++ b/tests/paper_mgmt/test_case_study.py @@ -239,7 +239,8 @@ def test_get_newest_result_files_for_case_study_with_empty_res_dir( UnitTestFixtures.PAPER_CONFIGS, UnitTestFixtures.RESULT_FILES ) def test_get_newest_result_files_for_case_study_with_config(self) -> None: - """Check that when we have two files, the newes one get's selected.""" + """Check that when we have two files that differ in their config id, + both get selected.""" vara_cfg()['paper_config']['current_config'] = "test_config_ids" load_paper_config() @@ -273,7 +274,56 @@ def test_get_newest_result_files_for_case_study_with_config(self) -> None: self.assertEqual(newest_res_filenames[0].config_id, 0) self.assertEqual(newest_res_filenames[1].config_id, 1) - self.assertEqual(len(newest_res_filenames), 2) + self.assertEqual(newest_res_filenames[2].config_id, 0) + self.assertEqual(newest_res_filenames[3].config_id, 1) + self.assertEqual(len(newest_res_filenames), 4) + + @run_in_test_environment( + UnitTestFixtures.PAPER_CONFIGS, UnitTestFixtures.RESULT_FILES + ) + def test_get_newest_result_files_for_case_study_with_diff_exp(self) -> None: + """Check that when we have two files that differ in their experiment + shorthand, both get selected.""" + vara_cfg()['paper_config']['current_config'] = "test_config_ids" + load_paper_config() + + config_0_file = ReportFilename( + "BBBase-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/" + "b24ee2c1-fc85-47ba-abbd-90c98e88a37c_config-0_success.zip" + ) + config_1_file = ReportFilename( + "BBBaseO-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/" + "b24ee2c1-fc85-47ba-abbd-90c98e88a37c_config-0_success.zip" + ) + + now = datetime.now().timestamp() + file_path_0 = Path( + str(vara_cfg()['result_dir']) + ) / 'SynthSAContextSensitivity' / config_0_file.filename + os.utime(file_path_0, (now, now)) + + file_path_1 = Path( + str(vara_cfg()['result_dir']) + ) / 'SynthSAContextSensitivity' / config_1_file.filename + os.utime(file_path_1, (now, now)) + + newest_res_files = MCS.get_newest_result_files_for_case_study( + get_paper_config().get_case_studies('SynthSAContextSensitivity')[0], + Path(vara_cfg()['result_dir'].value), CR + ) + + newest_res_files.sort(reverse=True) + newest_res_filenames = [ReportFilename(x) for x in newest_res_files] + + 
self.assertEqual( + newest_res_filenames[0].experiment_shorthand, "BBBaseO" + ) + self.assertEqual( + newest_res_filenames[1].experiment_shorthand, "BBBaseO" + ) + self.assertEqual(newest_res_filenames[2].experiment_shorthand, "BBBase") + self.assertEqual(newest_res_filenames[3].experiment_shorthand, "BBBase") + self.assertEqual(len(newest_res_filenames), 4) def test_get_case_study_file_name_filter_empty(self) -> None: """Check that we correctly handle case study filter generation even if diff --git a/varats-core/varats/report/report.py b/varats-core/varats/report/report.py index 0a649a7dc..ccbffcdbc 100644 --- a/varats-core/varats/report/report.py +++ b/varats-core/varats/report/report.py @@ -311,7 +311,7 @@ def experiment_shorthand(self) -> str: the experiment shorthand from a result file """ if (match := ReportFilename.__RESULT_FILE_REGEX.search(self.filename)): - return match.group("experiment_shorthand") + return match.group("experiment_shorthand").split('/')[-1] raise ValueError(f'File {self.filename} name was wrongly formatted.') diff --git a/varats/varats/paper_mgmt/case_study.py b/varats/varats/paper_mgmt/case_study.py index 823c7f154..556441cde 100644 --- a/varats/varats/paper_mgmt/case_study.py +++ b/varats/varats/paper_mgmt/case_study.py @@ -301,7 +301,7 @@ def get_newest_result_files_for_case_study( Returns: list of result file paths """ - files_to_store: tp.Dict[tp.Tuple[ShortCommitHash, tp.Optional[int]], + files_to_store: tp.Dict[tp.Tuple[ShortCommitHash, str, tp.Optional[int]], Path] = {} result_dir /= case_study.project_name @@ -319,16 +319,23 @@ def get_newest_result_files_for_case_study( ) if case_study.has_revision(commit_hash) and config_id_matches: - current_file = files_to_store.get((commit_hash, config_id), - None) + current_file = files_to_store.get( + (commit_hash, report_file.experiment_shorthand, config_id), + None + ) if current_file is None: - files_to_store[(commit_hash, config_id)] = opt_res_file + files_to_store[( + commit_hash, report_file.experiment_shorthand, config_id + )] = opt_res_file else: if ( current_file.stat().st_mtime < opt_res_file.stat().st_mtime ): - files_to_store[(commit_hash, config_id)] = opt_res_file + files_to_store[( + commit_hash, report_file.experiment_shorthand, + config_id + )] = opt_res_file return list(files_to_store.values()) From a855c75e910a4b912f9cd94bee92f9852299f22f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 2 Sep 2023 22:15:02 +0200 Subject: [PATCH 099/224] Fixes base path for nfs fixes --- varats/varats/experiments/vara/feature_perf_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index a52c949db..c057bbd3a 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -785,7 +785,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: for prj_command in workload_commands( self.project, self._binary, [WorkloadCategory.EXAMPLE] ): - base = Path("/tmp/") + base = Path(non_nfs_tmp_dir) fake_tracefile_path = base / ( f"trace_{prj_command.command.label}_{rep}" f".json" From fcb04c3b82c7bab7946d39417c5cb2938f73bed7 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 2 Sep 2023 23:31:42 +0200 Subject: [PATCH 100/224] Changes perf_cs collection github ref to https --- tests/provider/test_patch_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/provider/test_patch_provider.py b/tests/provider/test_patch_provider.py
index 692c5083a..9acebb32e 100644
--- a/tests/provider/test_patch_provider.py
+++ b/tests/provider/test_patch_provider.py
@@ -45,7 +45,7 @@ def setUpClass(cls) -> None:
         )

         project_git_source = bb.source.Git(
-            remote="git@github.com:se-sic/FeaturePerfCSCollection.git",
+            remote="https://github.com/se-sic/FeaturePerfCSCollection.git",
             local="FeaturePerfCSCollection",
             refspec="origin/HEAD",
             shallow=False,

From 8e646bc9b3e5a2a275bd0e918c2a8ad5fda99e96 Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Sat, 2 Sep 2023 23:44:35 +0200
Subject: [PATCH 101/224] Adds more test inputs

---
 ...5-44c6-8ce0-08d0a29c677b_config-1_success.zip | Bin 0 -> 4970 bytes
 ...5-47ba-abbd-90c98e88a37c_config-0_success.zip | Bin 0 -> 5145 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBaseO-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/8380144f-9a25-44c6-8ce0-08d0a29c677b_config-1_success.zip
 create mode 100644 tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBaseO-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/b24ee2c1-fc85-47ba-abbd-90c98e88a37c_config-0_success.zip

diff --git a/tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBaseO-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/8380144f-9a25-44c6-8ce0-08d0a29c677b_config-1_success.zip b/tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBaseO-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/8380144f-9a25-44c6-8ce0-08d0a29c677b_config-1_success.zip
new file mode 100644
index 0000000000000000000000000000000000000000..df6194dc1212a7b0d6ff924bace3515c4cd713d2
GIT binary patch
literal 4970
[base85-encoded zip payload of 4970 bytes elided; not human-readable]
literal 0
HcmV?d00001

diff --git a/tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBaseO-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/b24ee2c1-fc85-47ba-abbd-90c98e88a37c_config-0_success.zip b/tests/TEST_INPUTS/results/SynthSAContextSensitivity/BBBaseO-CR-SynthSAContextSensitivity-ContextSense-06eac0edb6/b24ee2c1-fc85-47ba-abbd-90c98e88a37c_config-0_success.zip
new file mode 100644
index 0000000000000000000000000000000000000000..02e15588784a5e567cd9ddedaddada2d65970d3c
GIT binary patch
literal 5145
[base85-encoded zip payload of 5145 bytes elided; not human-readable]
literal 0
HcmV?d00001

From f8ad750288abeca69f0afe7afaf856bcf38077f9 Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Mon, 4 Sep 2023 11:53:43 +0200
Subject: [PATCH 102/224] Enables CLI-based debug support for running containers

---
 varats/varats/tools/driver_run.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/varats/varats/tools/driver_run.py b/varats/varats/tools/driver_run.py
index 854b31c9f..74741e92a 100644
--- a/varats/varats/tools/driver_run.py
+++ b/varats/varats/tools/driver_run.py
@@ -90,6 +90,9 @@ def __validate_project_parameters(
 @click.option(
     "--container", is_flag=True, help="Run experiments in a container."
 )
+@click.option(
+    "--debug", is_flag=True, help="Run container in an interactive debug mode."
+)
 @click.option(
     "-E",
     "--experiment",
@@ -104,6 +107,7 @@ def main(
     slurm: bool,
     submit: bool,
     container: bool,
+    debug: bool,
     experiment: tp.List[tp.Type['VersionExperiment']],
     projects: tp.List[str],
     pretend: bool,
@@ -145,6 +149,9 @@ def main(
             bb_extra_args.append("--import")
     else:
         bb_command_args.append("container")
+        if debug:
+            bb_extra_args.append("--debug")
+            bb_extra_args.append("--interactive")

     if not slurm:
         bb_command_args.append("run")

From e2a68b88ae0e9d25d459e3f4be1901a02e36d09a Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Mon, 4 Sep 2023 11:54:18 +0200
Subject: [PATCH 103/224] Ensure that BB config is loaded before calling BB

---
 varats/varats/tools/driver_build_setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/varats/varats/tools/driver_build_setup.py b/varats/varats/tools/driver_build_setup.py
index 6023d9104..78e2dbbe1 100644
--- a/varats/varats/tools/driver_build_setup.py
+++ b/varats/varats/tools/driver_build_setup.py
@@ -270,6 +270,7 @@ def _build_in_container(
         install_mount = 'tools/'

     click.echo("Preparing container image.")
+    bb_cfg()  # Ensure that BB config is loaded
     image_name = create_dev_image(image_base, build_type)

     source_mount = str(StageBuilder.varats_root / source_mount)

From e6ac56c7b06b5fbc08d4ac4e8cf401a9e5aed84d Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Mon, 4 Sep 2023 11:55:30 +0200
Subject: [PATCH 104/224] Adds container specifiers to perf-cs collection

---
 .../perf_tests/feature_perf_cs_collection.py | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py
index 3e3ffb73d..00762442c 100644
--- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py
+++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py
@@ -9,6 +9,7 @@
 from benchbuild.utils.settings import get_number_of_jobs
 from plumbum import local

+from varats.containers.containers import get_base_image, ImageBase
 from varats.experiment.workload_util import RSBinary, WorkloadCategory
 from varats.paper.paper_config import project_filter_generator
 from varats.project.project_domain import ProjectDomains
@@ -108,6 +109,8 @@ class FeaturePerfCSCollection(VProject):
         ]
     }

+    CONTAINER = get_base_image(ImageBase.DEBIAN_12)
+
     @staticmethod
def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -246,6 +251,8 @@ class SynthSAFlowSensitivity(VProject): ] } + CONTAINER = get_base_image(ImageBase.DEBIAN_12) + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -307,6 +314,8 @@ class SynthSAContextSensitivity(VProject): ] } + CONTAINER = get_base_image(ImageBase.DEBIAN_12) + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -365,6 +374,8 @@ class SynthSAWholeProgram(VProject): ] } + CONTAINER = get_base_image(ImageBase.DEBIAN_12) + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -425,6 +436,8 @@ class SynthDADynamicDispatch(VProject): ] } + CONTAINER = get_base_image(ImageBase.DEBIAN_12) + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -483,6 +496,8 @@ class SynthDARecursion(VProject): ] } + CONTAINER = get_base_image(ImageBase.DEBIAN_12) + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -541,6 +556,8 @@ class SynthOVInsideLoop(VProject): ] } + CONTAINER = get_base_image(ImageBase.DEBIAN_12) + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -601,6 +618,8 @@ class SynthFeatureInteraction(VProject): ] } + CONTAINER = get_base_image(ImageBase.DEBIAN_12) + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -663,6 +682,8 @@ class SynthFeatureHigherOrderInteraction(VProject): ] } + CONTAINER = get_base_image(ImageBase.DEBIAN_12) + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 From 5368cb73013395427bb3ae2090ee1cdac4dca138 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 4 Sep 2023 12:04:25 +0200 Subject: [PATCH 105/224] Introduces debian12 containers --- varats/varats/containers/containers.py | 31 ++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/varats/varats/containers/containers.py b/varats/varats/containers/containers.py index ddb8abe93..92437cbb8 100644 --- a/varats/varats/containers/containers.py +++ b/varats/varats/containers/containers.py @@ -37,10 +37,12 @@ class ImageBase(Enum): """Container image bases that can be used by projects.""" - DEBIAN_10 = Distro.DEBIAN + DEBIAN_10 = (Distro.DEBIAN, 10) + DEBIAN_12 = (Distro.DEBIAN, 12) - def __init__(self, distro: Distro): + def __init__(self, distro: Distro, version_number: int): self.__distro = distro + self.__version_number = version_number @property def distro(self) -> Distro: @@ -156,7 +158,7 @@ def _create_stage_00_base_layers(stage_builder: StageBuilder) -> None: def _create_stage_10_varats_layers(stage_builder: StageBuilder) -> None: - stage_builder.layers.run('pip3', 'install', '--upgrade', 'pip') + stage_builder.layers.run('pip', 'install', '--upgrade', 'pip') _add_varats_layers(stage_builder) if bb_cfg()['container']['from_source']: add_benchbuild_layers(stage_builder.layers) @@ -215,13 +217,22 @@ def wrapped(stage_builder: StageBuilder) -> None: .run('make', '-j', str(get_number_of_jobs(bb_cfg()))) .run('make', 'install') .workingdir('/') - # install llvm 13 + # install llvm 14 .run('wget', 'https://apt.llvm.org/llvm.sh') .run('chmod', '+x', './llvm.sh') .run('./llvm.sh', '14', 'all') .run('ln', '-s', '/usr/bin/clang-14', '/usr/bin/clang') .run('ln', '-s', '/usr/bin/clang++-14', '/usr/bin/clang++') - .run('ln', '-s', '/usr/bin/lld-14', 
'/usr/bin/lld')) + .run('ln', '-s', '/usr/bin/lld-14', '/usr/bin/lld')), + ImageBase.DEBIAN_12: + _create_layers_helper(lambda ctx: ctx.layers + .from_("docker.io/library/debian:12") + .run('apt', 'update') + .run('apt', 'install', '-y', 'wget', 'gnupg', 'lsb-release', + 'software-properties-common', 'musl-dev', 'git', 'gcc', + 'libgit2-dev', 'libffi-dev', 'libyaml-dev', 'graphviz-dev', + 'python3', 'python3-pip', 'python3-virtualenv', 'clang', + 'lld', 'time')) } _STAGE_LAYERS: tp.Dict[ImageStage, @@ -313,7 +324,9 @@ def _set_varats_source_mount(image_context: StageBuilder, mnt_src: str) -> None: def _setup_venv(image_context: StageBuilder) -> None: venv_path = "/venv" - image_context.layers.run("pip3", "install", "virtualenv") + if image_context.base == ImageBase.DEBIAN_10: + image_context.layers.run("pip3", "install", "virtualenv") + image_context.layers.run("virtualenv", venv_path) image_context.layers.env(VIRTUAL_ENV=venv_path) image_context.layers.env(PATH=f"{venv_path}/bin:$PATH") @@ -331,9 +344,9 @@ def from_source( tgt_dir = image_context.varats_source_mount_target image.run('mkdir', f'{tgt_dir}', runtime=crun) - image.run('pip3', 'install', 'setuptools', runtime=crun) + image.run('pip', 'install', 'setuptools', runtime=crun) - pip_args = ['pip3', 'install'] + pip_args = ['pip', 'install'] if editable_install: pip_args.append("-e") _set_varats_source_mount(image_context, str(src_dir)) @@ -348,7 +361,7 @@ def from_source( def from_pip(image: ContainerImage) -> None: LOG.debug("installing varats from pip release.") image.run( - 'pip3', + 'pip', 'install', '--ignore-installed', 'varats-core', From e40ca2fed8548fd48ee112d93a91b46fab53c74a Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 4 Sep 2023 12:05:53 +0200 Subject: [PATCH 106/224] Encode FeatureExperiment cluster requirements --- varats/varats/experiments/vara/feature_experiment.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/varats/varats/experiments/vara/feature_experiment.py b/varats/varats/experiments/vara/feature_experiment.py index 0d500c21f..4f48a8251 100644 --- a/varats/varats/experiments/vara/feature_experiment.py +++ b/varats/varats/experiments/vara/feature_experiment.py @@ -17,6 +17,7 @@ Compile, Clean, ) +from benchbuild.utils.requirements import Requirement, SlurmMem from plumbum import local from varats.experiment.experiment_util import ( @@ -73,6 +74,8 @@ class FeatureExperiment(VersionExperiment, shorthand=""): REPORT_SPEC = ReportSpecification() + REQUIREMENTS: tp.List[Requirement] = [SlurmMem("250G")] + @abstractmethod def actions_for_project(self, project: VProject) -> tp.MutableSequence[Step]: From 9cfd84c64c398c3b2f31fb4949236e6865911cdd Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 4 Sep 2023 12:14:49 +0200 Subject: [PATCH 107/224] Add container desc to experiments --- .../vara/feature_perf_precision.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index c057bbd3a..85a0ef361 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -15,6 +15,7 @@ from plumbum import local, BG from plumbum.commands.modifiers import Future +from varats.containers.containers import get_base_image, ImageBase from varats.data.reports.performance_influence_trace_report import ( PerfInfluenceTraceReportAggregate, ) @@ -494,6 +495,9 @@ class EbpfTraceTEFProfileRunner(FeatureExperiment, 
shorthand="ETEFp"): REPORT_SPEC = ReportSpecification(MPRTEFAggregate) + CONTAINER = get_base_image(ImageBase.DEBIAN_12 + ).run('apt', 'install', '-y', 'bpftrace') + def actions_for_project( self, project: VProject ) -> tp.MutableSequence[actions.Step]: @@ -974,6 +978,9 @@ class TEFProfileOverheadRunner(FeatureExperiment, shorthand="TEFo"): REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) + CONTAINER = get_base_image(ImageBase.DEBIAN_12 + ).run('apt', 'install', '-y', 'perf') + def actions_for_project( self, project: VProject ) -> tp.MutableSequence[actions.Step]: @@ -996,6 +1003,9 @@ class PIMProfileOverheadRunner(FeatureExperiment, shorthand="PIMo"): REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) + CONTAINER = get_base_image(ImageBase.DEBIAN_12 + ).run('apt', 'install', '-y', 'perf') + def actions_for_project( self, project: VProject ) -> tp.MutableSequence[actions.Step]: @@ -1019,6 +1029,9 @@ class EbpfTraceTEFOverheadRunner(FeatureExperiment, shorthand="ETEFo"): REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) + CONTAINER = get_base_image(ImageBase.DEBIAN_12 + ).run('apt', 'install', '-y', 'bpftrace', 'perf') + def actions_for_project( self, project: VProject ) -> tp.MutableSequence[actions.Step]: @@ -1042,6 +1055,9 @@ class BccTraceTEFOverheadRunner(FeatureExperiment, shorthand="BCCo"): REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) + CONTAINER = get_base_image(ImageBase.DEBIAN_12 + ).run('apt', 'install', '-y', 'perf') + def actions_for_project( self, project: VProject ) -> tp.MutableSequence[actions.Step]: @@ -1124,6 +1140,9 @@ class BlackBoxOverheadBaseline(FeatureExperiment, shorthand="BBBaseO"): REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) + CONTAINER = get_base_image(ImageBase.DEBIAN_12 + ).run('apt', 'install', '-y', 'perf') + def actions_for_project( self, project: VProject ) -> tp.MutableSequence[actions.Step]: From 95fce58f23babcc4c9c785afd4f17a7ed39963cd Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 4 Sep 2023 12:16:58 +0200 Subject: [PATCH 108/224] Prevent loading bug --- varats-core/varats/report/linux_perf_report.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/varats-core/varats/report/linux_perf_report.py b/varats-core/varats/report/linux_perf_report.py index a4f68f4fc..3af794b73 100644 --- a/varats-core/varats/report/linux_perf_report.py +++ b/varats-core/varats/report/linux_perf_report.py @@ -23,6 +23,8 @@ import typing as tp from pathlib import Path +import numpy as np + from varats.report.report import BaseReport, ReportAggregate @@ -69,6 +71,9 @@ def __parse_ctx_switches(line: str) -> int: @staticmethod def __parse_branch_misses(line: str) -> int: + # TODO: fix return type + if line.startswith(""): + return np.NaN return int(line.split(" ")[0].replace(",", "")) @property From 001f8c98dc9c7d2fa6bdf08ddd7fe4ffc84525ea Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 4 Sep 2023 14:19:52 +0200 Subject: [PATCH 109/224] Migrate from perf to time --- tests/report/test_gnu_time_report.py | 8 +- varats-core/varats/report/gnu_time_report.py | 23 +++++ .../feature_perf_precision_database.py | 87 ++++++++++--------- .../vara/feature_perf_precision.py | 37 +++----- varats/varats/plots/feature_perf_precision.py | 27 ++++-- 5 files changed, 109 insertions(+), 73 deletions(-) diff --git a/tests/report/test_gnu_time_report.py b/tests/report/test_gnu_time_report.py index 80a74b550..7b3262acf 100644 --- a/tests/report/test_gnu_time_report.py +++ b/tests/report/test_gnu_time_report.py @@ 
-18,7 +18,7 @@ Average total size (kbytes): 0 Maximum resident set size (kbytes): 1804 Average resident set size (kbytes): 0 - Major (requiring I/O) page faults: 0 + Major (requiring I/O) page faults: 2 Minor (reclaiming a frame) page faults: 142 Voluntary context switches: 1 Involuntary context switches: 1 @@ -63,6 +63,12 @@ def test_max_resident_size(self): with self.assertRaises(WrongTimeReportFormat): TimeReport._parse_max_resident_size(" Something other timed:") + def test_major_page_faults(self): + """Test if we correctly parse the amount of major page faults from the + input line.""" + with self.assertRaises(WrongTimeReportFormat): + TimeReport._parse_major_page_faults(" Something other timed:") + def test_max_resident_size_byte_type(self): """Test if we correctly parse the max resident size from the input line.""" diff --git a/varats-core/varats/report/gnu_time_report.py b/varats-core/varats/report/gnu_time_report.py index 88200ba52..51a00b840 100644 --- a/varats-core/varats/report/gnu_time_report.py +++ b/varats-core/varats/report/gnu_time_report.py @@ -64,6 +64,11 @@ def __init__(self, path: Path) -> None: TimeReport._parse_wall_clock_time(line) continue + if line.startswith("Major (requiring I/O) page faults"): + self.__major_page_faults: timedelta = \ + TimeReport._parse_major_page_faults(line) + continue + if line.startswith("Voluntary context switches"): self.__voluntary_ctx_switches: int = \ TimeReport._parse_voluntary_ctx_switches(line) @@ -101,6 +106,11 @@ def max_res_size(self) -> int: """Maximum resident size.""" return self.__max_resident_size + @property + def major_page_faults(self) -> int: + """Major page faults (require I/O).""" + return self.__major_page_faults + @property def voluntary_ctx_switches(self) -> int: """Number of voluntary context switches.""" @@ -217,6 +227,15 @@ def _parse_max_resident_size(line: str) -> int: "Could not parse max resident set size: ", line ) + @staticmethod + def _parse_major_page_faults(line: str) -> int: + if line.startswith("Major (requiring I/O) page faults"): + return int(line.split(":")[1]) + + raise WrongTimeReportFormat( + "Could not parse voluntary context switches: ", line + ) + @staticmethod def _parse_voluntary_ctx_switches(line: str) -> int: if line.startswith("Voluntary context switches"): @@ -268,6 +287,10 @@ def measurements_ctx_switches(self) -> tp.List[int]: def max_resident_sizes(self) -> tp.List[int]: return [report.max_res_size for report in self.reports()] + @property + def major_page_faults(self) -> tp.List[int]: + return [report.major_page_faults for report in self.reports()] + @property def summary(self) -> str: import numpy as np # pylint: disable=import-outside-toplevel diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 335432666..e69b21a45 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -12,8 +12,7 @@ from varats.experiments.vara.feature_experiment import FeatureExperiment from varats.paper.case_study import CaseStudy from varats.paper_mgmt.case_study import get_case_study_file_name_filter -from varats.report.gnu_time_report import TimeReportAggregate -from varats.report.linux_perf_report import LinuxPerfReportAggregate +from varats.report.gnu_time_report import TimeReportAggregate, TimeReport from varats.report.report import BaseReport, ReportFilepath from varats.report.tef_report import ( TEFReport, @@ 
-419,7 +418,7 @@ class Baseline(Profiler): def __init__(self) -> None: super().__init__( "Base", fpp.BlackBoxBaselineRunner, fpp.BlackBoxOverheadBaseline, - fpp.LinuxPerfReportAggregate + fpp.TimeReportAggregate ) def is_regression(self, report_path: ReportFilepath) -> bool: @@ -468,34 +467,35 @@ class OverheadData: def __init__( self, profiler, mean_time: tp.Dict[int, float], - mean_bmiss: tp.Dict[int, float], ctx_switches: tp.Dict[int, float] + mean_memory: tp.Dict[int, float], major_page_faults: tp.Dict[int, float] ) -> None: self.__profiler = profiler self.__mean_time: tp.Dict[int, float] = mean_time - self.__mean_bmiss: tp.Dict[int, float] = mean_bmiss - self.__mean_ctx_switches: tp.Dict[int, float] = ctx_switches + self.__mean_memory: tp.Dict[int, float] = mean_memory + self.__mean_major_page_faults: tp.Dict[int, float] = major_page_faults def mean_time(self) -> float: return float(np.mean(list(self.__mean_time.values()))) - def mean_bmiss(self) -> float: - return float(np.mean(list(self.__mean_bmiss.values()))) + def mean_memory(self) -> float: + return float(np.mean(list(self.__mean_memory.values()))) - def mean_ctx(self) -> float: - return float(np.mean(list(self.__mean_ctx_switches.values()))) + def mean_major_page_faults(self) -> float: + return float(np.mean(list(self.__mean_major_page_faults.values()))) def config_wise_time_diff(self, other: 'OverheadData') -> tp.Dict[int, float]: return self.__config_wise(self.__mean_time, other.__mean_time) - def config_wise_bmiss_diff(self, - other: 'OverheadData') -> tp.Dict[int, float]: - return self.__config_wise(self.__mean_bmiss, other.__mean_bmiss) + def config_wise_memory_diff(self, + other: 'OverheadData') -> tp.Dict[int, float]: + return self.__config_wise(self.__mean_memory, other.__mean_memory) - def config_wise_ctx_diff(self, - other: 'OverheadData') -> tp.Dict[int, float]: + def config_wise_major_page_faults_diff( + self, other: 'OverheadData' + ) -> tp.Dict[int, float]: return self.__config_wise( - self.__mean_ctx_switches, other.__mean_ctx_switches + self.__mean_major_page_faults, other.__mean_major_page_faults ) @staticmethod @@ -517,14 +517,14 @@ def compute_overhead_data( ) -> tp.Optional['OverheadData']: mean_time: tp.Dict[int, float] = {} - mean_bmiss: tp.Dict[int, float] = {} - mean_cxt_switches: tp.Dict[int, float] = {} + mean_memory: tp.Dict[int, float] = {} + mean_major_page_faults: tp.Dict[int, float] = {} for config_id in case_study.get_config_ids_for_revision(rev): report_files = get_processed_revisions_files( case_study.project_name, profiler.overhead_experiment, - LinuxPerfReportAggregate, + TimeReportAggregate, get_case_study_file_name_filter(case_study), config_id=config_id ) @@ -538,11 +538,15 @@ def compute_overhead_data( ) return None - time_report = LinuxPerfReportAggregate(report_files[0].full_path()) - mean_time[config_id] = float(np.mean(time_report.elapsed_time)) - mean_bmiss[config_id] = float(np.mean(time_report.branch_misses)) - mean_cxt_switches[config_id] = float( - np.mean(time_report.ctx_switches) + time_report = TimeReportAggregate(report_files[0].full_path()) + mean_time[config_id] = float( + np.mean(time_report.measurements_wall_clock_time) + ) + mean_memory[config_id] = float( + np.mean(time_report.max_resident_sizes) + ) + mean_major_page_faults[config_id] = float( + np.mean(time_report.major_page_faults) ) if not mean_time: print( @@ -552,7 +556,9 @@ def compute_overhead_data( return None # print(f"{mean_time=}") - return OverheadData(profiler, mean_time, mean_bmiss, mean_cxt_switches) + 
return OverheadData( + profiler, mean_time, mean_memory, mean_major_page_faults + ) def load_precision_data(case_studies, profilers) -> pd.DataFrame: @@ -626,11 +632,11 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame: 'CaseStudy': project_name, 'Profiler': "Base", 'time': overhead_ground_truth.mean_time(), - 'bmiss': overhead_ground_truth.mean_bmiss(), - 'ctx': overhead_ground_truth.mean_ctx(), + 'memory': overhead_ground_truth.mean_memory(), + 'major_page_faults': overhead_ground_truth.mean_major_page_faults(), 'overhead_time': 0, - 'overhead_bmiss': 0, - 'overhead_ctx': 0 + 'overhead_memory': 0, + 'overhead_major_page_faults': 0 } table_rows.append(new_row) @@ -646,30 +652,33 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame: time_diff = profiler_overhead.config_wise_time_diff( overhead_ground_truth ) - bmiss_diff = profiler_overhead.config_wise_bmiss_diff( + memory_diff = profiler_overhead.config_wise_memory_diff( overhead_ground_truth ) - ctx_diff = profiler_overhead.config_wise_ctx_diff( + major_page_faults_diff = profiler_overhead.config_wise_major_page_faults_diff( overhead_ground_truth ) new_row['time'] = profiler_overhead.mean_time() new_row['overhead_time'] = np.mean(list(time_diff.values())) - new_row['bmiss'] = profiler_overhead.mean_bmiss() - new_row['overhead_bmiss'] = np.mean(list(bmiss_diff.values())) + new_row['memory'] = profiler_overhead.mean_memory() + new_row['overhead_memory'] = np.mean(list(memory_diff.values())) - new_row['ctx'] = profiler_overhead.mean_ctx() - new_row['overhead_ctx'] = np.mean(list(ctx_diff.values())) + new_row['major_page_faults' + ] = profiler_overhead.mean_major_page_faults() + new_row['overhead_major_page_faults'] = np.mean( + list(major_page_faults_diff.values()) + ) else: new_row['time'] = np.nan new_row['overhead_time'] = np.nan - new_row['bmiss'] = np.nan - new_row['overhead_bmiss'] = np.nan + new_row['memory'] = np.nan + new_row['overhead_memory'] = np.nan - new_row['ctx'] = np.nan - new_row['overhead_ctx'] = np.nan + new_row['major_page_faults'] = np.nan + new_row['overhead_major_page_faults'] = np.nan table_rows.append(new_row) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 85a0ef361..2564c2074 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -41,7 +41,6 @@ from varats.project.varats_project import VProject from varats.provider.patch.patch_provider import PatchProvider from varats.report.gnu_time_report import TimeReportAggregate -from varats.report.linux_perf_report import LinuxPerfReportAggregate from varats.report.multi_patch_report import MultiPatchReport from varats.report.report import ReportSpecification from varats.report.tef_report import TEFReportAggregate @@ -740,8 +739,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) print(f"Running example {prj_command.command.label}") - timed_pb_cmd = perf["stat", "-o", time_report_file, - "--", pb_cmd] + timed_pb_cmd = time["-v", "-o", time_report_file, "--", + pb_cmd] extra_options = get_extra_config_options(self.project) with cleanup(prj_command): @@ -816,7 +815,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: adapted_binary_location, override=True ) - timed_pb_cmd = perf["stat", "-o", time_report_file, + timed_pb_cmd = time["-v", "-o", time_report_file, "--", pb_cmd] extra_options = get_extra_config_options( @@ -891,8 +890,8 @@ def run_traced_code(self, 
tmp_dir: Path) -> StepResult: ) print(f"Running example {prj_command.command.label}") - timed_pb_cmd = perf["stat", "-o", time_report_file, - "--", pb_cmd] + timed_pb_cmd = time["-v", "-o", time_report_file, "--", + pb_cmd] extra_options = get_extra_config_options(self.project) @@ -976,10 +975,7 @@ class TEFProfileOverheadRunner(FeatureExperiment, shorthand="TEFo"): NAME = "RunTEFProfilerO" - REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) - - CONTAINER = get_base_image(ImageBase.DEBIAN_12 - ).run('apt', 'install', '-y', 'perf') + REPORT_SPEC = ReportSpecification(TimeReportAggregate) def actions_for_project( self, project: VProject @@ -1001,10 +997,7 @@ class PIMProfileOverheadRunner(FeatureExperiment, shorthand="PIMo"): NAME = "RunPIMProfilerO" - REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) - - CONTAINER = get_base_image(ImageBase.DEBIAN_12 - ).run('apt', 'install', '-y', 'perf') + REPORT_SPEC = ReportSpecification(TimeReportAggregate) def actions_for_project( self, project: VProject @@ -1027,10 +1020,10 @@ class EbpfTraceTEFOverheadRunner(FeatureExperiment, shorthand="ETEFo"): NAME = "RunEBPFTraceTEFProfilerO" - REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) + REPORT_SPEC = ReportSpecification(TimeReportAggregate) CONTAINER = get_base_image(ImageBase.DEBIAN_12 - ).run('apt', 'install', '-y', 'bpftrace', 'perf') + ).run('apt', 'install', '-y', 'bpftrace') def actions_for_project( self, project: VProject @@ -1053,10 +1046,7 @@ class BccTraceTEFOverheadRunner(FeatureExperiment, shorthand="BCCo"): NAME = "RunBCCTEFProfilerO" - REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) - - CONTAINER = get_base_image(ImageBase.DEBIAN_12 - ).run('apt', 'install', '-y', 'perf') + REPORT_SPEC = ReportSpecification(TimeReportAggregate) def actions_for_project( self, project: VProject @@ -1119,7 +1109,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: print(f"Running example {prj_command.command.label}") # timed_pb_cmd = time["-v", "-o", time_report_file, pb_cmd] - timed_pb_cmd = perf["stat", "-o", time_report_file, "--", + timed_pb_cmd = time["-v", "-o", time_report_file, "--", pb_cmd] extra_options = get_extra_config_options(self.project) @@ -1138,10 +1128,7 @@ class BlackBoxOverheadBaseline(FeatureExperiment, shorthand="BBBaseO"): NAME = "GenBBBaselineO" - REPORT_SPEC = ReportSpecification(LinuxPerfReportAggregate) - - CONTAINER = get_base_image(ImageBase.DEBIAN_12 - ).run('apt', 'install', '-y', 'perf') + REPORT_SPEC = ReportSpecification(TimeReportAggregate) def actions_for_project( self, project: VProject diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index eea4cb207..f7ef5325c 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -174,8 +174,8 @@ class PerfOverheadPlot(Plot, plot_name='fperf_overhead'): def plot(self, view_mode: bool) -> None: # -- Configure plot -- plot_metric = [("Time", "overhead_time_rel"), - ("Branch Misses", "overhead_bmiss_rel"), - ("Ctx", "overhead_ctx_rel")] + ("Memory", "overhead_memory_rel"), + ("Major Page Faults", "overhead_major_page_faults_rel")] target_row = "f1_score" # target_row = "precision" @@ -199,18 +199,29 @@ def plot(self, view_mode: bool) -> None: print(f"precision_df=\n{precision_df}") overhead_df = load_overhead_data(case_studies, profilers) + print(f"{overhead_df=}") overhead_df['overhead_time_rel'] = overhead_df['time'] / ( overhead_df['time'] - overhead_df['overhead_time'] ) 
* 100 - overhead_df['overhead_ctx_rel'] = overhead_df['ctx'] / ( - overhead_df['ctx'] - overhead_df['overhead_ctx'] + overhead_df['overhead_memory_rel'] = overhead_df['memory'] / ( + overhead_df['memory'] - overhead_df['overhead_memory'] ) * 100 - overhead_df["overhead_ctx_rel"].fillna(100, inplace=True) + overhead_df['overhead_memory_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + + overhead_df['overhead_major_page_faults_rel' + ] = overhead_df['major_page_faults'] / ( + overhead_df['major_page_faults'] - + overhead_df['overhead_major_page_faults'] + ) * 100 + overhead_df['overhead_major_page_faults_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + # TODO: fix + overhead_df["overhead_major_page_faults_rel"].fillna(100, inplace=True) - overhead_df['overhead_bmiss_rel'] = overhead_df['bmiss'] / ( - overhead_df['bmiss'] - overhead_df['overhead_bmiss'] - ) * 100 print(f"other_df=\n{overhead_df}") merged_df = pd.merge( From c66d15eeb01d0793fa93626861894673b6f724a8 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 4 Sep 2023 14:21:25 +0200 Subject: [PATCH 110/224] Report project name in error --- .../varats/data/databases/feature_perf_precision_database.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index e69b21a45..48b9f5f0c 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -445,8 +445,8 @@ def compute_profiler_predictions( raise AssertionError("Should only be one") if not report_files: print( - f"Could not find profiling data. {config_id=}, " - f"profiler={profiler.name}" + f"Could not find profiling data for {project_name=}" + f". 
{config_id=}, profiler={profiler.name}" ) return None From fdbaf89153efec9ab9bd448fdb93f9eb1c1a127e Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 4 Sep 2023 19:17:32 +0200 Subject: [PATCH 111/224] Extend time report parsing --- tests/report/test_gnu_time_report.py | 28 +++++++++- varats-core/varats/report/gnu_time_report.py | 57 +++++++++++++++++--- 2 files changed, 77 insertions(+), 8 deletions(-) diff --git a/tests/report/test_gnu_time_report.py b/tests/report/test_gnu_time_report.py index 7b3262acf..d836c1929 100644 --- a/tests/report/test_gnu_time_report.py +++ b/tests/report/test_gnu_time_report.py @@ -23,8 +23,8 @@ Voluntary context switches: 1 Involuntary context switches: 1 Swaps: 0 - File system inputs: 0 - File system outputs: 0 + File system inputs: 1 + File system outputs: 2 Socket messages sent: 0 Socket messages received: 0 Signals delivered: 0 @@ -103,6 +103,30 @@ def test_system_time(self): """Test if we can extract the system time from the parsed file.""" self.assertEqual(self.report.system_time, timedelta(seconds=3)) + def test_wall_clock_time(self): + """Test if we can extract the wall clock time from the parsed file.""" + self.assertEqual(self.report.wall_clock_time, timedelta(seconds=42)) + + def test_max_resident_size(self) -> None: + """Test if we can extract the max resident size from the parsed file.""" + self.assertEqual(self.report.max_res_size, 1804) + + def test_major_page_faults(self) -> None: + """Test if we can extract the number of major page faults from the + parsed file.""" + self.assertEqual(self.report.major_page_faults, 2) + + def test_minor_page_faults(self) -> None: + """Test if we can extract the number of minor page faults from the + parsed file.""" + self.assertEqual(self.report.minor_page_faults, 142) + + def test_filesystem_io(self) -> None: + """Test if we can extract the number of filesystem inputs/outputs from + the parsed file.""" + self.assertEqual(self.report.filesystem_io[0], 1) + self.assertEqual(self.report.filesystem_io[1], 2) + def test_repr_str(self): """Test string representation of TimeReports.""" expected_result = """Command: echo diff --git a/varats-core/varats/report/gnu_time_report.py b/varats-core/varats/report/gnu_time_report.py index 51a00b840..8c2dcf7b2 100644 --- a/varats-core/varats/report/gnu_time_report.py +++ b/varats-core/varats/report/gnu_time_report.py @@ -37,7 +37,7 @@ class TimeReport(BaseReport, shorthand="TR", file_type="txt"): def __init__(self, path: Path) -> None: super().__init__(path) - + self.__filesystem_io = (-1, -1) with open(self.path, 'r') as stream: for line in stream: line = line.strip() @@ -65,10 +65,15 @@ def __init__(self, path: Path) -> None: continue if line.startswith("Major (requiring I/O) page faults"): - self.__major_page_faults: timedelta = \ + self.__major_page_faults: int = \ TimeReport._parse_major_page_faults(line) continue + if line.startswith("Minor (reclaiming a frame) page faults"): + self.__minor_page_faults: int = \ + TimeReport._parse_minor_page_faults(line) + continue + if line.startswith("Voluntary context switches"): self.__voluntary_ctx_switches: int = \ TimeReport._parse_voluntary_ctx_switches(line) @@ -79,7 +84,19 @@ def __init__(self, path: Path) -> None: TimeReport._parse_involuntary_ctx_switches(line) continue - # print("Not matched: ", line) + if line.startswith("File system inputs"): + self.__filesystem_io = ( + TimeReport._parse_filesystem_io(line), + self.__filesystem_io[1] + ) + continue + + if line.startswith("File system inputs"): + 
self.__filesystem_io = ( + self.__filesystem_io[0], + TimeReport._parse_filesystem_io(line) + ) + continue @property def command_name(self) -> str: @@ -111,6 +128,20 @@ def major_page_faults(self) -> int: """Major page faults (require I/O).""" return self.__major_page_faults + @property + def minor_page_faults(self) -> int: + """Minor page faults (reclaim a frame).""" + return self.__minor_page_faults + + @property + def filesystem_io(self) -> tp.Tuple[int, int]: + """ + Filesystem inputs/outputs. + + Returns: a tuple of (#inputs, #outputs) + """ + return self.__filesystem_io + @property def voluntary_ctx_switches(self) -> int: """Number of voluntary context switches.""" @@ -232,9 +263,14 @@ def _parse_major_page_faults(line: str) -> int: if line.startswith("Major (requiring I/O) page faults"): return int(line.split(":")[1]) - raise WrongTimeReportFormat( - "Could not parse voluntary context switches: ", line - ) + raise WrongTimeReportFormat("Could not parse major page faults: ", line) + + @staticmethod + def _parse_minor_page_faults(line: str) -> int: + if line.startswith("Minor (reclaiming a frame) page faults"): + return int(line.split(":")[1]) + + raise WrongTimeReportFormat("Could not parse minor page faults: ", line) @staticmethod def _parse_voluntary_ctx_switches(line: str) -> int: @@ -254,6 +290,15 @@ def _parse_involuntary_ctx_switches(line: str) -> int: "Could not parse involuntary context switches: ", line ) + @staticmethod + def _parse_filesystem_io(line: str) -> int: + if line.startswith("File system "): + return int(line.split(":")[1]) + + raise WrongTimeReportFormat( + "Could not parse filesystem inputs/outputs: ", line + ) + class TimeReportAggregate( ReportAggregate[TimeReport], From 80c70ca2db48911ee3cfe82348ba15aa5f38cbaf Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 4 Sep 2023 20:00:09 +0200 Subject: [PATCH 112/224] Extend overhead overview plot by more metrics --- varats-core/varats/report/gnu_time_report.py | 10 +- .../feature_perf_precision_database.py | 93 ++++++++++++++++++- varats/varats/plots/feature_perf_precision.py | 55 ++++++++++- 3 files changed, 149 insertions(+), 9 deletions(-) diff --git a/varats-core/varats/report/gnu_time_report.py b/varats-core/varats/report/gnu_time_report.py index 8c2dcf7b2..9fab82ead 100644 --- a/varats-core/varats/report/gnu_time_report.py +++ b/varats-core/varats/report/gnu_time_report.py @@ -91,7 +91,7 @@ def __init__(self, path: Path) -> None: ) continue - if line.startswith("File system inputs"): + if line.startswith("File system outputs"): self.__filesystem_io = ( self.__filesystem_io[0], TimeReport._parse_filesystem_io(line) @@ -336,6 +336,14 @@ def max_resident_sizes(self) -> tp.List[int]: def major_page_faults(self) -> tp.List[int]: return [report.major_page_faults for report in self.reports()] + @property + def minor_page_faults(self) -> tp.List[int]: + return [report.minor_page_faults for report in self.reports()] + + @property + def filesystem_io(self) -> tp.List[tp.Tuple[int, int]]: + return [report.filesystem_io for report in self.reports()] + @property def summary(self) -> str: import numpy as np # pylint: disable=import-outside-toplevel diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 48b9f5f0c..ab679b2a3 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -467,12 +467,18 @@ class OverheadData: def __init__( 
self, profiler, mean_time: tp.Dict[int, float], - mean_memory: tp.Dict[int, float], major_page_faults: tp.Dict[int, float] + mean_memory: tp.Dict[int, float], major_page_faults: tp.Dict[int, + float], + minor_page_faults: tp.Dict[int, float], fs_inputs: tp.Dict[int, float], + fs_outputs: tp.Dict[int, float] ) -> None: self.__profiler = profiler self.__mean_time: tp.Dict[int, float] = mean_time self.__mean_memory: tp.Dict[int, float] = mean_memory self.__mean_major_page_faults: tp.Dict[int, float] = major_page_faults + self.__mean_minor_page_faults: tp.Dict[int, float] = minor_page_faults + self.__mean_fs_inputs: tp.Dict[int, float] = fs_inputs + self.__mean_fs_outputs: tp.Dict[int, float] = fs_outputs def mean_time(self) -> float: return float(np.mean(list(self.__mean_time.values()))) @@ -483,6 +489,15 @@ def mean_memory(self) -> float: def mean_major_page_faults(self) -> float: return float(np.mean(list(self.__mean_major_page_faults.values()))) + def mean_minor_page_faults(self) -> float: + return float(np.mean(list(self.__mean_minor_page_faults.values()))) + + def mean_fs_inputs(self) -> float: + return float(np.mean(list(self.__mean_fs_inputs.values()))) + + def mean_fs_outputs(self) -> float: + return float(np.mean(list(self.__mean_fs_outputs.values()))) + def config_wise_time_diff(self, other: 'OverheadData') -> tp.Dict[int, float]: return self.__config_wise(self.__mean_time, other.__mean_time) @@ -498,6 +513,25 @@ def config_wise_major_page_faults_diff( self.__mean_major_page_faults, other.__mean_major_page_faults ) + def config_wise_minor_page_faults_diff( + self, other: 'OverheadData' + ) -> tp.Dict[int, float]: + return self.__config_wise( + self.__mean_minor_page_faults, other.__mean_minor_page_faults + ) + + def config_wise_fs_inputs_diff( + self, other: 'OverheadData' + ) -> tp.Dict[int, float]: + return self.__config_wise(self.__mean_fs_inputs, other.__mean_fs_inputs) + + def config_wise_fs_outputs_diff( + self, other: 'OverheadData' + ) -> tp.Dict[int, float]: + return self.__config_wise( + self.__mean_fs_outputs, other.__mean_fs_outputs + ) + @staticmethod def __config_wise( self_map: tp.Dict[int, float], other_map: tp.Dict[int, float] @@ -519,6 +553,9 @@ def compute_overhead_data( mean_time: tp.Dict[int, float] = {} mean_memory: tp.Dict[int, float] = {} mean_major_page_faults: tp.Dict[int, float] = {} + mean_minor_page_faults: tp.Dict[int, float] = {} + mean_fs_inputs: tp.Dict[int, float] = {} + mean_fs_outputs: tp.Dict[int, float] = {} for config_id in case_study.get_config_ids_for_revision(rev): report_files = get_processed_revisions_files( @@ -548,6 +585,15 @@ def compute_overhead_data( mean_major_page_faults[config_id] = float( np.mean(time_report.major_page_faults) ) + mean_minor_page_faults[config_id] = float( + np.mean(time_report.minor_page_faults) + ) + mean_fs_inputs[config_id] = float( + np.mean([io[0] for io in time_report.filesystem_io]) + ) + mean_fs_outputs[config_id] = float( + np.mean([io[1] for io in time_report.filesystem_io]) + ) if not mean_time: print( f"Case study for project {case_study.project_name} had " @@ -557,7 +603,8 @@ def compute_overhead_data( # print(f"{mean_time=}") return OverheadData( - profiler, mean_time, mean_memory, mean_major_page_faults + profiler, mean_time, mean_memory, mean_major_page_faults, + mean_minor_page_faults, mean_fs_inputs, mean_fs_outputs ) @@ -634,9 +681,15 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame: 'time': overhead_ground_truth.mean_time(), 'memory': overhead_ground_truth.mean_memory(), 
'major_page_faults': overhead_ground_truth.mean_major_page_faults(), + 'minor_page_faults': overhead_ground_truth.mean_minor_page_faults(), + 'fs_inputs': overhead_ground_truth.mean_fs_inputs(), + 'fs_outputs': overhead_ground_truth.mean_fs_outputs(), 'overhead_time': 0, 'overhead_memory': 0, - 'overhead_major_page_faults': 0 + 'overhead_major_page_faults': 0, + 'overhead_minor_page_faults': 0, + 'overhead_fs_inputs': 0, + 'overhead_fs_outputs': 0 } table_rows.append(new_row) @@ -658,6 +711,15 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame: major_page_faults_diff = profiler_overhead.config_wise_major_page_faults_diff( overhead_ground_truth ) + minor_page_faults_diff = profiler_overhead.config_wise_minor_page_faults_diff( + overhead_ground_truth + ) + fs_inputs_diff = profiler_overhead.config_wise_fs_inputs_diff( + overhead_ground_truth + ) + fs_outputs_diff = profiler_overhead.config_wise_fs_outputs_diff( + overhead_ground_truth + ) new_row['time'] = profiler_overhead.mean_time() new_row['overhead_time'] = np.mean(list(time_diff.values())) @@ -670,6 +732,22 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame: new_row['overhead_major_page_faults'] = np.mean( list(major_page_faults_diff.values()) ) + + new_row['minor_page_faults' + ] = profiler_overhead.mean_minor_page_faults() + new_row['overhead_minor_page_faults'] = np.mean( + list(minor_page_faults_diff.values()) + ) + + new_row['fs_inputs'] = profiler_overhead.mean_fs_inputs() + new_row['overhead_fs_inputs'] = np.mean( + list(fs_inputs_diff.values()) + ) + + new_row['fs_outputs'] = profiler_overhead.mean_fs_outputs() + new_row['overhead_fs_outputs'] = np.mean( + list(fs_outputs_diff.values()) + ) else: new_row['time'] = np.nan new_row['overhead_time'] = np.nan @@ -680,6 +758,15 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame: new_row['major_page_faults'] = np.nan new_row['overhead_major_page_faults'] = np.nan + new_row['minor_page_faults'] = np.nan + new_row['overhead_minor_page_faults'] = np.nan + + new_row['fs_inputs'] = np.nan + new_row['overhead_fs_inputs'] = np.nan + + new_row['fs_outputs'] = np.nan + new_row['overhead_fs_outputs'] = np.nan + table_rows.append(new_row) return pd.DataFrame(table_rows) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index f7ef5325c..7a7e5adc6 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -1,6 +1,7 @@ """Module for the FeaturePerfPrecision plots.""" import random import typing as tp +from itertools import chain import matplotlib.pyplot as plt import numpy as np @@ -175,7 +176,10 @@ def plot(self, view_mode: bool) -> None: # -- Configure plot -- plot_metric = [("Time", "overhead_time_rel"), ("Memory", "overhead_memory_rel"), - ("Major Page Faults", "overhead_major_page_faults_rel")] + ("Major Page Faults", "overhead_major_page_faults_rel"), + ("Minor Page Faults", "overhead_minor_page_faults_rel"), + ("Filesystem Inputs", "overhead_fs_inputs_rel"), + ("Filesystem Outputs", "overhead_fs_outputs_rel")] target_row = "f1_score" # target_row = "precision" @@ -211,6 +215,7 @@ def plot(self, view_mode: bool) -> None: np.nan, inplace=True) + # Page faults overhead_df['overhead_major_page_faults_rel' ] = overhead_df['major_page_faults'] / ( overhead_df['major_page_faults'] - @@ -222,6 +227,32 @@ def plot(self, view_mode: bool) -> None: # TODO: fix overhead_df["overhead_major_page_faults_rel"].fillna(100, inplace=True) + 
overhead_df['overhead_minor_page_faults_rel' + ] = overhead_df['minor_page_faults'] / ( + overhead_df['minor_page_faults'] - + overhead_df['overhead_minor_page_faults'] + ) * 100 + overhead_df['overhead_minor_page_faults_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + # TODO: fix + overhead_df["overhead_minor_page_faults_rel"].fillna(100, inplace=True) + + # Filesystem + overhead_df['overhead_fs_inputs_rel'] = overhead_df['fs_inputs'] / ( + overhead_df['fs_inputs'] - overhead_df['overhead_fs_inputs'] + ) * 100 + overhead_df['overhead_fs_inputs_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + + overhead_df['overhead_fs_outputs_rel'] = overhead_df['fs_outputs'] / ( + overhead_df['fs_outputs'] - overhead_df['overhead_fs_outputs'] + ) * 100 + overhead_df['overhead_fs_outputs_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + print(f"other_df=\n{overhead_df}") merged_df = pd.merge( @@ -231,8 +262,9 @@ def plot(self, view_mode: bool) -> None: # print(f"{self.plot_config.width()}") + rows = 3 _, axes = plt.subplots( - ncols=len(plot_metric), nrows=1, figsize=(30, 10) + ncols=int(len(plot_metric) / rows), nrows=rows, figsize=(30, 10) ) if len(plot_metric) == 1: @@ -241,7 +273,7 @@ def plot(self, view_mode: bool) -> None: axes ) else: - for idx, ax in enumerate(axes): + for idx, ax in enumerate(list(chain.from_iterable(axes))): self.do_single_plot( plot_metric[idx][1], target_row, merged_df, plot_metric[idx][0], ax @@ -274,12 +306,25 @@ def do_single_plot( text_obj.set_text("Subject Systems") text_obj.set_fontweight("bold") - ax.set_xlabel(f"{plot_extra_name} Overhead in %") + ax.set_xlabel(f"Relative {plot_extra_name}") if target_row == "f1_score": ax.set_ylabel("F1-Score") ax.set_ylim(0.0, 1.02) - ax.set_xlim(np.max(merged_df[x_values]) + 20, 100) + # Sets the limit at least to 150 or otherwise to the largest non + # inf/nan value + x_limit = max( + np.max( + np.nan_to_num( + merged_df[x_values], + copy=True, + nan=0.0, + posinf=0.0, + neginf=0.0 + ) + ) + 20, 120 + ) + ax.set_xlim(x_limit, 100) ax.xaxis.label.set_size(20) ax.yaxis.label.set_size(20) ax.tick_params(labelsize=15) From a4f6f9f236f41570a1cc21d362d01d08c16c26ec Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 5 Sep 2023 13:17:54 +0200 Subject: [PATCH 113/224] Fixes container decl for experiments --- varats/varats/experiments/vara/feature_perf_precision.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 2564c2074..28acc21e4 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -9,6 +9,7 @@ import benchbuild.extensions as bb_ext from benchbuild.command import cleanup +from benchbuild.environments.domain.declarative import ContainerImage from benchbuild.utils import actions from benchbuild.utils.actions import StepResult, Clean from benchbuild.utils.cmd import time, rm, cp, numactl, sudo, bpftrace, perf @@ -494,8 +495,7 @@ class EbpfTraceTEFProfileRunner(FeatureExperiment, shorthand="ETEFp"): REPORT_SPEC = ReportSpecification(MPRTEFAggregate) - CONTAINER = get_base_image(ImageBase.DEBIAN_12 - ).run('apt', 'install', '-y', 'bpftrace') + CONTAINER = ContainerImage().run('apt', 'install', '-y', 'bpftrace') def actions_for_project( self, project: VProject @@ -1022,8 +1022,7 @@ class EbpfTraceTEFOverheadRunner(FeatureExperiment, shorthand="ETEFo"): REPORT_SPEC = 
ReportSpecification(TimeReportAggregate) - CONTAINER = get_base_image(ImageBase.DEBIAN_12 - ).run('apt', 'install', '-y', 'bpftrace') + CONTAINER = ContainerImage().run('apt', 'install', '-y', 'bpftrace') def actions_for_project( self, project: VProject From 2ac1036c389486563727c2c97661771adfdbdbcd Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 7 Sep 2023 10:00:33 +0200 Subject: [PATCH 114/224] Implements basic subject system overview table --- .../varats/tables/feature_perf_precision.py | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 357d9fad3..295db9646 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -1,8 +1,11 @@ """Module for the FeaturePerfPrecision tables.""" +import re import typing as tp +from pathlib import Path import numpy as np import pandas as pd +from plumbum import local, TF, RETCODE from pylatex import Document, Package from varats.data.databases.feature_perf_precision_database import ( @@ -13,16 +16,21 @@ Profiler, VXray, PIMTracer, + EbpfTraceTEF, Baseline, compute_profiler_predictions, OverheadData, + load_precision_data, ) from varats.data.metrics import ConfusionMatrix from varats.paper.case_study import CaseStudy from varats.paper.paper_config import get_loaded_paper_config +from varats.project.project_domain import ProjectDomains +from varats.project.project_util import get_local_project_git_path from varats.table.table import Table from varats.table.table_utils import dataframe_to_table from varats.table.tables import TableFormat, TableGenerator +from varats.utils.git_util import calc_repo_loc, ChurnConfig, git class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): @@ -310,3 +318,94 @@ def generate(self) -> tp.List[Table]: return [ FeaturePerfOverheadTable(self.table_config, **self.table_kwargs) ] + + +class FeaturePerfMetricsOverviewTable(Table, table_name="fperf_overview"): + """Table showing some general information about feature performance case + studies.""" + + # TODO: refactor out + @staticmethod + def _calc_folder_locs(repo_path: Path, rev_range: str, folder: str) -> int: + churn_config = ChurnConfig.create_c_style_languages_config() + file_pattern = re.compile( + "|".join(churn_config.get_extensions_repr(r"^.*\.", r"$")) + ) + + loc: int = 0 + with local.cwd(repo_path): + files = git( + "ls-tree", + "-r", + "--name-only", + rev_range, + ).splitlines() + + for file in files: + if not file.startswith(folder): + continue + if file_pattern.match(file): + lines = git("show", f"{rev_range}:{file}").splitlines() + loc += len([line for line in lines if line]) + + return loc + + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] + + df_precision = load_precision_data(case_studies, profilers) + + cs_data: tp.List[pd.DataFrame] = [] + for case_study in case_studies: + project_name = case_study.project_name + rev = case_study.revisions[0] + project_git_path = get_local_project_git_path(project_name) + + cs_precision_data = df_precision[df_precision['CaseStudy'] == + project_name] + regressions = len(cs_precision_data['Patch'].unique()) + + locs: int + if case_study.project_cls.DOMAIN == ProjectDomains.TEST: + src_folder = f'projects/{project_name}' + locs = self._calc_folder_locs( + 
project_git_path, rev.hash, src_folder + ) + else: + locs = calc_repo_loc(project_git_path, rev.hash) + + cs_dict = { + project_name: { + "NumConfig": + len(case_study.get_config_ids_for_revision(rev)), + "Locs": + locs, + "Regressions": + regressions, + } + } + + cs_data.append(pd.DataFrame.from_dict(cs_dict, orient='index')) + + df = pd.concat(cs_data).sort_index() + + style = df.style + kwargs: tp.Dict[str, tp.Any] = {} + if table_format.is_latex(): + kwargs["hrules"] = True + style.format(thousands=r"\,") + return dataframe_to_table(df, table_format, style, wrap_table, **kwargs) + + +class FeaturePerfMetricsOverviewTableGenerator( + TableGenerator, generator_name="fperf-overview", options=[] +): + """Generates a cs-metrics table for the selected case study(ies).""" + + def generate(self) -> tp.List[Table]: + return [ + FeaturePerfMetricsOverviewTable( + self.table_config, **self.table_kwargs + ) + ] From 986fef9e66c2182926b5c7ca7ec4737bb0011401 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 7 Sep 2023 10:40:01 +0200 Subject: [PATCH 115/224] Adds fp/fn ids to precision table output --- .../varats/data/databases/feature_perf_precision_database.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index ab679b2a3..7d9372707 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -650,11 +650,15 @@ def load_precision_data(case_studies, profilers) -> pd.DataFrame: new_row['recall'] = results.recall() new_row['f1_score'] = results.f1_score() new_row['Profiler'] = profiler.name + new_row['fp_ids'] = results.getFPs() + new_row['fn_ids'] = results.getFNs() else: new_row['precision'] = np.nan new_row['recall'] = np.nan new_row['f1_score'] = np.nan new_row['Profiler'] = profiler.name + new_row['fp_ids'] = [] + new_row['fn_ids'] = [] table_rows_plot.append(new_row) From 3e27539821e28a742b7ae457e3ee0aad5d413dd6 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 7 Sep 2023 11:06:32 +0200 Subject: [PATCH 116/224] Adds placeholder dist plot impl --- varats/varats/plots/feature_perf_precision.py | 101 +++++++----------- 1 file changed, 36 insertions(+), 65 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 7a7e5adc6..e685449bf 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -94,80 +94,51 @@ def generate(self) -> tp.List[Plot]: return [PerfPrecisionPlot(self.plot_config, **self.plot_kwargs)] -def get_fake_overhead_rows(): - fake_rows = [] - fake_prof = [("WXray", 10), ("PIMTracer", 42)] - - new_fake_row = { - 'CaseStudy': "fake", - # 'Patch': "fpatch", - 'WithoutProfiler_mean_time': 42, - 'WithoutProfiler_mean_ctx': 2, - } - - for prof, seed in fake_prof: - random.seed(seed) - # for _ in range(0, 3): - new_fake_row[f"{prof}_time_mean"] = random.randint(2, 230) - new_fake_row[f"{prof}_time_std"] = np.nan - new_fake_row[f"{prof}_time_max"] = np.nan - - new_fake_row[f"{prof}_ctx_mean"] = random.randint(2, 1230) - new_fake_row[f"{prof}_ctx_std"] = np.nan - new_fake_row[f"{prof}_ctx_max"] = np.nan - - fake_rows.append(new_fake_row) - - return fake_rows +class PerfPrecisionDistPlot(Plot, plot_name='fperf_precision_dist'): + def plot(self, view_mode: bool) -> None: + case_studies = get_loaded_paper_config().get_all_case_studies() + 
profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] -def get_fake_prec_rows_overhead() -> tp.List[tp.Any]: - fake_rows = [] - fake_prof = [("WXray", 10), ("PIMTracer", 42)] - for prof, seed in fake_prof: - random.seed(seed) - for _ in range(0, 3): - n = -0.1 if prof == "PIMTracer" else 0.0 - x = random.random() - y = random.random() - z = random.random() - new_fake_row = { - 'CaseStudy': "fake", - 'Patch': "fpatch", - 'Configs': 42, - 'RegressedConfigs': 21, - 'precision': x - n, - 'recall': y, - 'f1_score': z, - 'Profiler': prof - } - fake_rows.append(new_fake_row) + # Data aggregation + df = pd.DataFrame() + df = load_precision_data(case_studies, profilers) + # df = pd.concat([df, pd.DataFrame(get_fake_prec_rows())]) + df.sort_values(["CaseStudy"], inplace=True) + print(f"{df=}") - return fake_rows + grid = multivariate_grid( + df, + 'precision', + 'recall', + 'Profiler', + global_kde=False, + alpha=0.7, + legend=False, + s=100 + ) + grid.ax_marg_x.set_xlim(0.0, 1.02) + grid.ax_marg_y.set_ylim(0.0, 1.02) + grid.ax_joint.legend([name for name, _ in df.groupby("Profiler")]) + grid.ax_joint.set_xlabel("Precision") + grid.ax_joint.set_ylabel("Recall") + grid.ax_joint.xaxis.label.set_size(20) + grid.ax_joint.yaxis.label.set_size(20) -def get_fake_overhead_better_rows(): - # case_study, profiler, overhead_time, overhead_ctx - fake_cs = ["SynthSAContextSensitivity", "fake"] - fake_prof = [("WXray", 10), ("PIMTracer", 12)] - fake_rows = [] + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise UnsupportedOperation - for prof, seed in fake_prof: - random.seed(seed) - for cs in fake_cs: - # extra = 1 if prof == 'PIMTracer' else 0 +class PerfProfDistPlotGenerator( + PlotGenerator, generator_name="fperf-precision-dist", options=[] +): - new_fake_row = { - 'CaseStudy': cs, - 'Profiler': prof, - 'overhead_time': - (random.random() * 4) * 100, # random.randint(2, 230), - 'overhead_ctx': random.randint(2, 1230) - } - fake_rows.append(new_fake_row) + def generate(self) -> tp.List[Plot]: - return fake_rows + return [PerfPrecisionDistPlot(self.plot_config, **self.plot_kwargs)] class PerfOverheadPlot(Plot, plot_name='fperf_overhead'): From 924a0db435b83243e8db22a5ca8b918f820f17ae Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Thu, 7 Sep 2023 12:46:35 +0200 Subject: [PATCH 117/224] * First skeleton for HyTeg project --- varats/varats/projects/cpp_projects/hyteg.py | 58 ++++++++++++++++++++ varats/varats/tools/bb_config.py | 1 + 2 files changed, 59 insertions(+) create mode 100644 varats/varats/projects/cpp_projects/hyteg.py diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py new file mode 100644 index 000000000..9b104e07e --- /dev/null +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -0,0 +1,58 @@ +import typing as tp + +import benchbuild as bb +from benchbuild.utils.cmd import make, cmake, mkdir +from benchbuild.utils.settings import get_number_of_jobs +from plumbum import local + +from varats.project.project_domain import ProjectDomains +from varats.project.project_util import get_local_project_git_path +from varats.project.sources import FeatureSource +from varats.project.varats_project import VProject +from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap +from varats.utils.settings import bb_cfg + + +class HyTeg(VProject): + NAME = 'HyTeg' + GROUP = 'cpp_projects' + DOMAIN = ProjectDomains.CPP_LIBRARY + + SOURCE = [ + bb.source.Git( + 
remote="git@github.com:se-sic/hyteg-VaRA.git", + local="HyTeg", + refspec="origin/HEAD", + limit=None, + shallow=False + ), + FeatureSource() + ] + + WORKLOADS = {} + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List['ProjectBinaryWrapper']: + binaries = RevisionBinaryMap(get_local_project_git_path(HyTeg.NAME)) + + return binaries + + def compile(self) -> None: + hyteg_source = local.path(self.source_of(self.primary_source)) + + mkdir("-p", hyteg_source / "build") + + cc_compiler = bb.compiler.cc(self) + cxx_compiler = bb.compiler.cxx(self) + + with local.cwd(hyteg_source / "build"): + with local.env(CC=str(cc_compiler), CXX=str(cxx_compiler)): + bb.watch(cmake)("..") + + with local.cwd(hyteg_source / "build" / "apps"): + bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) + + def run_tests(self) -> None: + pass diff --git a/varats/varats/tools/bb_config.py b/varats/varats/tools/bb_config.py index 5d3b73bd2..8d374709e 100644 --- a/varats/varats/tools/bb_config.py +++ b/varats/varats/tools/bb_config.py @@ -75,6 +75,7 @@ def update_projects( 'varats.projects.cpp_projects.z3', 'varats.projects.cpp_projects.ect', 'varats.projects.cpp_projects.lepton' + 'varats.projects.cpp_projects.hyteg' ] projects_conf.value[:] += [ 'varats.projects.cpp_projects.doxygen', 'varats.projects.cpp_projects' From 4ddb9a561d21c7dce4fbb21a9a2502412f685b38 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 7 Sep 2023 20:48:22 +0200 Subject: [PATCH 118/224] Implements profiler comp with distributations --- varats/varats/plots/feature_perf_precision.py | 75 ++++++++++++++----- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index e685449bf..7551245c9 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -3,6 +3,7 @@ import typing as tp from itertools import chain +import matplotlib.colors as mcolors import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -103,28 +104,64 @@ def plot(self, view_mode: bool) -> None: # Data aggregation df = pd.DataFrame() df = load_precision_data(case_studies, profilers) - # df = pd.concat([df, pd.DataFrame(get_fake_prec_rows())]) df.sort_values(["CaseStudy"], inplace=True) - print(f"{df=}") - - grid = multivariate_grid( - df, - 'precision', - 'recall', - 'Profiler', - global_kde=False, - alpha=0.7, - legend=False, - s=100 + df = df.melt( + id_vars=['CaseStudy', 'Patch', 'Profiler'], + value_vars=['precision', 'recall'], + var_name='metric', + value_name="value" ) - grid.ax_marg_x.set_xlim(0.0, 1.02) - grid.ax_marg_y.set_ylim(0.0, 1.02) - grid.ax_joint.legend([name for name, _ in df.groupby("Profiler")]) - grid.ax_joint.set_xlabel("Precision") - grid.ax_joint.set_ylabel("Recall") - grid.ax_joint.xaxis.label.set_size(20) - grid.ax_joint.yaxis.label.set_size(20) + colors = sns.color_palette("Paired", len(case_studies) * 2) + _, axes = plt.subplots(ncols=len(profilers), nrows=1, sharey=True) + + for idx, profiler in enumerate(profilers): + ax = axes[idx] + color_slice = colors[idx * 2:idx * 2 + 2] + data_slice = df[df['Profiler'] == profiler.name] + + sns.violinplot( + data=data_slice, + x='Profiler', + y='value', + hue='metric', + inner='quartile', + cut=0, + split=True, + palette=color_slice, + alpha=.25, + linewidth=1, + ax=ax + ) + + sns.stripplot( + data=data_slice, + x="Profiler", + y="value", + hue="metric", + dodge=True, + palette=[ + 
mcolors.CSS4_COLORS['dimgrey'], + mcolors.CSS4_COLORS['darkgrey'] + ], + size=4, + ax=ax + ) + + ax.get_legend().remove() + + ax.set_ylabel(None) + ax.set_xlabel(None) + ax.tick_params(axis='x', labelsize=10, pad=8, length=6, width=1) + + if idx == 0: + ax.set_ylim(-0.1, 1.1) + ax.tick_params(axis='y', labelsize=10) + ax.tick_params(axis='y', width=1, length=3) + else: + ax.tick_params(left=False) + + plt.subplots_adjust(wspace=.0) def calc_missing_revisions( self, boundary_gradient: float From 52319e1af7ea0a1e23248d9f969adeb168835e4f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 7 Sep 2023 20:51:56 +0200 Subject: [PATCH 119/224] Adds exception handling for some reports that currently don't parse --- .../varats/data/databases/feature_perf_precision_database.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 7d9372707..802ac9c46 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -1,5 +1,6 @@ """Shared data aggregation function for analyzing feature performance.""" import abc +import traceback import typing as tp from collections import defaultdict @@ -454,11 +455,13 @@ def compute_profiler_predictions( result_dict[config_id] = profiler.is_regression( report_files[0], patch_name ) - except Exception: + except Exception as exception: # pylint: disable=W0718 print( f"FAILURE: Skipping {config_id=} of {project_name=}, " f"profiler={profiler.name}" ) + print(exception) + print(traceback.format_exc()) return result_dict From 93d782b0e02a1f707ce06857ffcd9e4c44d75302 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 14 Sep 2023 13:47:40 +0200 Subject: [PATCH 120/224] Polish dist plot --- varats/varats/plots/feature_perf_precision.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 7551245c9..8b6d7a3eb 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -125,7 +125,7 @@ def plot(self, view_mode: bool) -> None: x='Profiler', y='value', hue='metric', - inner='quartile', + inner=None, cut=0, split=True, palette=color_slice, @@ -139,7 +139,10 @@ def plot(self, view_mode: bool) -> None: x="Profiler", y="value", hue="metric", + jitter=0.1, dodge=True, + linewidth=0.5, + marker='x', palette=[ mcolors.CSS4_COLORS['dimgrey'], mcolors.CSS4_COLORS['darkgrey'] From 33824501d016d889b7d396a34c6b5782bf215522 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Fri, 15 Sep 2023 14:43:35 +0200 Subject: [PATCH 121/224] * Added Profiling Binary to HyTeg --- varats/varats/projects/cpp_projects/hyteg.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 9b104e07e..902ef6744 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -2,11 +2,12 @@ import benchbuild as bb from benchbuild.utils.cmd import make, cmake, mkdir +from benchbuild.utils.revision_ranges import SingleRevision from benchbuild.utils.settings import get_number_of_jobs from plumbum import local from varats.project.project_domain import ProjectDomains -from varats.project.project_util import get_local_project_git_path +from 
varats.project.project_util import get_local_project_git_path, BinaryType from varats.project.sources import FeatureSource from varats.project.varats_project import VProject from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap @@ -37,7 +38,15 @@ def binaries_for_revision( ) -> tp.List['ProjectBinaryWrapper']: binaries = RevisionBinaryMap(get_local_project_git_path(HyTeg.NAME)) - return binaries + binaries.specify_binary( + "build/apps/profiling/ProfilingApp", + BinaryType.EXECUTABLE, + only_valid_in=SingleRevision( + "f4711dadc3f61386e6ccdc704baa783253332db2" + ) + ) + + return binaries[revision] def compile(self) -> None: hyteg_source = local.path(self.source_of(self.primary_source)) @@ -54,5 +63,8 @@ def compile(self) -> None: with local.cwd(hyteg_source / "build" / "apps"): bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) + def recompile(self) -> None: + pass + def run_tests(self) -> None: pass From cbb958287afe20185c3725752a122aa4bf235d22 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 15 Sep 2023 16:15:25 +0200 Subject: [PATCH 122/224] Adapt overhead plot for paper --- varats/varats/plots/feature_perf_precision.py | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 8b6d7a3eb..554f7310f 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -185,12 +185,14 @@ class PerfOverheadPlot(Plot, plot_name='fperf_overhead'): def plot(self, view_mode: bool) -> None: # -- Configure plot -- - plot_metric = [("Time", "overhead_time_rel"), - ("Memory", "overhead_memory_rel"), - ("Major Page Faults", "overhead_major_page_faults_rel"), - ("Minor Page Faults", "overhead_minor_page_faults_rel"), - ("Filesystem Inputs", "overhead_fs_inputs_rel"), - ("Filesystem Outputs", "overhead_fs_outputs_rel")] + plot_metric = [ + ("Time", "overhead_time_rel"), + ("Memory", "overhead_memory_rel"), + #("Major Page Faults", "overhead_major_page_faults_rel"), + #("Minor Page Faults", "overhead_minor_page_faults_rel"), + #("Filesystem Inputs", "overhead_fs_inputs_rel"), + #("Filesystem Outputs", "overhead_fs_outputs_rel"), + ] target_row = "f1_score" # target_row = "precision" @@ -273,7 +275,7 @@ def plot(self, view_mode: bool) -> None: # print(f"{self.plot_config.width()}") - rows = 3 + rows = 1 _, axes = plt.subplots( ncols=int(len(plot_metric) / rows), nrows=rows, figsize=(30, 10) ) @@ -284,7 +286,13 @@ def plot(self, view_mode: bool) -> None: axes ) else: - for idx, ax in enumerate(list(chain.from_iterable(axes))): + print(f"{axes=}") + if rows == 1: + axes_list = list(axes) + else: + axes_list = list(chain.from_iterable(axes)) + + for idx, ax in enumerate(axes_list): self.do_single_plot( plot_metric[idx][1], target_row, merged_df, plot_metric[idx][0], ax @@ -301,14 +309,14 @@ def do_single_plot( hue="Profiler", style='CaseStudy', alpha=0.5, - s=100, + s=300, ax=ax ) for text_obj in ax.legend().get_texts(): text_obj: Text - text_obj.set_fontsize("small") + text_obj.set_fontsize("xx-large") if text_obj.get_text() == "Profiler": text_obj.set_text("Profilers") text_obj.set_fontweight("bold") @@ -336,9 +344,11 @@ def do_single_plot( ) + 20, 120 ) ax.set_xlim(x_limit, 100) - ax.xaxis.label.set_size(20) - ax.yaxis.label.set_size(20) - ax.tick_params(labelsize=15) + ax.tick_params(labelsize=20, pad=10) + ax.xaxis.label.set_size(25) + ax.yaxis.label.set_size(25) + ax.yaxis.labelpad = 10 + ax.xaxis.labelpad 
= 20 prof_df = merged_df[['Profiler', 'precision', x_values, 'f1_score' ]].groupby('Profiler').agg(['mean', 'std']) @@ -377,7 +387,7 @@ def do_single_plot( hue="Profiler", ax=ax, legend=False, - s=100, + s=300, zorder=2 ) @@ -387,7 +397,7 @@ def do_single_plot( ax=ax, color='firebrick', legend=False, - linewidth=2.5, + linewidth=3.5, zorder=1 ) From b1f3ebcced566dc8dcf5536295ef3a36ca9432fe Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 19 Sep 2023 08:44:08 +0200 Subject: [PATCH 123/224] Adds error tracking for incomplete reports (+ preciser region tracking) --- varats-core/varats/report/tef_report.py | 18 +++++++++- .../feature_perf_precision_database.py | 33 ++++++++++++++++--- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/varats-core/varats/report/tef_report.py b/varats-core/varats/report/tef_report.py index 644a2f818..d447133d1 100644 --- a/varats-core/varats/report/tef_report.py +++ b/varats-core/varats/report/tef_report.py @@ -1,6 +1,7 @@ """Report module to create and handle trace event format files, e.g., created with chrome tracing.""" +import logging import re import typing as tp from enum import Enum @@ -11,6 +12,8 @@ from varats.experiment.workload_util import WorkloadSpecificReportAggregate from varats.report.report import BaseReport, ReportAggregate +LOG = logging.getLogger(__name__) + class TraceEventType(Enum): """Enum to represent the different event types of trace format events, @@ -63,6 +66,14 @@ def __init__( self.__pid = int(json_trace_event["pid"]) self.__tid = int(json_trace_event["tid"]) + if "UUID" in json_trace_event: + self.__uuid: int = int(json_trace_event["UUID"]) + elif "ID" in json_trace_event: + self.__uuid: int = int(json_trace_event["ID"]) + else: + LOG.critical("Could not parse UUID/ID from trace event") + self.__uuid: int = 0 + @property def name(self) -> str: return self.__name_id_mapper.infer_name(self.__name_id) @@ -87,9 +98,14 @@ def pid(self) -> int: def tid(self) -> int: return self.__tid + @property + def uuid(self) -> int: + return self.__uuid + def __str__(self) -> str: return f"""{{ name: {self.name} + uuid: {self.uuid} cat: {self.category} ph: {self.event_type} ts: {self.timestamp} @@ -99,7 +115,7 @@ def __str__(self) -> str: """ def __repr__(self) -> str: - return str(self) + return f"{{ name={self.name}, uuid={self.uuid} }}" class TEFReport(BaseReport, shorthand="TEF", file_type="json"): diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 802ac9c46..bf644fc04 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -1,5 +1,6 @@ """Shared data aggregation function for analyzing feature performance.""" import abc +import logging import traceback import typing as tp from collections import defaultdict @@ -24,6 +25,8 @@ from varats.revision.revisions import get_processed_revisions_files from varats.utils.git_util import FullCommitHash +LOG = logging.getLogger(__name__) + def get_interactions_from_fr_string(interactions: str) -> str: """Convert the feature strings in a TEFReport from FR(x,y) to x*y, similar @@ -52,12 +55,31 @@ def get_feature_performance_from_tef_report( feature_performances: tp.Dict[str, int] = {} + def get_matching_event( + open_events: tp.List[TraceEvent], closing_event: TraceEvent + ): + for event in open_events: + # TODO: needs PID/TID checking + if event.uuid == closing_event.uuid: + open_events.remove(event) 
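+                # consume the matched begin event so it cannot match twice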
+                return event
+
+        LOG.error(
+            f"Could not find matching start for Event {repr(closing_event)}."
+        )
+
+        return None
+
     for trace_event in tef_report.trace_events:
         if trace_event.category == "Feature":
-            if (trace_event.event_type == TraceEventType.DURATION_EVENT_BEGIN):
-                open_events.append(trace_event)
-            elif (trace_event.event_type == TraceEventType.DURATION_EVENT_END):
-                opening_event = open_events.pop()
+            if trace_event.event_type == TraceEventType.DURATION_EVENT_BEGIN:
+                # open_events.append(trace_event)
+                # insert event at the top of the list
+                open_events.insert(0, trace_event)
+            elif trace_event.event_type == TraceEventType.DURATION_EVENT_END:
+                opening_event = get_matching_event(open_events, trace_event)
+                if not opening_event:
+                    continue
 
                 end_timestamp = trace_event.timestamp
                 begin_timestamp = opening_event.timestamp
@@ -92,6 +114,9 @@ def get_feature_performance_from_tef_report(
                 current_performance + end_timestamp - begin_timestamp
             )
 
+    if open_events:
+        LOG.error(f"Not all events have been closed: {open_events}.")
+
     return feature_performances
 

From 99982bb0cfc3a4051a09c2e9d8cd34235e8134c3 Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Sun, 24 Sep 2023 10:02:01 +0200
Subject: [PATCH 124/224] Adds checking for pid/tid to ensure we close the
 right measurement

---
 .../data/databases/feature_perf_precision_database.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py
index bf644fc04..77e7067c0 100644
--- a/varats/varats/data/databases/feature_perf_precision_database.py
+++ b/varats/varats/data/databases/feature_perf_precision_database.py
@@ -59,8 +59,11 @@ def get_matching_event(
         open_events: tp.List[TraceEvent], closing_event: TraceEvent
     ):
         for event in open_events:
-            # TODO: needs PID/TID checking
-            if event.uuid == closing_event.uuid:
+            if (
+                event.uuid == closing_event.uuid and
+                event.pid == closing_event.pid and
+                event.tid == closing_event.tid
+            ):
                 open_events.remove(event)
                 return event

From 3a6c5d32c5fcc2e55d5069855585ff2da1d3add3 Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Tue, 26 Sep 2023 10:13:15 +0200
Subject: [PATCH 125/224] Implements alternative comparison techniques

---
 requirements.txt                              |   1 +
 varats/setup.py                               |   1 +
 .../feature_perf_precision_database.py        | 175 +++++++++++------
 3 files changed, 107 insertions(+), 70 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 8f5185378..75c134524 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 benchbuild>=6.6.4
 click>=8.1.3
+cliffs-delta>=1.0.0
 distro>=1.5.0
 graphviz>=0.14.2
 ijson>=3.1.4
diff --git a/varats/setup.py b/varats/setup.py
index 8bd6d6583..c0666a15b 100644
--- a/varats/setup.py
+++ b/varats/setup.py
@@ -44,6 +44,7 @@
         "tabulate>=0.9",
         "varats-core>=13.0.5",
         "wllvm>=1.3.1",
+        "cliffs-delta>=1.0.0",
     ],
     author="Florian Sattler",
     author_email="sattlerf@cs.uni-saarland.de",
diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py
index 77e7067c0..24710c3f1 100644
--- a/varats/varats/data/databases/feature_perf_precision_database.py
+++ b/varats/varats/data/databases/feature_perf_precision_database.py
@@ -7,6 +7,7 @@
 
 import numpy as np
 import pandas as pd
+from cliffs_delta import cliffs_delta
 from scipy.stats import ttest_ind
 
 import varats.experiments.vara.feature_perf_precision as fpp
@@ -123,6 +124,107 @@ def get_matching_event(
return feature_performances +def precise_pim_regression_check( + baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] +) -> bool: + is_regression = False + + for feature, old_values in baseline_pim.items(): + if feature in current_pim: + if feature == "Base": + # The regression should be identified in actual feature code + continue + + new_values = current_pim[feature] + ttest_res = ttest_ind(old_values, new_values) + + # TODO: check, maybe we need a "very small value cut off" + if ttest_res.pvalue < 0.05: + # print( + # f"{self.name} found regression for feature {feature}." + # ) + is_regression = True + else: + print(f"Could not find feature {feature} in new trace.") + # TODO: how to handle this? + raise NotImplementedError() + is_regression = True + + return is_regression + + +def cliffs_delta_pim_regression_check( + baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] +) -> bool: + is_regression = False + + for feature, old_values in baseline_pim.items(): + if feature in current_pim: + if feature == "Base": + # The regression should be identified in actual feature code + continue + + new_values = current_pim[feature] + d, res = cliffs_delta(old_values, new_values) + + # print(f"{d=}, {res=}") + + # if d > 0.70 or d < -0.7: + if res == "large": + # print( + # f"{self.name} found regression for feature {feature}." + # ) + is_regression = True + else: + print(f"Could not find feature {feature} in new trace.") + # TODO: how to handle this? + raise NotImplementedError() + is_regression = True + + return is_regression + + +def sum_pim_regression_check( + baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] +) -> bool: + # TODO: add some tests + baseline_pim_totals: tp.List[tp.List[int]] = [ + old_values for feature, old_values in baseline_pim.items() + if feature != "Base" + ] + print(f"{baseline_pim_totals=}") + current_pim_totals: tp.List[tp.List[int]] = [ + current_values for feature, current_values in current_pim.items() + if feature != "Base" + ] + print(f"{current_pim_totals=}") + + baseline_pim_total: tp.List[int] = [ + sum(values) for values in zip(*baseline_pim_totals) + ] + print(f"{baseline_pim_total=}") + current_pim_total: tp.List[int] = [ + sum(values) for values in zip(*current_pim_totals) + ] + print(f"{current_pim_total=}") + + # TODO: does not work for large numbers + return ttest_ind(baseline_pim_total, current_pim_total).pvalue < 0.05 + + +def pim_regression_check( + baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] +) -> bool: + """Compares two pims and determines if there was a regression between the + baseline and current.""" + # return cliffs_delta_pim_regression_check(baseline_pim, current_pim) + return precise_pim_regression_check(baseline_pim, current_pim) + + class Profiler(): """Profiler interface to add different profilers to the evaluation.""" @@ -176,8 +278,6 @@ def is_regression( self, report_path: ReportFilepath, patch_name: str ) -> bool: """Checks if there was a regression between the old an new data.""" - is_regression = False - multi_report = fpp.MultiPatchReport( report_path.full_path(), TEFReportAggregate ) @@ -198,27 +298,7 @@ def is_regression( for feature, value in pim.items(): new_acc_pim[feature].append(value) - for feature, old_values in old_acc_pim.items(): - if feature == "Base": - # The regression should be identified in actual feature code - continue - - if 
feature in new_acc_pim: - new_values = new_acc_pim[feature] - ttest_res = ttest_ind(old_values, new_values) - - # TODO: check, maybe we need a "very small value cut off" - if ttest_res.pvalue < 0.05: - # print( - # f"{self.name} found regression for feature {feature}." - # ) - is_regression = True - else: - print(f"Could not find feature {feature} in new trace.") - # TODO: how to handle this? - is_regression = True - - return is_regression + return pim_regression_check(old_acc_pim, new_acc_pim) class PIMTracer(Profiler): @@ -253,8 +333,6 @@ def is_regression( self, report_path: ReportFilepath, patch_name: str ) -> bool: """Checks if there was a regression between the old an new data.""" - is_regression = False - multi_report = fpp.MultiPatchReport( report_path.full_path(), fpp.PerfInfluenceTraceReportAggregate ) @@ -274,28 +352,7 @@ def is_regression( print(e) return False - # TODO: same for TEF - for feature, old_values in old_acc_pim.items(): - if feature in new_acc_pim: - if feature == "Base": - # The regression should be identified in actual feature code - continue - - new_values = new_acc_pim[feature] - ttest_res = ttest_ind(old_values, new_values) - - # TODO: check, maybe we need a "very small value cut off" - if ttest_res.pvalue < 0.05: - # print( - # f"{self.name} found regression for feature {feature}." - # ) - is_regression = True - else: - print(f"Could not find feature {feature} in new trace.") - # TODO: how to handle this? - is_regression = True - - return is_regression + return pim_regression_check(old_acc_pim, new_acc_pim) class EbpfTraceTEF(Profiler): @@ -311,8 +368,6 @@ def is_regression( self, report_path: ReportFilepath, patch_name: str ) -> bool: """Checks if there was a regression between the old an new data.""" - is_regression = False - multi_report = fpp.MultiPatchReport( report_path.full_path(), TEFReportAggregate ) @@ -333,27 +388,7 @@ def is_regression( for feature, value in pim.items(): new_acc_pim[feature].append(value) - for feature, old_values in old_acc_pim.items(): - if feature == "Base": - # The regression should be identified in actual feature code - continue - - if feature in new_acc_pim: - new_values = new_acc_pim[feature] - ttest_res = ttest_ind(old_values, new_values) - - # TODO: check, maybe we need a "very small value cut off" - if ttest_res.pvalue < 0.05: - # print( - # f"{self.name} found regression for feature {feature}." - # ) - is_regression = True - else: - print(f"Could not find feature {feature} in new trace.") - # TODO: how to handle this? 
- is_regression = True - - return is_regression + return pim_regression_check(old_acc_pim, new_acc_pim) def get_patch_names(case_study: CaseStudy) -> tp.List[str]: From 3ea74fea20743df8fe6861019b64508d2e4a8f39 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 26 Sep 2023 13:00:59 +0200 Subject: [PATCH 126/224] * Added cflags to disable phasar for hyteg --- varats/varats/projects/cpp_projects/hyteg.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 902ef6744..a5b053748 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -6,6 +6,7 @@ from benchbuild.utils.settings import get_number_of_jobs from plumbum import local +from varats.paper.paper_config import PaperConfigSpecificGit from varats.project.project_domain import ProjectDomains from varats.project.project_util import get_local_project_git_path, BinaryType from varats.project.sources import FeatureSource @@ -20,7 +21,8 @@ class HyTeg(VProject): DOMAIN = ProjectDomains.CPP_LIBRARY SOURCE = [ - bb.source.Git( + PaperConfigSpecificGit( + project_name="HyTeg", remote="git@github.com:se-sic/hyteg-VaRA.git", local="HyTeg", refspec="origin/HEAD", @@ -53,6 +55,10 @@ def compile(self) -> None: mkdir("-p", hyteg_source / "build") + # Currently Phasar passes crash the compiler + # This limits us to analysing compile time variability + self.cflags += ["-mllvm", "--vara-disable-phasar"] + cc_compiler = bb.compiler.cc(self) cxx_compiler = bb.compiler.cxx(self) From fe038008630daf5bff5ce294f577f2aebfd0949f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 28 Sep 2023 20:01:45 +0200 Subject: [PATCH 127/224] Adds extra option call to vara xray runner --- varats/varats/experiments/vara/feature_experiment.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/varats/varats/experiments/vara/feature_experiment.py b/varats/varats/experiments/vara/feature_experiment.py index 4f48a8251..98615161c 100644 --- a/varats/varats/experiments/vara/feature_experiment.py +++ b/varats/varats/experiments/vara/feature_experiment.py @@ -338,6 +338,10 @@ def run_traced_code(self) -> StepResult: flush=True ) with cleanup(prj_command): + pb_cmd = pb_cmd[get_extra_config_options( + self.project + )] + print(f"{pb_cmd=}") _, _, err = pb_cmd.run() xray = re.findall( r"XRay: Log file in '(.+?)'", From 4de2e50fb531b50e501b0489df28169ff7fb2498 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 29 Sep 2023 21:25:29 +0200 Subject: [PATCH 128/224] Adds result table --- .../experiments/vara/feature_perf_runner.py | 4 +- .../varats/tables/feature_perf_precision.py | 209 ++++++++++++++++++ 2 files changed, 212 insertions(+), 1 deletion(-) diff --git a/varats/varats/experiments/vara/feature_perf_runner.py b/varats/varats/experiments/vara/feature_perf_runner.py index 7e69134f1..5544bb7f2 100644 --- a/varats/varats/experiments/vara/feature_perf_runner.py +++ b/varats/varats/experiments/vara/feature_perf_runner.py @@ -83,7 +83,9 @@ def actions_for_project( ) -> tp.MutableSequence[actions.Step]: project.cflags += self.get_vara_feature_cflags(project) - project.cflags += self.get_vara_tracing_cflags(FeatureInstrType.TEF) + project.cflags += self.get_vara_tracing_cflags( + FeatureInstrType.TEF, instruction_threshold=1 + ) project.cflags += [ "-fxray-instrument", diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 295db9646..671a97164 
100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -3,6 +3,8 @@ import typing as tp from pathlib import Path +import matplotlib.colors as colors +import matplotlib.pyplot as plt import numpy as np import pandas as pd from plumbum import local, TF, RETCODE @@ -21,6 +23,7 @@ compute_profiler_predictions, OverheadData, load_precision_data, + load_overhead_data, ) from varats.data.metrics import ConfusionMatrix from varats.paper.case_study import CaseStudy @@ -33,6 +36,41 @@ from varats.utils.git_util import calc_repo_loc, ChurnConfig, git +def cmap_map(function, cmap): + """ + Applies function (which should operate on vectors of shape 3: [r, g, b]), on + colormap cmap. + + This routine will break any discontinuous points in a colormap. + """ + cdict = cmap._segmentdata + step_dict = {} + # Firt get the list of points where the segments start or end + for key in ('red', 'green', 'blue'): + step_dict[key] = list(map(lambda x: x[0], cdict[key])) + step_list = sum(step_dict.values(), []) + step_list = np.array(list(set(step_list))) + # Then compute the LUT, and apply the function to the LUT + reduced_cmap = lambda step: np.array(cmap(step)[0:3]) + old_LUT = np.array(list(map(reduced_cmap, step_list))) + new_LUT = np.array(list(map(function, old_LUT))) + # Now try to make a minimal segment definition of the new LUT + cdict = {} + for i, key in enumerate(['red', 'green', 'blue']): + this_cdict = {} + for j, step in enumerate(step_list): + if step in step_dict[key]: + this_cdict[step] = new_LUT[j, i] + elif new_LUT[j, i] != old_LUT[j, i]: + this_cdict[step] = new_LUT[j, i] + colorvector = list(map(lambda x: x + (x[1],), this_cdict.items())) + colorvector.sort() + cdict[key] = colorvector + + import matplotlib + return matplotlib.colors.LinearSegmentedColormap('colormap', cdict, 1024) + + class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): """Table that compares the precision of different feature performance measurement approaches.""" @@ -320,6 +358,177 @@ def generate(self) -> tp.List[Table]: ] +def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100): + new_cmap = colors.LinearSegmentedColormap.from_list( + 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval), + cmap(np.linspace(minval, maxval, n)) + ) + return new_cmap + + +class FeaturePerfOverheadComparisionTable(Table, table_name="fperf_overhead"): + """Table that compares overhead of different feature performance measurement + approaches.""" + + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] + + # Data aggregation + full_precision_df = load_precision_data(case_studies, profilers) + full_precision_df.sort_values(["CaseStudy"], inplace=True) + + precision_df = full_precision_df[[ + "CaseStudy", "precision", "recall", "Profiler", "f1_score" + ]] + # aggregate multiple revisions + precision_df = precision_df.groupby(['CaseStudy', "Profiler"], + as_index=False).agg({ + 'precision': 'mean', + 'recall': 'mean', + 'f1_score': 'mean' + }) + print(f"precision_df=\n{precision_df}") + + overhead_df = load_overhead_data(case_studies, profilers) + overhead_df = overhead_df[[ + "CaseStudy", "Profiler", "time", "memory", "overhead_time", + "overhead_memory" + ]] + # print(f"{overhead_df=}") + # TODO: double check and refactor + overhead_df['overhead_time_rel'] = overhead_df['time'] / ( + 
overhead_df['time'] - overhead_df['overhead_time'] + ) * 100 - 100 + + overhead_df['overhead_memory_rel'] = overhead_df['memory'] / ( + overhead_df['memory'] - overhead_df['overhead_memory'] + ) * 100 - 100 + overhead_df['overhead_memory_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + print(f"{overhead_df=}") + + # Merge with precision data + merged_df = pd.merge( + precision_df, overhead_df, on=["CaseStudy", "Profiler"] + ) + print(f"merged_df=\n{merged_df}") + + pivot_df = merged_df.pivot( + index='CaseStudy', + columns='Profiler', + values=[ + 'precision', 'recall', 'overhead_time_rel', + 'overhead_memory_rel' + ] + ) + + # print(f"pivot_df=\n{pivot_df}") + # print(f"{pivot_df.columns=}") + pivot_df = pivot_df.swaplevel(0, 1, 1).sort_index(axis=1) + + # print(f"pivot_df=\n{pivot_df}") + columns = [ + 'precision', 'recall', 'overhead_time_rel', 'overhead_memory_rel' + ] + pivot_df = pivot_df.reindex([ + (prof.name, c) for prof in profilers for c in columns + ], + axis=1) + print(f"pivot_df=\n{pivot_df}") + + # print(f"{pivot_df.columns=}") + + pivot_df.loc["Total"] = pivot_df.mean() + print(f"pivot_df=\n{pivot_df}") + + # Rename columns + overhead_time_c_name = "$\Delta$ Time $(\%)$" + overhead_memory_c_name = "$\Delta$ Mem $(\%)$" + pivot_df = pivot_df.rename( + columns={ + "precision": "Precision", + "recall": "Recall", + "overhead_time_rel": overhead_time_c_name, + "overhead_memory_rel": overhead_memory_c_name, + } + ) + + style: pd.io.formats.style.Styler = pivot_df.style + kwargs: tp.Dict[str, tp.Any] = {} + + def add_extras(doc: Document) -> None: + doc.packages.append(Package("amsmath")) + doc.packages.append(Package("amssymb")) + + if table_format.is_latex(): + style.format(precision=2) + + ryg_map = plt.get_cmap('RdYlGn') + ryg_map = cmap_map(lambda x: x / 1.2 + 0.2, ryg_map) + + style.background_gradient( + cmap=ryg_map, + subset=[(prof.name, 'Precision') for prof in profilers], + vmin=0.0, + vmax=1.0, + ) + style.background_gradient( + cmap=ryg_map, + subset=[(prof.name, 'Recall') for prof in profilers], + vmin=0.0, + vmax=1.0, + ) + + gray_map = plt.get_cmap('binary') + gray_map = truncate_colormap(gray_map, 0, 0.6, 200) + style.background_gradient( + cmap=gray_map, + subset=[(prof.name, overhead_time_c_name) for prof in profilers + ], + vmin=0.0, + vmax=100.0, + ) + + style.background_gradient( + cmap=gray_map, + subset=[ + (prof.name, overhead_memory_c_name) for prof in profilers + ], + vmin=0.0, + vmax=100.0, + ) + + kwargs["convert_css"] = True + kwargs["column_format"] = "l" + "".join(["rrrr" for _ in profilers]) + kwargs["hrules"] = True + kwargs["multicol_align"] = "c" + + return dataframe_to_table( + data=pivot_df, + table_format=table_format, + style=style, + wrap_table=wrap_table, + wrap_landscape=True, + document_decorator=add_extras, + **kwargs + ) + + +class FeaturePerfOverheadComparisionTableGenerator( + TableGenerator, generator_name="fperf-overhead-comp", options=[] +): + """Generator for `FeaturePerfOverheadTable`.""" + + def generate(self) -> tp.List[Table]: + return [ + FeaturePerfOverheadComparisionTable( + self.table_config, **self.table_kwargs + ) + ] + + class FeaturePerfMetricsOverviewTable(Table, table_name="fperf_overview"): """Table showing some general information about feature performance case studies.""" From 52fbf5673eb78f217112ee87cbeffd7a9064929a Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 4 Oct 2023 09:05:01 +0200 Subject: [PATCH 129/224] * Disabled MPI build for HyTeG/Walberla --- 
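For reference, the relative-overhead columns computed in the overhead table above follow `time / (time - overhead_time) * 100 - 100`, i.e., the profiled run is scaled against its overhead-free share. Below is a minimal sketch with made-up numbers; only the column names are taken from the table code, the values and the standalone script are illustrative:

```python
import pandas as pd

# Invented measurements: 120s total wall-clock time, of which 20s are
# attributed to the profiler.
df = pd.DataFrame({"time": [120.0], "overhead_time": [20.0]})

# time / (time - overhead_time) compares the profiled run against the
# overhead-free baseline; "* 100 - 100" converts the ratio into percent
# of extra time.
df["overhead_time_rel"] = (
    df["time"] / (df["time"] - df["overhead_time"]) * 100 - 100
)

print(df["overhead_time_rel"].iloc[0])  # 120 / 100 * 100 - 100 = 20.0 (%)
```

The same formula is applied to `memory`/`overhead_memory`; when the baseline share is zero the division produces infinities, which is why the table code replaces `inf`/`-inf` with `NaN`.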
varats/varats/projects/cpp_projects/hyteg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index a5b053748..f56aa834d 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -64,7 +64,7 @@ def compile(self) -> None: with local.cwd(hyteg_source / "build"): with local.env(CC=str(cc_compiler), CXX=str(cxx_compiler)): - bb.watch(cmake)("..") + bb.watch(cmake)("..", "-DWALBERLA_BUILD_WITH_MPI=OFF") with local.cwd(hyteg_source / "build" / "apps"): bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) From 85c986c96789af164f28c4e8fe0f7837e268b687 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 4 Oct 2023 10:42:54 +0200 Subject: [PATCH 130/224] * Disabled Doc build for HyTeG --- varats/varats/projects/cpp_projects/hyteg.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index f56aa834d..cfde582cd 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -64,7 +64,10 @@ def compile(self) -> None: with local.cwd(hyteg_source / "build"): with local.env(CC=str(cc_compiler), CXX=str(cxx_compiler)): - bb.watch(cmake)("..", "-DWALBERLA_BUILD_WITH_MPI=OFF") + bb.watch(cmake)( + "..", "-DWALBERLA_BUILD_WITH_MPI=OFF", + "-DHYTEG_BUILD_DOC=OFF" + ) with local.cwd(hyteg_source / "build" / "apps"): bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) From bbceb1aa33cf7e91e2823f78ea910781b878b1a2 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 5 Oct 2023 15:57:43 +0200 Subject: [PATCH 131/224] Adds patch tags and workload for Runtime --- varats-core/varats/experiment/workload_util.py | 9 ++++++++- .../experiments/vara/feature_perf_precision.py | 16 ++++++++++------ .../perf_tests/feature_perf_cs_collection.py | 13 +++++++++++++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/varats-core/varats/experiment/workload_util.py b/varats-core/varats/experiment/workload_util.py index 38b82720f..6cf793572 100644 --- a/varats-core/varats/experiment/workload_util.py +++ b/varats-core/varats/experiment/workload_util.py @@ -106,11 +106,18 @@ def filter_by_config(prj_cmd: ProjectCommand) -> bool: return prj_cmd.command.can_be_executed_by(extra_options, patches) return True - return [ + workloads = [ cmd for cmd in project_cmds if cmd.path.name == binary.name and filter_by_config(cmd) ] + if not workloads: + print( + f"Could not find any workloads for {project.name}." + f"Tags[{requested_workload_tags}]" + ) + return workloads + def create_workload_specific_filename( filename_base: str, diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 28acc21e4..71b6b7b60 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -402,9 +402,11 @@ def setup_actions_for_vara_experiment( ) patch_provider = PatchProvider.get_provider_for_project(project) - patches = patch_provider.get_patches_for_revision( - ShortCommitHash(project.version_of_primary) - ) + patches = [ + p for p in patch_provider. 
+ get_patches_for_revision(ShortCommitHash(project.version_of_primary)) + if 'perf_prec' in p.tags + ] print(f"{patches=}") patch_steps = [] @@ -643,9 +645,11 @@ def actions_for_project( ) patch_provider = PatchProvider.get_provider_for_project(project) - patches = patch_provider.get_patches_for_revision( - ShortCommitHash(project.version_of_primary) - ) + patches = [ + p for p in patch_provider.get_patches_for_revision( + ShortCommitHash(project.version_of_primary) + ) if 'perf_prec' in p.tags + ] print(f"{patches=}") patch_steps = [] diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 56223da79..1d1357d32 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -745,6 +745,19 @@ class SynthIPRuntime(VProject): ] WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + "-c", + "<", + "geo-maps/countries-land-1km.geo.json", + ">", + "geo-maps/countries-land-1km.geo.json.compressed", + label="countries-land-1km", + creates=["geo-maps/countries-land-1km.geo.json.compressed"], + requires_all_args={"-c"} + ) + ], WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), From 3b3e300dade43f9f93ec70758271732d59eab36a Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 5 Oct 2023 16:09:52 +0200 Subject: [PATCH 132/224] Fixes workload file selection for SynthIP* case studies --- .../perf_tests/feature_perf_cs_collection.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 1d1357d32..23c542e9e 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -776,9 +776,9 @@ class SynthIPRuntime(VProject): SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), "-c", "<", - "geo-maps/countries-land-1km.geo.json", + "geo-maps/countries-land-1m.geo.json", ">", - "geo-maps/countries-land-1km.geo.json.compressed", + "geo-maps/countries-land-1m.geo.json.compressed", label="countries-land-1m", creates=["geo-maps/countries-land-1m.geo.json.compressed"], requires_all_args={"-c"} @@ -850,9 +850,9 @@ class SynthIPTemplate(VProject): SourceRoot("SynthIPTemplate") / RSBinary("Template"), "-c", "<", - "geo-maps/countries-land-1km.geo.json", + "geo-maps/countries-land-1m.geo.json", ">", - "geo-maps/countries-land-1km.geo.json.compressed", + "geo-maps/countries-land-1m.geo.json.compressed", label="countries-land-1m", creates=["geo-maps/countries-land-1m.geo.json.compressed"], requires_all_patch={"Compress"} @@ -924,9 +924,9 @@ class SynthIPTemplate2(VProject): SourceRoot("SynthIPTemplate2") / RSBinary("Template2"), "-c", "<", - "geo-maps/countries-land-1km.geo.json", + "geo-maps/countries-land-1m.geo.json", ">", - "geo-maps/countries-land-1km.geo.json.compressed", + "geo-maps/countries-land-1m.geo.json.compressed", label="countries-land-1m", creates=["geo-maps/countries-land-1m.geo.json.compressed"], requires_all_patch={"Compress"} @@ -998,9 +998,9 @@ class SynthIPCombined(VProject): SourceRoot("SynthIPCombined") / RSBinary("Combined"), "-c", "<", - "geo-maps/countries-land-1km.geo.json", + "geo-maps/countries-land-1m.geo.json", ">", - 
"geo-maps/countries-land-1km.geo.json.compressed", + "geo-maps/countries-land-1m.geo.json.compressed", label="countries-land-1m", creates=["geo-maps/countries-land-1m.geo.json.compressed"], requires_all_args={"-c"} From 4bb973b2432846ba332614d701a8e200ada72c05 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 5 Oct 2023 17:55:58 +0200 Subject: [PATCH 133/224] Rework workload selection --- .../varats/experiment/workload_util.py | 2 +- .../feature_perf_precision_database.py | 6 ++- .../vara/feature_perf_precision.py | 45 +++++++++++-------- .../perf_tests/feature_perf_cs_collection.py | 13 ------ 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/varats-core/varats/experiment/workload_util.py b/varats-core/varats/experiment/workload_util.py index 6cf793572..6e0cc5074 100644 --- a/varats-core/varats/experiment/workload_util.py +++ b/varats-core/varats/experiment/workload_util.py @@ -112,7 +112,7 @@ def filter_by_config(prj_cmd: ProjectCommand) -> bool: ] if not workloads: - print( + LOG.debug( f"Could not find any workloads for {project.name}." f"Tags[{requested_workload_tags}]" ) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 24710c3f1..c6262b32d 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -416,7 +416,7 @@ def get_patch_names(case_study: CaseStudy) -> tp.List[str]: def get_regressing_config_ids_gt( project_name: str, case_study: CaseStudy, rev: FullCommitHash, - report_name: str + patch_name: str ) -> tp.Optional[tp.Dict[int, bool]]: """Computes the baseline data, i.e., the config ids where a regression was identified.""" @@ -445,7 +445,7 @@ def get_regressing_config_ids_gt( old_time = time_reports.get_baseline_report() # new_time = time_reports.get_new_report() - new_time = time_reports.get_report_for_patch(report_name) + new_time = time_reports.get_report_for_patch(patch_name) if not new_time: return None @@ -453,6 +453,8 @@ def get_regressing_config_ids_gt( ) == np.mean(new_time.measurements_wall_clock_time): gt[config_id] = False else: + # TODO: fix to use same check as profilers + # TODO: double check ttest handling ttest_res = ttest_ind( old_time.measurements_wall_clock_time, diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 71b6b7b60..12c8742f3 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -8,7 +8,7 @@ from time import sleep import benchbuild.extensions as bb_ext -from benchbuild.command import cleanup +from benchbuild.command import cleanup, ProjectCommand from benchbuild.environments.domain.declarative import ContainerImage from benchbuild.utils import actions from benchbuild.utils.actions import StepResult, Clean @@ -51,6 +51,16 @@ REPS = 3 +def perf_prec_workload_commands( + project: VProject, binary: ProjectBinaryWrapper +) -> tp.List[ProjectCommand]: + return workload_commands(project, binary, [ + WorkloadCategory.EXAMPLE + ]) + workload_commands(project, binary, [ + WorkloadCategory.SMALL + ]) + workload_commands(project, binary, [WorkloadCategory.MEDIUM]) + + class AnalysisProjectStepBase(OutputFolderStep): project: VProject @@ -132,8 +142,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: zip_tmp_dir = tmp_dir / self._file_name with ZippedReportFolder(zip_tmp_dir) as 
reps_tmp_dir: for rep in range(0, self._reps): - for prj_command in workload_commands( - self.project, self._binary, [WorkloadCategory.EXAMPLE] + for prj_command in perf_prec_workload_commands( + self.project, self._binary ): local_tracefile_path = Path(reps_tmp_dir) / ( f"trace_{prj_command.command.label}_{rep}" @@ -192,9 +202,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: with tempfile.TemporaryDirectory() as non_nfs_tmp_dir: with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: for rep in range(0, self._reps): - for prj_command in workload_commands( - self.project, self._binary, - [WorkloadCategory.EXAMPLE] + for prj_command in perf_prec_workload_commands( + self.project, self._binary ): local_tracefile_path = Path(reps_tmp_dir) / ( f"trace_{prj_command.command.label}_{rep}" @@ -302,8 +311,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: zip_tmp_dir = tmp_dir / self._file_name with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: for rep in range(0, self._reps): - for prj_command in workload_commands( - self.project, self._binary, [WorkloadCategory.EXAMPLE] + for prj_command in perf_prec_workload_commands( + self.project, self._binary ): local_tracefile_path = Path(reps_tmp_dir) / ( f"trace_{prj_command.command.label}_{rep}" @@ -572,8 +581,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: zip_tmp_dir = tmp_dir / self.__file_name with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: for rep in range(0, self.__reps): - for prj_command in workload_commands( - self.project, self.__binary, [WorkloadCategory.EXAMPLE] + for prj_command in perf_prec_workload_commands( + self.project, self.__binary ): time_report_file = Path(reps_tmp_dir) / ( f"baseline_{prj_command.command.label}_{rep}" @@ -723,8 +732,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): for rep in range(0, self._reps): - for prj_command in workload_commands( - self.project, self._binary, [WorkloadCategory.EXAMPLE] + for prj_command in perf_prec_workload_commands( + self.project, self._binary ): base = Path("/tmp/") fake_tracefile_path = base / ( @@ -789,8 +798,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: with local.cwd(local.path(self.project.builddir)): with tempfile.TemporaryDirectory() as non_nfs_tmp_dir: for rep in range(0, self._reps): - for prj_command in workload_commands( - self.project, self._binary, [WorkloadCategory.EXAMPLE] + for prj_command in perf_prec_workload_commands( + self.project, self._binary ): base = Path(non_nfs_tmp_dir) fake_tracefile_path = base / ( @@ -874,8 +883,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): for rep in range(0, self._reps): - for prj_command in workload_commands( - self.project, self._binary, [WorkloadCategory.EXAMPLE] + for prj_command in perf_prec_workload_commands( + self.project, self._binary ): base = Path("/tmp/") fake_tracefile_path = base / ( @@ -1098,8 +1107,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: """Runs the binary with the embedded tracing code.""" with local.cwd(local.path(self.project.builddir)): for rep in range(0, self.__reps): - for prj_command in workload_commands( - self.project, self.__binary, [WorkloadCategory.EXAMPLE] + for prj_command in perf_prec_workload_commands( + self.project, self.__binary ): time_report_file = tmp_dir / ( 
f"overhead_{prj_command.command.label}_{rep}" diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 23c542e9e..04f92776e 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -745,19 +745,6 @@ class SynthIPRuntime(VProject): ] WORKLOADS = { - WorkloadSet(WorkloadCategory.EXAMPLE): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - "-c", - "<", - "geo-maps/countries-land-1km.geo.json", - ">", - "geo-maps/countries-land-1km.geo.json.compressed", - label="countries-land-1km", - creates=["geo-maps/countries-land-1km.geo.json.compressed"], - requires_all_args={"-c"} - ) - ], WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), From 9ccd2cf782720698696236c8c2e8997069741b1c Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 5 Oct 2023 18:18:31 +0200 Subject: [PATCH 134/224] Rework new patch selection --- .../experiments/vara/feature_perf_precision.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 12c8742f3..563076755 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -50,6 +50,8 @@ REPS = 3 +IDENTIFIER_PATCH_TAG = 'perf_prec' + def perf_prec_workload_commands( project: VProject, binary: ProjectBinaryWrapper @@ -411,11 +413,9 @@ def setup_actions_for_vara_experiment( ) patch_provider = PatchProvider.get_provider_for_project(project) - patches = [ - p for p in patch_provider. 
- get_patches_for_revision(ShortCommitHash(project.version_of_primary)) - if 'perf_prec' in p.tags - ] + patches = patch_provider.get_patches_for_revision( + ShortCommitHash(project.version_of_primary) + )[IDENTIFIER_PATCH_TAG] print(f"{patches=}") patch_steps = [] @@ -654,11 +654,9 @@ def actions_for_project( ) patch_provider = PatchProvider.get_provider_for_project(project) - patches = [ - p for p in patch_provider.get_patches_for_revision( - ShortCommitHash(project.version_of_primary) - ) if 'perf_prec' in p.tags - ] + patches = patch_provider.get_patches_for_revision( + ShortCommitHash(project.version_of_primary) + )[IDENTIFIER_PATCH_TAG] print(f"{patches=}") patch_steps = [] From 083c66b21bfe291a282230f51d6f73136a66b0d4 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 5 Oct 2023 18:44:58 +0200 Subject: [PATCH 135/224] Prepare integration of template bases systems --- varats/varats/experiments/vara/feature_perf_precision.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 563076755..baefd6258 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -29,6 +29,7 @@ get_extra_config_options, ZippedExperimentSteps, OutputFolderStep, + get_config_patch_steps, ) from varats.experiment.steps.patch import ApplyPatch, RevertPatch from varats.experiment.steps.recompile import ReCompile @@ -434,7 +435,7 @@ def setup_actions_for_vara_experiment( ) patch_steps.append(RevertPatch(project, patch)) - analysis_actions = [] + analysis_actions = get_config_patch_steps(project) analysis_actions.append(actions.Compile(project)) analysis_actions.append( @@ -675,7 +676,7 @@ def actions_for_project( ) patch_steps.append(RevertPatch(project, patch)) - analysis_actions = [] + analysis_actions = get_config_patch_steps(project) analysis_actions.append(actions.Compile(project)) analysis_actions.append( @@ -963,7 +964,7 @@ def setup_actions_for_vara_overhead_experiment( get_current_config_id(project) ) - analysis_actions = [] + analysis_actions = get_config_patch_steps(project) analysis_actions.append(actions.Compile(project)) analysis_actions.append( @@ -1179,7 +1180,7 @@ def actions_for_project( get_current_config_id(project) ) - analysis_actions = [] + analysis_actions = get_config_patch_steps(project) analysis_actions.append(actions.Compile(project)) analysis_actions.append( From 47197e75113835625a6fc565cff2f257103cd58a Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 5 Oct 2023 18:52:59 +0200 Subject: [PATCH 136/224] Adds missing init py --- varats/varats/ts_utils/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 varats/varats/ts_utils/__init__.py diff --git a/varats/varats/ts_utils/__init__.py b/varats/varats/ts_utils/__init__.py new file mode 100644 index 000000000..e69de29bb From 968d0691e604ab8726c776d50023a13314b71b58 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 5 Oct 2023 19:11:51 +0200 Subject: [PATCH 137/224] Removes debug line --- varats-core/varats/experiment/workload_util.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/varats-core/varats/experiment/workload_util.py b/varats-core/varats/experiment/workload_util.py index 6e0cc5074..38b82720f 100644 --- a/varats-core/varats/experiment/workload_util.py +++ b/varats-core/varats/experiment/workload_util.py @@ -106,18 +106,11 @@ def 
filter_by_config(prj_cmd: ProjectCommand) -> bool: return prj_cmd.command.can_be_executed_by(extra_options, patches) return True - workloads = [ + return [ cmd for cmd in project_cmds if cmd.path.name == binary.name and filter_by_config(cmd) ] - if not workloads: - LOG.debug( - f"Could not find any workloads for {project.name}." - f"Tags[{requested_workload_tags}]" - ) - return workloads - def create_workload_specific_filename( filename_base: str, From ef967efc0d9aea63a8168433f19bc5e682f0c14c Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Fri, 6 Oct 2023 08:24:37 +0200 Subject: [PATCH 138/224] * Add HyTeG Workload --- varats/varats/projects/cpp_projects/hyteg.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index cfde582cd..fcd08994b 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -1,9 +1,11 @@ import typing as tp import benchbuild as bb +from benchbuild.command import WorkloadSet, Command, SourceRoot from benchbuild.utils.cmd import make, cmake, mkdir from benchbuild.utils.revision_ranges import SingleRevision from benchbuild.utils.settings import get_number_of_jobs +from experiment.workload_util import WorkloadCategory, RSBinary from plumbum import local from varats.paper.paper_config import PaperConfigSpecificGit @@ -18,7 +20,7 @@ class HyTeg(VProject): NAME = 'HyTeg' GROUP = 'cpp_projects' - DOMAIN = ProjectDomains.CPP_LIBRARY + DOMAIN = ProjectDomains.TEST SOURCE = [ PaperConfigSpecificGit( @@ -32,7 +34,15 @@ class HyTeg(VProject): FeatureSource() ] - WORKLOADS = {} + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + Command( + SourceRoot("HyTeG/build/apps/profiling") / + RSBinary('ProfilingApp'), + label='ProfilingApp' + ) + ] + } @staticmethod def binaries_for_revision( @@ -69,7 +79,7 @@ def compile(self) -> None: "-DHYTEG_BUILD_DOC=OFF" ) - with local.cwd(hyteg_source / "build" / "apps"): + with local.cwd(hyteg_source / "build" / "apps" / "profiling"): bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) def recompile(self) -> None: From 4d1f22d85da5594a5beed3cfeea73460ecbca73d Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 6 Oct 2023 09:49:19 +0200 Subject: [PATCH 139/224] Try fixing bzip2 --- varats/varats/projects/c_projects/bzip2.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/varats/varats/projects/c_projects/bzip2.py b/varats/varats/projects/c_projects/bzip2.py index fa9de73d7..d89cbd9fe 100644 --- a/varats/varats/projects/c_projects/bzip2.py +++ b/varats/varats/projects/c_projects/bzip2.py @@ -3,7 +3,7 @@ from pathlib import Path import benchbuild as bb -from benchbuild.command import Command, SourceRoot, WorkloadSet +from benchbuild.command import SourceRoot, WorkloadSet from benchbuild.source import HTTPMultiple from benchbuild.utils.cmd import cmake, make from benchbuild.utils.revision_ranges import RevisionRange, GoodBadSubgraph @@ -20,6 +20,8 @@ BinaryType, verify_binaries, ) +from varats.project.sources import FeatureSource +from varats.project.varats_command import VCommand from varats.project.varats_project import VProject from varats.utils.git_util import ( ShortCommitHash, @@ -56,7 +58,8 @@ class Bzip2(VProject): "countries-land-1m.geo.json", "countries-land-10m.geo.json", "countries-land-100m.geo.json" ] - ) + ), + FeatureSource() ] _AUTOTOOLS_VERSIONS = GoodBadSubgraph([ "8cfd87aed5ba8843af50569fb440489b1ca74259" 
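The following hunk switches the bzip2 workload from a plain `Command` to a `VCommand` with `requires_all_args={"--compress"}`, so that `workload_commands` only selects it when the run configuration actually passes `--compress`. A rough sketch of that argument filtering, using simplified sets instead of the real `can_be_executed_by` implementation:

```python
import typing as tp

def sketch_can_run(
    extra_options: tp.Set[str],
    requires_all_args: tp.Set[str],
    requires_any_args: tp.Set[str],
) -> bool:
    """Simplified stand-in for VCommand's argument filtering: every
    argument in requires_all_args must be configured and, if
    requires_any_args is non-empty, at least one of those as well."""
    return requires_all_args.issubset(extra_options) and (
        not requires_any_args or bool(requires_any_args & extra_options)
    )

# With config options {"--compress"} the bzip2 workload below is kept ...
assert sketch_can_run({"--compress"}, {"--compress"}, set())
# ... while a decompression-only configuration drops it.
assert not sketch_can_run({"--decompress"}, {"--compress"}, set())
```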
@@ -80,11 +83,11 @@ class Bzip2(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.MEDIUM): [ - Command( + VCommand( SourceRoot("bzip2") / RSBinary("bzip2"), "--compress", - "--best", - "-vvv", + # "--best", + # "-vvv", "--keep", # bzip2 compresses very fast even on the best setting, so we # need the three input files to get approximately 30 seconds @@ -92,11 +95,13 @@ class Bzip2(VProject): "geo-maps/countries-land-1m.geo.json", "geo-maps/countries-land-10m.geo.json", "geo-maps/countries-land-100m.geo.json", + label="med_geo", creates=[ "geo-maps/countries-land-1m.geo.json.bz2", "geo-maps/countries-land-10m.geo.json.bz2", "geo-maps/countries-land-100m.geo.json.bz2" - ] + ], + requires_all_args={"--compress"} ) ], } From 4fde0f1c5ae32ce86d633874263d726b33e059c4 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 6 Oct 2023 19:57:12 +0200 Subject: [PATCH 140/224] A few tweaks for dune --- .../vara/feature_perf_precision.py | 40 +++++++++++++++++-- varats/varats/projects/cpp_projects/dune.py | 6 ++- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index baefd6258..affef995b 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -57,6 +57,7 @@ def perf_prec_workload_commands( project: VProject, binary: ProjectBinaryWrapper ) -> tp.List[ProjectCommand]: + """Uniformly select the workloads that should be processed.""" return workload_commands(project, binary, [ WorkloadCategory.EXAMPLE ]) + workload_commands(project, binary, [ @@ -64,6 +65,25 @@ def perf_prec_workload_commands( ]) + workload_commands(project, binary, [WorkloadCategory.MEDIUM]) +def select_project_binaries(project: VProject) -> tp.List[ProjectBinaryWrapper]: + """Uniformly select the binaries that should be analyzed.""" + if project.name == "DunePerfRegression": + return [ + binary for binary in project.binaries + if binary.name == "poisson_yasp_q2_3d" + ] + + return [project.binaries[0]] + + +def get_extra_cflags(project: VProject) -> tp.List[str]: + if project.name == "DunePerfRegression": + # Disable phasar for dune as the analysis cannot handle dunes size + return ["-mllvm", "--vara-disable-phasar"] + + return [] + + class AnalysisProjectStepBase(OutputFolderStep): project: VProject @@ -386,6 +406,8 @@ def setup_actions_for_vara_experiment( instr_type, project=project, instruction_threshold=threshold ) + project.cflags += get_extra_cflags(project) + project.ldflags += experiment.get_vara_tracing_ldflags() # Add the required runtime extensions to the project(s). @@ -403,7 +425,8 @@ def setup_actions_for_vara_experiment( experiment.get_handle(), project, experiment.REPORT_SPEC.main_report ) - binary = project.binaries[0] + # TODO: change to multiple binaries + binary = select_project_binaries(project)[0] if binary.type != BinaryType.EXECUTABLE: raise AssertionError("Experiment only works with executables.") @@ -627,6 +650,8 @@ def actions_for_project( """ project.cflags += ["-flto", "-fuse-ld=lld", "-fno-omit-frame-pointer"] + project.cflags += get_extra_cflags(project) + project.ldflags += self.get_vara_tracing_ldflags() # Add the required runtime extensions to the project(s). 
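The two helpers introduced above, `get_extra_cflags` and `select_project_binaries`, are meant to be used together by the experiments. A minimal usage sketch; the driver function is hypothetical, only `DunePerfRegression`, `poisson_yasp_q2_3d`, and the phasar flags come from this patch:

```python
def configure_for_analysis(project: VProject) -> ProjectBinaryWrapper:
    """Hypothetical driver combining the helpers above; VProject and
    ProjectBinaryWrapper are assumed to be imported as in this module."""
    # For DunePerfRegression this appends ["-mllvm", "--vara-disable-phasar"],
    # for all other projects it is a no-op.
    project.cflags += get_extra_cflags(project)

    # For DunePerfRegression this picks poisson_yasp_q2_3d, otherwise the
    # first project binary; the experiments still analyze a single binary,
    # as the "TODO: change to multiple binaries" markers note.
    binaries = select_project_binaries(project)
    if not binaries:
        raise AssertionError("Project provides no binaries to analyze.")
    return binaries[0]
```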
@@ -644,7 +669,8 @@ def actions_for_project( self.get_handle(), project, self.REPORT_SPEC.main_report ) - binary = project.binaries[0] + # TODO: change to multiple binaries + binary = select_project_binaries(project)[0] if binary.type != BinaryType.EXECUTABLE: raise AssertionError("Experiment only works with executables.") @@ -937,6 +963,8 @@ def setup_actions_for_vara_overhead_experiment( instr_type, project=project, instruction_threshold=threshold ) + project.cflags += get_extra_cflags(project) + project.ldflags += experiment.get_vara_tracing_ldflags() # Add the required runtime extensions to the project(s). @@ -954,7 +982,8 @@ def setup_actions_for_vara_overhead_experiment( experiment.get_handle(), project, experiment.REPORT_SPEC.main_report ) - binary = project.binaries[0] + # TODO: change to multiple binaries + binary = select_project_binaries(project)[0] if binary.type != BinaryType.EXECUTABLE: raise AssertionError("Experiment only works with executables.") @@ -1153,6 +1182,8 @@ def actions_for_project( """ project.cflags += ["-flto", "-fuse-ld=lld", "-fno-omit-frame-pointer"] + project.cflags += get_extra_cflags(project) + project.ldflags += self.get_vara_tracing_ldflags() # Add the required runtime extensions to the project(s). @@ -1170,7 +1201,8 @@ def actions_for_project( self.get_handle(), project, self.REPORT_SPEC.main_report ) - binary = project.binaries[0] + # TODO: change to multiple binaries + binary = select_project_binaries(project)[0] if binary.type != BinaryType.EXECUTABLE: raise AssertionError("Experiment only works with executables.") diff --git a/varats/varats/projects/cpp_projects/dune.py b/varats/varats/projects/cpp_projects/dune.py index aba9ef16d..37c2a2d71 100644 --- a/varats/varats/projects/cpp_projects/dune.py +++ b/varats/varats/projects/cpp_projects/dune.py @@ -283,7 +283,11 @@ def compile(self) -> None: cxx_compiler = bb.compiler.cxx(self) with local.cwd(version_source): - with local.env(CC=c_compiler, CXX=cxx_compiler): + with local.env( + CC=c_compiler, + CXX=cxx_compiler, + CMAKE_FLAGS="-DDUNE_ENABLE_PYTHONBINDINGS=OFF" + ): dunecontrol = cmd['./dune-common/bin/dunecontrol'] bb.watch(dunecontrol From e4171e5124734182892462c2c86030b8db731226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Sat, 7 Oct 2023 20:07:39 +0200 Subject: [PATCH 141/224] Adds token for runtime config params for workload commands. --- .../SynthIPRuntime_0.case_study | 25 ++++++++++++++++ tests/experiment/test_workload_util.py | 30 +++++++++++++++++++ .../varats/experiment/workload_util.py | 17 +++++++++++ .../perf_tests/feature_perf_cs_collection.py | 14 +++++---- 4 files changed, 81 insertions(+), 5 deletions(-) create mode 100644 tests/TEST_INPUTS/paper_configs/test_config_ids/SynthIPRuntime_0.case_study diff --git a/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthIPRuntime_0.case_study b/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthIPRuntime_0.case_study new file mode 100644 index 000000000..723eb0149 --- /dev/null +++ b/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthIPRuntime_0.case_study @@ -0,0 +1,25 @@ +--- +DocType: CaseStudy +Version: 1 +... +--- +project_name: SynthIPRuntime +stages: +- revisions: + - commit_hash: 793035062810ea3a2d9a10f831cd199fbbb82090 + commit_id: 64 + config_ids: + - 0 + - 1 + - 2 + - 3 + - 4 +version: 0 +... 
+--- +config_type: PlainCommandlineConfiguration +0: '["-d"]' +1: '["-c"]' +2: '["-c", "-1"]' +3: '["-c", "-2"]' +4: '["-c", "-1", "-2"]' diff --git a/tests/experiment/test_workload_util.py b/tests/experiment/test_workload_util.py index e98e56d00..201519fc2 100644 --- a/tests/experiment/test_workload_util.py +++ b/tests/experiment/test_workload_util.py @@ -11,6 +11,7 @@ from varats.projects.c_projects.xz import Xz from varats.projects.perf_tests.feature_perf_cs_collection import ( SynthIPTemplate, + SynthIPRuntime, ) from varats.utils.git_util import ShortCommitHash from varats.utils.settings import vara_cfg @@ -72,6 +73,35 @@ def test_workload_commands_requires(self) -> None: ) self.assertEqual(len(commands), 1) + @run_in_test_environment(UnitTestFixtures.PAPER_CONFIGS) + def test_workload_config_param_token(self) -> None: + vara_cfg()['paper_config']['current_config'] = "test_config_ids" + load_paper_config() + + revision = Revision( + SynthIPRuntime, Variant(SynthIPRuntime.SOURCE[0], "7930350628"), + Variant(SynthIPRuntime.SOURCE[1], "1") + ) + project = SynthIPRuntime(revision=revision) + binary = SynthIPRuntime.binaries_for_revision( + ShortCommitHash("7930350628") + )[0] + + commands = wu.workload_commands( + project, binary, [wu.WorkloadCategory.SMALL] + ) + self.assertEqual(len(commands), 1) + command = commands[0] + command.path.parent.mkdir(parents=True, exist_ok=True) + command.path.touch() # as_plumbum asserts the path exists + plumbum_command = command.command.as_plumbum(project=project) + self.assertEquals( + plumbum_command.args, [ + "-c", "<", "geo-maps/countries-land-1km.geo.json", ">", + "geo-maps/countries-land-1km.geo.json.compressed" + ] + ) + @run_in_test_environment(UnitTestFixtures.PAPER_CONFIGS) def test_workload_commands_requires_patch(self) -> None: vara_cfg()['paper_config']['current_config'] = "test_config_ids" diff --git a/varats-core/varats/experiment/workload_util.py b/varats-core/varats/experiment/workload_util.py index 38b82720f..878ac66d6 100644 --- a/varats-core/varats/experiment/workload_util.py +++ b/varats-core/varats/experiment/workload_util.py @@ -11,6 +11,7 @@ from pathlib import Path from benchbuild.command import ( + ArgsToken, PathToken, ProjectCommand, unwrap, @@ -71,6 +72,22 @@ def specify_binary(binary_name: str) -> PathToken: RSBinary = specify_binary +class ConfigurationParameterRenderer: + + def unrendered(self) -> str: + return f"" + + def rendered(self, project: VProject, **kwargs: tp.Any) -> tp.Tuple[str]: + return tuple(get_extra_config_options(project)) + + +def specify_configuration_parameters() -> ArgsToken: + return ArgsToken.make_token(ConfigurationParameterRenderer()) + + +ConfigParams = specify_configuration_parameters + + def workload_commands( project: VProject, binary: ProjectBinaryWrapper, requested_workload_tags: tp.List[WorkloadCategory] diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 0674170ee..ec04656a5 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -10,7 +10,11 @@ from benchbuild.utils.settings import get_number_of_jobs from plumbum import local -from varats.experiment.workload_util import RSBinary, WorkloadCategory +from varats.experiment.workload_util import ( + RSBinary, + WorkloadCategory, + ConfigParams, +) from varats.paper.paper_config import project_filter_generator from varats.project.project_domain import 
ProjectDomains from varats.project.project_util import ( @@ -414,6 +418,7 @@ class SynthIPRuntime(VProject): shallow=False, version_filter=project_filter_generator("SynthIPRuntime") ), + FeatureSource(), HTTPMultiple( local="geo-maps", remote={ @@ -422,15 +427,14 @@ class SynthIPRuntime(VProject): "download/v0.6.0" }, files=["countries-land-1km.geo.json", "countries-land-1m.geo.json"] - ), - FeatureSource() + ) ] WORKLOADS = { WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - "-c", + ConfigParams(), "<", "geo-maps/countries-land-1km.geo.json", ">", @@ -443,7 +447,7 @@ class SynthIPRuntime(VProject): WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - "-c", + ConfigParams(), "<", "geo-maps/countries-land-1m.geo.json", ">", From 070db2ef93eddc38e86d6d88b3d4f1cbea86675d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Sat, 7 Oct 2023 20:24:39 +0200 Subject: [PATCH 142/224] Fix typing --- varats-core/varats/experiment/workload_util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/varats-core/varats/experiment/workload_util.py b/varats-core/varats/experiment/workload_util.py index 878ac66d6..a5cae1287 100644 --- a/varats-core/varats/experiment/workload_util.py +++ b/varats-core/varats/experiment/workload_util.py @@ -77,7 +77,8 @@ class ConfigurationParameterRenderer: def unrendered(self) -> str: return f"" - def rendered(self, project: VProject, **kwargs: tp.Any) -> tp.Tuple[str]: + def rendered(self, project: VProject, + **kwargs: tp.Any) -> tp.Tuple[str, ...]: return tuple(get_extra_config_options(project)) From 0dce960979f5ea87d07e1db5779681b6ee9e6dc8 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 7 Oct 2023 21:16:28 +0200 Subject: [PATCH 143/224] Merge --- tests/experiment/test_workload_util.py | 30 +++++++++++++++++++ .../varats/experiment/workload_util.py | 18 +++++++++++ .../perf_tests/feature_perf_cs_collection.py | 14 +++++---- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/tests/experiment/test_workload_util.py b/tests/experiment/test_workload_util.py index e98e56d00..201519fc2 100644 --- a/tests/experiment/test_workload_util.py +++ b/tests/experiment/test_workload_util.py @@ -11,6 +11,7 @@ from varats.projects.c_projects.xz import Xz from varats.projects.perf_tests.feature_perf_cs_collection import ( SynthIPTemplate, + SynthIPRuntime, ) from varats.utils.git_util import ShortCommitHash from varats.utils.settings import vara_cfg @@ -72,6 +73,35 @@ def test_workload_commands_requires(self) -> None: ) self.assertEqual(len(commands), 1) + @run_in_test_environment(UnitTestFixtures.PAPER_CONFIGS) + def test_workload_config_param_token(self) -> None: + vara_cfg()['paper_config']['current_config'] = "test_config_ids" + load_paper_config() + + revision = Revision( + SynthIPRuntime, Variant(SynthIPRuntime.SOURCE[0], "7930350628"), + Variant(SynthIPRuntime.SOURCE[1], "1") + ) + project = SynthIPRuntime(revision=revision) + binary = SynthIPRuntime.binaries_for_revision( + ShortCommitHash("7930350628") + )[0] + + commands = wu.workload_commands( + project, binary, [wu.WorkloadCategory.SMALL] + ) + self.assertEqual(len(commands), 1) + command = commands[0] + command.path.parent.mkdir(parents=True, exist_ok=True) + command.path.touch() # as_plumbum asserts the path exists + plumbum_command = command.command.as_plumbum(project=project) + self.assertEquals( + plumbum_command.args, [ + "-c", "<", 
"geo-maps/countries-land-1km.geo.json", ">", + "geo-maps/countries-land-1km.geo.json.compressed" + ] + ) + @run_in_test_environment(UnitTestFixtures.PAPER_CONFIGS) def test_workload_commands_requires_patch(self) -> None: vara_cfg()['paper_config']['current_config'] = "test_config_ids" diff --git a/varats-core/varats/experiment/workload_util.py b/varats-core/varats/experiment/workload_util.py index 38b82720f..a5cae1287 100644 --- a/varats-core/varats/experiment/workload_util.py +++ b/varats-core/varats/experiment/workload_util.py @@ -11,6 +11,7 @@ from pathlib import Path from benchbuild.command import ( + ArgsToken, PathToken, ProjectCommand, unwrap, @@ -71,6 +72,23 @@ def specify_binary(binary_name: str) -> PathToken: RSBinary = specify_binary +class ConfigurationParameterRenderer: + + def unrendered(self) -> str: + return f"" + + def rendered(self, project: VProject, + **kwargs: tp.Any) -> tp.Tuple[str, ...]: + return tuple(get_extra_config_options(project)) + + +def specify_configuration_parameters() -> ArgsToken: + return ArgsToken.make_token(ConfigurationParameterRenderer()) + + +ConfigParams = specify_configuration_parameters + + def workload_commands( project: VProject, binary: ProjectBinaryWrapper, requested_workload_tags: tp.List[WorkloadCategory] diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 04f92776e..5b87f439f 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -11,7 +11,11 @@ from plumbum import local from varats.containers.containers import get_base_image, ImageBase -from varats.experiment.workload_util import RSBinary, WorkloadCategory +from varats.experiment.workload_util import ( + RSBinary, + WorkloadCategory, + ConfigParams, +) from varats.paper.paper_config import project_filter_generator from varats.project.project_domain import ProjectDomains from varats.project.project_util import ( @@ -732,6 +736,7 @@ class SynthIPRuntime(VProject): shallow=False, version_filter=project_filter_generator("SynthIPRuntime") ), + FeatureSource(), HTTPMultiple( local="geo-maps", remote={ @@ -740,15 +745,14 @@ class SynthIPRuntime(VProject): "download/v0.6.0" }, files=["countries-land-1km.geo.json", "countries-land-1m.geo.json"] - ), - FeatureSource() + ) ] WORKLOADS = { WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - "-c", + ConfigParams(), "<", "geo-maps/countries-land-1km.geo.json", ">", @@ -761,7 +765,7 @@ class SynthIPRuntime(VProject): WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - "-c", + ConfigParams(), "<", "geo-maps/countries-land-1m.geo.json", ">", From 4b75cd5b970a9eb63a0ab6372a5ea263b53f5a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Sun, 8 Oct 2023 11:28:09 +0200 Subject: [PATCH 144/224] Allow default values for ConfigParams() --- varats-core/varats/experiment/workload_util.py | 12 +++++++++--- .../perf_tests/feature_perf_cs_collection.py | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/varats-core/varats/experiment/workload_util.py b/varats-core/varats/experiment/workload_util.py index a5cae1287..f35d617e7 100644 --- a/varats-core/varats/experiment/workload_util.py +++ b/varats-core/varats/experiment/workload_util.py @@ -20,9 +20,10 @@ Command, ) +from varats.base.configuration import PlainCommandlineConfiguration 
from varats.experiment.experiment_util import ( get_extra_config_options, - get_config_patches, + get_config, ) from varats.project.project_util import ProjectBinaryWrapper from varats.project.varats_command import VCommand @@ -74,16 +75,21 @@ def specify_binary(binary_name: str) -> PathToken: class ConfigurationParameterRenderer: + def __init__(self, *default_args: str) -> None: + self.__default_args = default_args + def unrendered(self) -> str: return f"" def rendered(self, project: VProject, **kwargs: tp.Any) -> tp.Tuple[str, ...]: + if get_config(project, PlainCommandlineConfiguration) is None: + return self.__default_args return tuple(get_extra_config_options(project)) -def specify_configuration_parameters() -> ArgsToken: - return ArgsToken.make_token(ConfigurationParameterRenderer()) +def specify_configuration_parameters(*default_args: str) -> ArgsToken: + return ArgsToken.make_token(ConfigurationParameterRenderer(*default_args)) ConfigParams = specify_configuration_parameters diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index ec04656a5..b51700cbd 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -434,7 +434,7 @@ class SynthIPRuntime(VProject): WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams(), + ConfigParams("-c"), "<", "geo-maps/countries-land-1km.geo.json", ">", @@ -447,7 +447,7 @@ class SynthIPRuntime(VProject): WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams(), + ConfigParams("-c"), "<", "geo-maps/countries-land-1m.geo.json", ">", From 9399aa277debfd0604f1fd87765391ce26073b05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Sun, 8 Oct 2023 11:28:22 +0200 Subject: [PATCH 145/224] Refactor workload filtering --- .../varats/experiment/workload_util.py | 17 ++---- varats-core/varats/project/varats_command.py | 53 +++++++++++-------- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/varats-core/varats/experiment/workload_util.py b/varats-core/varats/experiment/workload_util.py index f35d617e7..d97d71bfe 100644 --- a/varats-core/varats/experiment/workload_util.py +++ b/varats-core/varats/experiment/workload_util.py @@ -26,7 +26,7 @@ get_config, ) from varats.project.project_util import ProjectBinaryWrapper -from varats.project.varats_command import VCommand +from varats.project.varats_command import VProjectCommand from varats.project.varats_project import VProject from varats.report.report import KeyedReportAggregate, ReportTy from varats.utils.exceptions import auto_unwrap @@ -113,26 +113,17 @@ def workload_commands( if requested_workload_tags: run_only = WorkloadSet(*requested_workload_tags) - project_cmds: tp.List[ProjectCommand] = [ - ProjectCommand(project, workload_cmd) + project_cmds: tp.List[VProjectCommand] = [ + VProjectCommand(project, workload_cmd) for workload_cmd in itertools.chain( * filter_workload_index(run_only, unwrap(project.workloads, project)) ) ] - # Filter commands that have required args and patches set. 
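# Sketch of the resulting control flow (names as introduced in this series):
# workload_commands() now wraps each workload in a VProjectCommand and keeps
# a command only if
#
#     cmd.path.name == binary.name and cmd.can_be_executed()
#
# where can_be_executed() checks the rendered arguments and the applied
# configuration patches against the command's requires_any_args,
# requires_all_args, requires_any_patch, and requires_all_patch sets.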
- extra_options = set(get_extra_config_options(project)) - patches = get_config_patches(project) - - def filter_by_config(prj_cmd: ProjectCommand) -> bool: - if isinstance(prj_cmd.command, VCommand): - return prj_cmd.command.can_be_executed_by(extra_options, patches) - return True - return [ cmd for cmd in project_cmds - if cmd.path.name == binary.name and filter_by_config(cmd) + if cmd.path.name == binary.name and cmd.can_be_executed() ] diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py index 314a1ee55..551185fae 100644 --- a/varats-core/varats/project/varats_command.py +++ b/varats-core/varats/project/varats_command.py @@ -1,10 +1,10 @@ """Custom version of benchbuild's Command for use with the VaRA-Tool-Suite.""" import typing as tp -from benchbuild.command import Command +from benchbuild.command import Command, ProjectCommand -if tp.TYPE_CHECKING: - import varats.provider.patch.patch_provider as patch_provider +from varats.experiment.experiment_util import get_config_patches +from varats.project.varats_project import VProject class VCommand(Command): # type: ignore [misc] @@ -56,37 +56,44 @@ def requires_any_patch(self) -> tp.Set[str]: def requires_all_patch(self) -> tp.Set[str]: return self._requires_all_patch - def can_be_executed_by( - self, extra_args: tp.Set[str], - applied_patches: 'patch_provider.PatchSet' - ) -> bool: - """ - Checks whether this command can be executed with the give configuration. - Args: - extra_args: additional command line arguments that will be passed to - the command - applied_patches: patches that were applied to create the executable +class VProjectCommand(ProjectCommand): # type: ignore + + def __init__(self, project: VProject, command: Command): + super().__init__(project, command) + self.v_command = command if isinstance(command, VCommand) else None + self.v_project = project + + def can_be_executed(self) -> bool: + """ + Checks whether this command can be executed with the given + configuration. 
Returns: whether this command can be executed """ - all_args = set(self._args).union(extra_args) + # non-VCommands do not support filtering by configuration, so we default + # to using them as-is + if self.v_command is None: + return True + + all_args = self.v_command.as_plumbum(project=self.project).args all_patch_tags: tp.Set[str] = set() - for patch in applied_patches: + + for patch in get_config_patches(self.v_project): if patch.feature_tags: all_patch_tags.update(patch.feature_tags) return bool(( - not self.requires_any_args or - all_args.intersection(self.requires_any_args) + not self.v_command.requires_any_args or + all_args.intersection(self.v_command.requires_any_args) ) and ( - not self.requires_all_args or - self.requires_all_args.issubset(all_args) + not self.v_command.requires_all_args or + self.v_command.requires_all_args.issubset(all_args) ) and ( - not self.requires_any_patch or - all_patch_tags.intersection(self.requires_any_patch) + not self.v_command.requires_any_patch or + all_patch_tags.intersection(self.v_command.requires_any_patch) ) and ( - not self.requires_all_patch or - self.requires_all_patch.issubset(all_patch_tags) + not self.v_command.requires_all_patch or + self.v_command.requires_all_patch.issubset(all_patch_tags) )) From bcd97e6b0c38ce2d6d9f9cc0bce3c0648a7895ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Sun, 8 Oct 2023 11:49:28 +0200 Subject: [PATCH 146/224] Move get_config* functions to utils.config --- tests/utils/test_experiment_util.py | 13 ++- .../varats/experiment/experiment_util.py | 97 +---------------- .../varats/experiment/workload_util.py | 5 +- varats-core/varats/project/varats_command.py | 10 +- varats-core/varats/utils/config.py | 101 +++++++++++++++++- .../varats/experiments/base/just_compile.py | 2 +- .../experiments/vara/feature_experiment.py | 3 +- 7 files changed, 118 insertions(+), 113 deletions(-) diff --git a/tests/utils/test_experiment_util.py b/tests/utils/test_experiment_util.py index a6bc93bd1..b82ea7e03 100644 --- a/tests/utils/test_experiment_util.py +++ b/tests/utils/test_experiment_util.py @@ -29,6 +29,11 @@ ) from varats.report.gnu_time_report import TimeReport from varats.report.report import FileStatusExtension, ReportSpecification +from varats.utils.config import ( + get_current_config_id, + get_extra_config_options, + get_config_patches, +) from varats.utils.git_util import ShortCommitHash from varats.utils.settings import vara_cfg, bb_cfg @@ -402,7 +407,7 @@ class TestConfigID(unittest.TestCase): def test_get_current_config_id_no_config(self) -> None: revision = Revision(Xz, Variant(Xz.SOURCE[0], "c5c7ceb08a")) project = Xz(revision=revision) - self.assertEqual(EU.get_current_config_id(project), None) + self.assertEqual(get_current_config_id(project), None) def test_get_current_config_id(self) -> None: revision = Revision( @@ -410,7 +415,7 @@ def test_get_current_config_id(self) -> None: Variant(Xz.SOURCE[1], "42") ) project = Xz(revision=revision) - self.assertEqual(EU.get_current_config_id(project), 42) + self.assertEqual(get_current_config_id(project), 42) @run_in_test_environment(UnitTestFixtures.PAPER_CONFIGS) def test_get_extra_config_options(self) -> None: @@ -421,7 +426,7 @@ def test_get_extra_config_options(self) -> None: Xz, Variant(Xz.SOURCE[0], "c5c7ceb08a"), Variant(Xz.SOURCE[1], "1") ) project = Xz(revision=revision) - self.assertEqual(EU.get_extra_config_options(project), ["--foo"]) + self.assertEqual(get_extra_config_options(project), ["--foo"]) 
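# Illustration of the moved helpers (behavior is unchanged by this patch):
# for a variant whose config id maps to '["--foo"]', as in the test above,
#
#     from varats.utils.config import get_extra_config_options
#     get_extra_config_options(project)   # == ["--foo"]
#
# while a project without a config variant yields [] here and None from
# get_current_config_id().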
@run_in_test_environment(UnitTestFixtures.PAPER_CONFIGS) def test_get_config_patches(self) -> None: @@ -433,7 +438,7 @@ def test_get_config_patches(self) -> None: Variant(SynthIPTemplate.SOURCE[1], "4") ) project = SynthIPTemplate(revision=revision) - patches = EU.get_config_patches(project) + patches = get_config_patches(project) self.assertEqual(len(patches), 1) self.assertEqual( list(patches)[0].feature_tags, diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index bad60ba6f..78e2aee8c 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -22,17 +22,9 @@ from plumbum.commands.base import BoundCommand import varats.revision.revisions as revs -from varats.base.configuration import ( - PlainCommandlineConfiguration, - PatchConfiguration, - Configuration, -) from varats.experiment.steps.patch import ApplyPatch -from varats.paper.paper_config import get_paper_config from varats.project.project_util import ProjectBinaryWrapper -from varats.project.sources import FeatureSource from varats.project.varats_project import VProject -from varats.provider.patch.patch_provider import PatchSet, PatchProvider from varats.report.report import ( BaseReport, FileStatusExtension, @@ -40,7 +32,7 @@ ReportSpecification, ReportFilename, ) -from varats.utils.config import load_configuration_map_for_case_study +from varats.utils.config import get_config_patches from varats.utils.git_util import ShortCommitHash from varats.utils.settings import vara_cfg, bb_cfg @@ -681,93 +673,6 @@ def create_new_failed_result_filepath( ) -def get_current_config_id(project: VProject) -> tp.Optional[int]: - """ - Get, if available, the current config id of project. Should the project be - not configuration specific ``None`` is returned. - - Args: - project: to extract the config id from - - Returns: - config_id if available for the given project - """ - if project.active_revision.has_variant(FeatureSource.LOCAL_KEY): - return int( - project.active_revision.variant_by_name(FeatureSource.LOCAL_KEY - ).version - ) - - return None - - -def get_config( - project: VProject, config_type: tp.Type[Configuration] -) -> tp.Optional[Configuration]: - config_id = get_current_config_id(project) - if config_id is None: - return None - - paper_config = get_paper_config() - case_studies = paper_config.get_case_studies(cs_name=project.name) - - if len(case_studies) > 1: - raise AssertionError( - "Cannot handle multiple case studies of the same project." - ) - - case_study = case_studies[0] - - config_map = load_configuration_map_for_case_study( - paper_config, case_study, config_type - ) - - config = config_map.get_configuration(config_id) - - return config - - -def get_extra_config_options(project: VProject) -> tp.List[str]: - """ - Get extra program options that were specified in the particular - configuration of \a Project. - - Args: - project: to get the extra options for - - Returns: - list of command line options as string - """ - config = get_config(project, PlainCommandlineConfiguration) - if not config: - return [] - return list(map(lambda option: option.value, config.options())) - - -def get_config_patches(project: VProject) -> PatchSet: - """ - Get required patches for the particular configuration of \a Project. 
- - Args: - project: to get the patches for - - Returns: - list of patches - """ - config = get_config(project, PatchConfiguration) - if not config: - return PatchSet(set()) - - patch_provider = PatchProvider.create_provider_for_project(project) - revision = ShortCommitHash(project.revision.primary.version) - feature_tags = {opt.value for opt in config.options()} - patches = patch_provider.get_patches_for_revision(revision).all_of_features( - feature_tags - ) - - return patches - - def get_config_patch_steps(project: VProject) -> tp.MutableSequence[Step]: """ Get a list of actions that apply all configuration patches to the project. diff --git a/varats-core/varats/experiment/workload_util.py b/varats-core/varats/experiment/workload_util.py index d97d71bfe..3d4286414 100644 --- a/varats-core/varats/experiment/workload_util.py +++ b/varats-core/varats/experiment/workload_util.py @@ -21,14 +21,11 @@ ) from varats.base.configuration import PlainCommandlineConfiguration -from varats.experiment.experiment_util import ( - get_extra_config_options, - get_config, -) from varats.project.project_util import ProjectBinaryWrapper from varats.project.varats_command import VProjectCommand from varats.project.varats_project import VProject from varats.report.report import KeyedReportAggregate, ReportTy +from varats.utils.config import get_config, get_extra_config_options from varats.utils.exceptions import auto_unwrap diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py index 551185fae..4861763f1 100644 --- a/varats-core/varats/project/varats_command.py +++ b/varats-core/varats/project/varats_command.py @@ -3,8 +3,10 @@ from benchbuild.command import Command, ProjectCommand -from varats.experiment.experiment_util import get_config_patches -from varats.project.varats_project import VProject +from varats.utils.config import get_config_patches + +if tp.TYPE_CHECKING: + from varats.project.varats_project import VProject class VCommand(Command): # type: ignore [misc] @@ -59,7 +61,7 @@ def requires_all_patch(self) -> tp.Set[str]: class VProjectCommand(ProjectCommand): # type: ignore - def __init__(self, project: VProject, command: Command): + def __init__(self, project: 'VProject', command: Command): super().__init__(project, command) self.v_command = command if isinstance(command, VCommand) else None self.v_project = project @@ -77,7 +79,7 @@ def can_be_executed(self) -> bool: if self.v_command is None: return True - all_args = self.v_command.as_plumbum(project=self.project).args + all_args = set(self.v_command.rendered_args()) all_patch_tags: tp.Set[str] = set() for patch in get_config_patches(self.v_project): diff --git a/varats-core/varats/utils/config.py b/varats-core/varats/utils/config.py index 9cdb950c9..c76e6c35c 100644 --- a/varats-core/varats/utils/config.py +++ b/varats-core/varats/utils/config.py @@ -2,13 +2,23 @@ import typing as tp from pathlib import Path -from varats.base.configuration import Configuration +from varats.base.configuration import ( + Configuration, + PlainCommandlineConfiguration, + PatchConfiguration, +) from varats.mapping.configuration_map import ConfigurationMap from varats.paper.case_study import ( CaseStudy, load_configuration_map_from_case_study_file, ) -from varats.paper.paper_config import PaperConfig +from varats.paper.paper_config import PaperConfig, get_paper_config +from varats.project.sources import FeatureSource +from varats.provider.patch.patch_provider import PatchSet, PatchProvider +from varats.utils.git_util 
import ShortCommitHash + +if tp.TYPE_CHECKING: + from varats.project.varats_project import VProject def load_configuration_map_for_case_study( @@ -34,3 +44,90 @@ def load_configuration_map_for_case_study( f"{case_study.project_name}_{case_study.version}.case_study" ), concrete_config_type ) + + +def get_current_config_id(project: 'VProject') -> tp.Optional[int]: + """ + Get, if available, the current config id of project. Should the project be + not configuration specific ``None`` is returned. + + Args: + project: to extract the config id from + + Returns: + config_id if available for the given project + """ + if project.active_revision.has_variant(FeatureSource.LOCAL_KEY): + return int( + project.active_revision.variant_by_name(FeatureSource.LOCAL_KEY + ).version + ) + + return None + + +def get_config( + project: 'VProject', config_type: tp.Type[Configuration] +) -> tp.Optional[Configuration]: + config_id = get_current_config_id(project) + if config_id is None: + return None + + paper_config = get_paper_config() + case_studies = paper_config.get_case_studies(cs_name=project.name) + + if len(case_studies) > 1: + raise AssertionError( + "Cannot handle multiple case studies of the same project." + ) + + case_study = case_studies[0] + + config_map = load_configuration_map_for_case_study( + paper_config, case_study, config_type + ) + + config = config_map.get_configuration(config_id) + + return config + + +def get_extra_config_options(project: 'VProject') -> tp.List[str]: + """ + Get extra program options that were specified in the particular + configuration of \a Project. + + Args: + project: to get the extra options for + + Returns: + list of command line options as string + """ + config = get_config(project, PlainCommandlineConfiguration) + if not config: + return [] + return list(map(lambda option: option.value, config.options())) + + +def get_config_patches(project: 'VProject') -> PatchSet: + """ + Get required patches for the particular configuration of \a Project. 
+ + Args: + project: to get the patches for + + Returns: + list of patches + """ + config = get_config(project, PatchConfiguration) + if not config: + return PatchSet(set()) + + patch_provider = PatchProvider.create_provider_for_project(project) + revision = ShortCommitHash(project.revision.primary.version) + feature_tags = {opt.value for opt in config.options()} + patches = patch_provider.get_patches_for_revision(revision).all_of_features( + feature_tags + ) + + return patches diff --git a/varats/varats/experiments/base/just_compile.py b/varats/varats/experiments/base/just_compile.py index 0531bd878..cb91e936a 100644 --- a/varats/varats/experiments/base/just_compile.py +++ b/varats/varats/experiments/base/just_compile.py @@ -15,11 +15,11 @@ get_default_compile_error_wrapped, create_default_analysis_failure_handler, create_new_success_result_filepath, - get_current_config_id, ) from varats.experiment.wllvm import RunWLLVM from varats.project.varats_project import VProject from varats.report.report import ReportSpecification +from varats.utils.config import get_current_config_id # Please take care when changing this file, see docs experiments/just_compile diff --git a/varats/varats/experiments/vara/feature_experiment.py b/varats/varats/experiments/vara/feature_experiment.py index 52d2ffa16..fd4ce236e 100644 --- a/varats/varats/experiments/vara/feature_experiment.py +++ b/varats/varats/experiments/vara/feature_experiment.py @@ -24,9 +24,7 @@ VersionExperiment, ZippedReportFolder, create_new_success_result_filepath, - get_current_config_id, get_default_compile_error_wrapped, - get_extra_config_options, WithUnlimitedStackSize, ) from varats.experiment.trace_util import merge_trace @@ -39,6 +37,7 @@ FeatureModelProvider, ) from varats.report.report import ReportSpecification +from varats.utils.config import get_current_config_id, get_extra_config_options class FeatureInstrType(Enum): From c51899bf15898c9ea3bd23c624b3027d697e77eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Sun, 8 Oct 2023 12:32:28 +0200 Subject: [PATCH 147/224] Fix tests --- .../test_config_ids/SynthIPTemplate_0.case_study | 2 +- tests/experiment/test_workload_util.py | 13 ++++++------- varats-core/varats/project/varats_command.py | 2 +- .../perf_tests/feature_perf_cs_collection.py | 8 ++------ 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthIPTemplate_0.case_study b/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthIPTemplate_0.case_study index 28324d99a..f37a50f77 100644 --- a/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthIPTemplate_0.case_study +++ b/tests/TEST_INPUTS/paper_configs/test_config_ids/SynthIPTemplate_0.case_study @@ -19,7 +19,7 @@ version: 0 --- config_type: PatchConfiguration 0: '["Decompress"]' -1: '["Compress"]' +1: '["Compress", "no_fastmode", "no_smallmode"]' 2: '["Compress", "fastmode", "no_smallmode"]' 3: '["Compress", "no_fastmode", "smallmode"]' 4: '["Compress", "fastmode", "smallmode"]' diff --git a/tests/experiment/test_workload_util.py b/tests/experiment/test_workload_util.py index 201519fc2..7b41acc41 100644 --- a/tests/experiment/test_workload_util.py +++ b/tests/experiment/test_workload_util.py @@ -92,14 +92,13 @@ def test_workload_config_param_token(self) -> None: ) self.assertEqual(len(commands), 1) command = commands[0] - command.path.parent.mkdir(parents=True, exist_ok=True) - command.path.touch() # as_plumbum asserts the path exists - plumbum_command = 
command.command.as_plumbum(project=project) + args = command.command.rendered_args(project=project) self.assertEquals( - plumbum_command.args, [ + args, + tuple([ "-c", "<", "geo-maps/countries-land-1km.geo.json", ">", "geo-maps/countries-land-1km.geo.json.compressed" - ] + ]) ) @run_in_test_environment(UnitTestFixtures.PAPER_CONFIGS) @@ -116,7 +115,7 @@ def test_workload_commands_requires_patch(self) -> None: ShortCommitHash("7930350628") )[0] workloads = wu.workload_commands(project, binary, []) - self.assertEqual(len(workloads), 2) + self.assertEqual(2, len(workloads)) @run_in_test_environment(UnitTestFixtures.PAPER_CONFIGS) def test_workload_commands_requires_patch2(self) -> None: @@ -131,7 +130,7 @@ def test_workload_commands_requires_patch2(self) -> None: binary = SynthIPTemplate \ .binaries_for_revision(ShortCommitHash("7930350628"))[0] workloads = wu.workload_commands(project, binary, []) - self.assertEqual(len(workloads), 0) + self.assertEqual(0, len(workloads)) class TestWorkloadFilenames(unittest.TestCase): diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py index 4861763f1..b00c1bcac 100644 --- a/varats-core/varats/project/varats_command.py +++ b/varats-core/varats/project/varats_command.py @@ -79,7 +79,7 @@ def can_be_executed(self) -> bool: if self.v_command is None: return True - all_args = set(self.v_command.rendered_args()) + all_args = set(self.v_command.rendered_args(project=self.v_project)) all_patch_tags: tp.Set[str] = set() for patch in get_config_patches(self.v_project): diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index b51700cbd..dac128d31 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -508,7 +508,6 @@ class SynthIPTemplate(VProject): WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPTemplate") / RSBinary("Template"), - "-c", "<", "geo-maps/countries-land-1km.geo.json", ">", @@ -521,7 +520,6 @@ class SynthIPTemplate(VProject): WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPTemplate") / RSBinary("Template"), - "-c", "<", "geo-maps/countries-land-1m.geo.json", ">", @@ -582,7 +580,6 @@ class SynthIPTemplate2(VProject): WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPTemplate2") / RSBinary("Template2"), - "-c", "<", "geo-maps/countries-land-1km.geo.json", ">", @@ -595,7 +592,6 @@ class SynthIPTemplate2(VProject): WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPTemplate2") / RSBinary("Template2"), - "-c", "<", "geo-maps/countries-land-1m.geo.json", ">", @@ -656,7 +652,7 @@ class SynthIPCombined(VProject): WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPCombined") / RSBinary("Combined"), - "-c", + ConfigParams("-c"), "<", "geo-maps/countries-land-1km.geo.json", ">", @@ -669,7 +665,7 @@ class SynthIPCombined(VProject): WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPCombined") / RSBinary("Combined"), - "-c", + ConfigParams("-c"), "<", "geo-maps/countries-land-1m.geo.json", ">", From 8d3bba4978025a7381c9763ecc64398ccd6ef4d9 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 8 Oct 2023 21:42:46 +0200 Subject: [PATCH 148/224] Updates config handling to new VCommand API --- .../vara/feature_perf_precision.py | 58 +++---------------- .../perf_tests/feature_perf_cs_collection.py | 27 ++++++--- 2 files 
changed, 25 insertions(+), 60 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index affef995b..37ecf3290 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -24,9 +24,7 @@ WithUnlimitedStackSize, ZippedReportFolder, create_new_success_result_filepath, - get_current_config_id, get_default_compile_error_wrapped, - get_extra_config_options, ZippedExperimentSteps, OutputFolderStep, get_config_patch_steps, @@ -47,6 +45,7 @@ from varats.report.report import ReportSpecification from varats.report.tef_report import TEFReportAggregate from varats.tools.research_tools.vara import VaRA +from varats.utils.config import get_current_config_id from varats.utils.git_util import ShortCommitHash REPS = 3 @@ -180,14 +179,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: f"Running example {prj_command.command.label}" ) - extra_options = get_extra_config_options( - self.project - ) with cleanup(prj_command): - pb_cmd( - *extra_options, - retcode=self._binary.valid_exit_codes - ) + pb_cmd(retcode=self._binary.valid_exit_codes) return StepResult.OK @@ -252,10 +245,6 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: adapted_binary_location, override=True ) - extra_options = get_extra_config_options( - self.project - ) - bpf_runner = bpf_runner = self.attach_usdt_raw_tracing( local_tracefile_path, adapted_binary_location, @@ -264,7 +253,6 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: with cleanup(prj_command): pb_cmd( - *extra_options, retcode=self._binary.valid_exit_codes ) @@ -350,10 +338,6 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: f"Running example {prj_command.command.label}" ) - extra_options = get_extra_config_options( - self.project - ) - bpf_runner = bpf_runner = self.attach_usdt_bcc( local_tracefile_path, self.project.source_of_primary / @@ -361,10 +345,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) with cleanup(prj_command): - pb_cmd( - *extra_options, - retcode=self._binary.valid_exit_codes - ) + pb_cmd(retcode=self._binary.valid_exit_codes) # wait for bpf script to exit if bpf_runner: @@ -621,12 +602,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: timed_pb_cmd = time["-v", "-o", time_report_file, pb_cmd] - extra_options = get_extra_config_options(self.project) with cleanup(prj_command): - timed_pb_cmd( - *extra_options, - retcode=self.__binary.valid_exit_codes - ) + timed_pb_cmd(retcode=self.__binary.valid_exit_codes) return StepResult.OK @@ -780,14 +757,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: timed_pb_cmd = time["-v", "-o", time_report_file, "--", pb_cmd] - extra_options = get_extra_config_options(self.project) with cleanup(prj_command): - # print("timed_pb_cmd=", str(timed_pb_cmd[*extra_options])) - - timed_pb_cmd( - *extra_options, - retcode=self._binary.valid_exit_codes - ) + timed_pb_cmd(retcode=self._binary.valid_exit_codes) return StepResult.OK @@ -856,10 +827,6 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: timed_pb_cmd = time["-v", "-o", time_report_file, "--", pb_cmd] - extra_options = get_extra_config_options( - self.project - ) - bpf_runner = RunBPFTracedWorkloads.attach_usdt_raw_tracing( fake_tracefile_path, adapted_binary_location, Path(non_nfs_tmp_dir) @@ -867,7 +834,6 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: with cleanup(prj_command): timed_pb_cmd( - *extra_options, 
retcode=self._binary.valid_exit_codes ) @@ -931,18 +897,13 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: timed_pb_cmd = time["-v", "-o", time_report_file, "--", pb_cmd] - extra_options = get_extra_config_options(self.project) - bpf_runner = RunBCCTracedWorkloads.attach_usdt_bcc( fake_tracefile_path, self.project.source_of_primary / self._binary.path ) with cleanup(prj_command): - timed_pb_cmd( - *extra_options, - retcode=self._binary.valid_exit_codes - ) + timed_pb_cmd(retcode=self._binary.valid_exit_codes) # wait for bpf script to exit if bpf_runner: @@ -1152,13 +1113,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: timed_pb_cmd = time["-v", "-o", time_report_file, "--", pb_cmd] - extra_options = get_extra_config_options(self.project) with cleanup(prj_command): - # print("timed_pb_cmd=", str(timed_pb_cmd[*extra_options])) - timed_pb_cmd( - *extra_options, - retcode=self.__binary.valid_exit_codes - ) + timed_pb_cmd(retcode=self.__binary.valid_exit_codes) return StepResult.OK diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 33e062551..2b2cdcd31 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -190,8 +190,9 @@ class SynthSAFieldSensitivity(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot("SynthSAFieldSensitivity") / RSBinary("FieldSense"), + ConfigParams(), label="FieldSense-no-input" ) ] @@ -250,8 +251,9 @@ class SynthSAFlowSensitivity(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot("SynthSAFlowSensitivity") / RSBinary("FlowSense"), + ConfigParams(), label="FlowSense-no-input" ) ] @@ -312,9 +314,10 @@ class SynthSAContextSensitivity(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot("SynthSAContextSensitivity") / RSBinary("ContextSense"), + ConfigParams(), label="ContextSense-no-input" ) ] @@ -373,8 +376,9 @@ class SynthSAWholeProgram(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot("SynthSAWholeProgram") / RSBinary("WholeProgram"), + ConfigParams(), label="WholeProgram-no-input" ) ] @@ -434,9 +438,10 @@ class SynthDADynamicDispatch(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot("SynthDADynamicDispatch") / RSBinary("DynamicDispatch"), + ConfigParams(), label="DynamicDispatch-no-input" ) ] @@ -495,8 +500,9 @@ class SynthDARecursion(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot("SynthDARecursion") / RSBinary("Recursion"), + ConfigParams(), label="Recursion-no-input" ) ] @@ -555,8 +561,9 @@ class SynthOVInsideLoop(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot("SynthOVInsideLoop") / RSBinary("InsideLoop"), + ConfigParams(), label="InsideLoop-no-input" ) ] @@ -616,9 +623,10 @@ class SynthFeatureInteraction(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot("SynthFeatureInteraction") / RSBinary("FeatureInteraction"), + ConfigParams(), label="FeatureInteraction-no-input" ) ] @@ -680,9 +688,10 @@ class SynthFeatureHigherOrderInteraction(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot("SynthFeatureHigherOrderInteraction") 
/ RSBinary("HigherOrderInteraction"), + ConfigParams(), label="HigherOrderInteraction-no-input" ) ] From 039144f00aa8a193ae84aaa8f206372fe08d21cb Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 8 Oct 2023 21:51:43 +0200 Subject: [PATCH 149/224] Work on bzip2 --- varats/varats/projects/c_projects/bzip2.py | 53 +++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/varats/varats/projects/c_projects/bzip2.py b/varats/varats/projects/c_projects/bzip2.py index d89cbd9fe..266a12e36 100644 --- a/varats/varats/projects/c_projects/bzip2.py +++ b/varats/varats/projects/c_projects/bzip2.py @@ -59,6 +59,18 @@ class Bzip2(VProject): "countries-land-100m.geo.json" ] ), + HTTPMultiple( + local="geo-maps-compr", + remote={ + "1.0": + "https://github.com/se-sic/compression-data/raw/master/bzip2/geo-maps/" + }, + files=[ + "countries-land-100m.geo.json.bz2", + "countries-land-10m.geo.json.bz2", + "countries-land-1m.geo.json.bz2" + ] + ), FeatureSource() ] _AUTOTOOLS_VERSIONS = GoodBadSubgraph([ @@ -85,7 +97,7 @@ class Bzip2(VProject): WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("bzip2") / RSBinary("bzip2"), - "--compress", + #"--compress", # "--best", # "-vvv", "--keep", @@ -102,6 +114,26 @@ class Bzip2(VProject): "geo-maps/countries-land-100m.geo.json.bz2" ], requires_all_args={"--compress"} + ), + VCommand( + SourceRoot("bzip2") / RSBinary("bzip2"), + #"--decompress", + # "--best", + # "-vvv", + "--keep", + # bzip2 compresses very fast even on the best setting, so we + # need the three input files to get approximately 30 seconds + # total execution time + "geo-maps-compr/countries-land-1m.geo.json.bz2", + "geo-maps-compr/countries-land-10m.geo.json.bz2", + "geo-maps-compr/countries-land-100m.geo.json.bz2", + label="med_geo", + creates=[ + "geo-maps-compr/countries-land-1m.geo.json", + "geo-maps-compr/countries-land-10m.geo.json", + "geo-maps-compr/countries-land-100m.geo.json" + ], + requires_all_args={"--decompress"} ) ], } @@ -166,3 +198,22 @@ def compile(self) -> None: ) with local.cwd(bzip2_source): verify_binaries(self) + + def recompile(self) -> None: + """Recompile the project.""" + bzip2_source = Path(self.source_of_primary) + bzip2_version = ShortCommitHash(self.version_of_primary) + + if bzip2_version in typed_revision_range( + Bzip2._MAKE_VERSIONS, bzip2_source, ShortCommitHash + ) or bzip2_version in typed_revision_range( + Bzip2._AUTOTOOLS_VERSIONS, bzip2_source, ShortCommitHash + ): + with local.cwd(bzip2_source / "build"): + bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) + else: + with local.cwd(bzip2_source / "build"): + bb.watch(cmake)( + "--build", ".", "--config", "Release", "-j", + get_number_of_jobs(bb_cfg()) + ) From 7295486ce2325cf4569df635db83b61c0a3b013b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Mon, 9 Oct 2023 10:38:44 +0200 Subject: [PATCH 150/224] Adds stdin/stdout redirection to VCommand. 
--- varats-core/varats/project/varats_command.py | 19 ++- .../perf_tests/feature_perf_cs_collection.py | 161 ++++++++++++------ 2 files changed, 124 insertions(+), 56 deletions(-) diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py index b00c1bcac..7d8ad45c6 100644 --- a/varats-core/varats/project/varats_command.py +++ b/varats-core/varats/project/varats_command.py @@ -1,11 +1,13 @@ """Custom version of benchbuild's Command for use with the VaRA-Tool-Suite.""" import typing as tp -from benchbuild.command import Command, ProjectCommand +from benchbuild.command import Command, ProjectCommand, PathToken from varats.utils.config import get_config_patches if tp.TYPE_CHECKING: + from plumbum.commands.base import BoundEnvCommand + from varats.project.varats_project import VProject @@ -33,6 +35,8 @@ def __init__( requires_all_args: tp.Optional[tp.Set[str]] = None, requires_any_patch: tp.Optional[tp.Set[str]] = None, requires_all_patch: tp.Optional[tp.Set[str]] = None, + redirect_stdin: tp.Optional[PathToken] = None, + redirect_stdout: tp.Optional[PathToken] = None, **kwargs: tp.Union[str, tp.List[str]], ) -> None: @@ -41,6 +45,8 @@ def __init__( self._requires_all_args = requires_all_args or set() self._requires_any_patch = requires_any_patch or set() self._requires_all_patch = requires_all_patch or set() + self._redirect_stdin = redirect_stdin + self._redirect_stdout = redirect_stdout @property def requires_any_args(self) -> tp.Set[str]: @@ -58,6 +64,17 @@ def requires_any_patch(self) -> tp.Set[str]: def requires_all_patch(self) -> tp.Set[str]: return self._requires_all_patch + def as_plumbum(self, **kwargs: tp.Any) -> 'BoundEnvCommand': + cmd = super().as_plumbum(**kwargs) + + if self._redirect_stdin: + cmd = cmd < self._redirect_stdin.render(**kwargs) + + if self._redirect_stdout: + cmd = cmd > self._redirect_stdout.render(**kwargs) + + return cmd + class VProjectCommand(ProjectCommand): # type: ignore diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index dac128d31..032ade734 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -435,26 +435,32 @@ class SynthIPRuntime(VProject): VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - "<", - "geo-maps/countries-land-1km.geo.json", - ">", - "geo-maps/countries-land-1km.geo.json.compressed", label="countries-land-1km", - creates=["geo-maps/countries-land-1km.geo.json.compressed"], - requires_all_args={"-c"} + creates=[ + SourceRoot("geo-maps") / + "countries-land-1km.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-1km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-1km.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - "<", - "geo-maps/countries-land-1m.geo.json", - ">", - "geo-maps/countries-land-1m.geo.json.compressed", - label="countries-land-1m", - creates=["geo-maps/countries-land-1m.geo.json.compressed"], - requires_all_args={"-c"} + label="countries-land-1km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-1m.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-1m.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + 
"countries-land-1m.geo.json.compressed" ) ], } @@ -501,32 +507,47 @@ class SynthIPTemplate(VProject): shallow=False, version_filter=project_filter_generator("SynthIPTemplate") ), - FeatureSource() + FeatureSource(), + HTTPMultiple( + local="geo-maps", + remote={ + "1.0": + "https://github.com/simonepri/geo-maps/releases/" + "download/v0.6.0" + }, + files=["countries-land-1km.geo.json", "countries-land-1m.geo.json"] + ) ] WORKLOADS = { WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPTemplate") / RSBinary("Template"), - "<", - "geo-maps/countries-land-1km.geo.json", - ">", - "geo-maps/countries-land-1km.geo.json.compressed", label="countries-land-1km", - creates=["geo-maps/countries-land-1km.geo.json.compressed"], - requires_all_patch={"Compress"} + creates=[ + SourceRoot("geo-maps") / + "countries-land-1km.geo.json.compressed" + ], + requires_all_patch={"Compress"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-1km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-1km.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPTemplate") / RSBinary("Template"), - "<", - "geo-maps/countries-land-1m.geo.json", - ">", - "geo-maps/countries-land-1m.geo.json.compressed", - label="countries-land-1m", - creates=["geo-maps/countries-land-1m.geo.json.compressed"], - requires_all_patch={"Compress"} + label="countries-land-1km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-1m.geo.json.compressed" + ], + requires_all_patch={"Compress"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-1m.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-1m.geo.json.compressed" ) ], } @@ -573,32 +594,47 @@ class SynthIPTemplate2(VProject): shallow=False, version_filter=project_filter_generator("SynthIPTemplate2") ), - FeatureSource() + FeatureSource(), + HTTPMultiple( + local="geo-maps", + remote={ + "1.0": + "https://github.com/simonepri/geo-maps/releases/" + "download/v0.6.0" + }, + files=["countries-land-1km.geo.json", "countries-land-1m.geo.json"] + ) ] WORKLOADS = { WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPTemplate2") / RSBinary("Template2"), - "<", - "geo-maps/countries-land-1km.geo.json", - ">", - "geo-maps/countries-land-1km.geo.json.compressed", label="countries-land-1km", - creates=["geo-maps/countries-land-1km.geo.json.compressed"], - requires_all_patch={"Compress"} + creates=[ + SourceRoot("geo-maps") / + "countries-land-1km.geo.json.compressed" + ], + requires_all_patch={"Compress"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-1km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-1km.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPTemplate2") / RSBinary("Template2"), - "<", - "geo-maps/countries-land-1m.geo.json", - ">", - "geo-maps/countries-land-1m.geo.json.compressed", - label="countries-land-1m", - creates=["geo-maps/countries-land-1m.geo.json.compressed"], - requires_all_patch={"Compress"} + label="countries-land-1km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-1m.geo.json.compressed" + ], + requires_all_patch={"Compress"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-1m.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-1m.geo.json.compressed" ) ], } @@ -645,7 +681,16 @@ class SynthIPCombined(VProject): shallow=False, version_filter=project_filter_generator("SynthIPCombined") ), - FeatureSource() + 
FeatureSource(), + HTTPMultiple( + local="geo-maps", + remote={ + "1.0": + "https://github.com/simonepri/geo-maps/releases/" + "download/v0.6.0" + }, + files=["countries-land-1km.geo.json", "countries-land-1m.geo.json"] + ) ] WORKLOADS = { @@ -653,26 +698,32 @@ class SynthIPCombined(VProject): VCommand( SourceRoot("SynthIPCombined") / RSBinary("Combined"), ConfigParams("-c"), - "<", - "geo-maps/countries-land-1km.geo.json", - ">", - "geo-maps/countries-land-1km.geo.json.compressed", label="countries-land-1km", - creates=["geo-maps/countries-land-1km.geo.json.compressed"], - requires_all_args={"-c"} + creates=[ + SourceRoot("geo-maps") / + "countries-land-1km.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-1km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-1km.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPCombined") / RSBinary("Combined"), ConfigParams("-c"), - "<", - "geo-maps/countries-land-1m.geo.json", - ">", - "geo-maps/countries-land-1m.geo.json.compressed", - label="countries-land-1m", - creates=["geo-maps/countries-land-1m.geo.json.compressed"], - requires_all_args={"-c"} + label="countries-land-1km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-1m.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-1m.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-1m.geo.json.compressed" ) ], } From 7505811a99965a2206de3cafab33d3acd2c47dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Mon, 9 Oct 2023 10:53:50 +0200 Subject: [PATCH 151/224] Fix test --- tests/experiment/test_workload_util.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/experiment/test_workload_util.py b/tests/experiment/test_workload_util.py index 7b41acc41..1486c9aa5 100644 --- a/tests/experiment/test_workload_util.py +++ b/tests/experiment/test_workload_util.py @@ -93,13 +93,7 @@ def test_workload_config_param_token(self) -> None: self.assertEqual(len(commands), 1) command = commands[0] args = command.command.rendered_args(project=project) - self.assertEquals( - args, - tuple([ - "-c", "<", "geo-maps/countries-land-1km.geo.json", ">", - "geo-maps/countries-land-1km.geo.json.compressed" - ]) - ) + self.assertEquals(args, tuple(["-c"])) @run_in_test_environment(UnitTestFixtures.PAPER_CONFIGS) def test_workload_commands_requires_patch(self) -> None: From 42afc5ed58d373a3b5d28089cdea788b2f6f05ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Mon, 9 Oct 2023 12:20:09 +0200 Subject: [PATCH 152/224] Convert paths to strings in stream redirection --- varats-core/varats/project/varats_command.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py index 7d8ad45c6..3a78a5630 100644 --- a/varats-core/varats/project/varats_command.py +++ b/varats-core/varats/project/varats_command.py @@ -68,10 +68,10 @@ def as_plumbum(self, **kwargs: tp.Any) -> 'BoundEnvCommand': cmd = super().as_plumbum(**kwargs) if self._redirect_stdin: - cmd = cmd < self._redirect_stdin.render(**kwargs) + cmd = cmd < str(self._redirect_stdin.render(**kwargs)) if self._redirect_stdout: - cmd = cmd > self._redirect_stdout.render(**kwargs) + cmd = cmd > str(self._redirect_stdout.render(**kwargs)) return cmd From 83a4ac830083307a252d346aeec3b3ac83f77b4b 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Mon, 9 Oct 2023 13:45:11 +0200 Subject: [PATCH 153/224] Add VCommand wrapper function --- varats-core/varats/project/varats_command.py | 16 ++++++++++++++++ .../experiments/vara/feature_perf_precision.py | 12 +++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py index 3a78a5630..254003d41 100644 --- a/varats-core/varats/project/varats_command.py +++ b/varats-core/varats/project/varats_command.py @@ -2,6 +2,9 @@ import typing as tp from benchbuild.command import Command, ProjectCommand, PathToken +from benchbuild.utils.cmd import time +from plumbum import local +from plumbum.machines import LocalCommand from varats.utils.config import get_config_patches @@ -75,6 +78,19 @@ def as_plumbum(self, **kwargs: tp.Any) -> 'BoundEnvCommand': return cmd + def as_plumbum_wrapped_with( + self, cmd: 'BoundEnvCommand', **kwargs: tp.Any + ) -> 'BoundEnvCommand': + cmd = cmd[super().as_plumbum(**kwargs)] + + if self._redirect_stdin: + cmd = cmd < str(self._redirect_stdin.render(**kwargs)) + + if self._redirect_stdout: + cmd = cmd > str(self._redirect_stdout.render(**kwargs)) + + return cmd + class VProjectCommand(ProjectCommand): # type: ignore diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 37ecf3290..a0bac3bf6 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -594,16 +594,14 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: f".{self.__report_file_ending}" ) - pb_cmd = prj_command.command.as_plumbum( - project=self.project - ) print(f"Running example {prj_command.command.label}") - timed_pb_cmd = time["-v", "-o", time_report_file, - pb_cmd] - with cleanup(prj_command): - timed_pb_cmd(retcode=self.__binary.valid_exit_codes) + pb_cmd = prj_command.command.as_plumbum_wrapped_with( + time["-v", "-o", time_report_file], + project=self.project + ) + pb_cmd(retcode=self.__binary.valid_exit_codes) return StepResult.OK From d48ec5e29d34a0d29b1abe63cc5fa4fbe6350fe8 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 9 Oct 2023 19:23:13 +0200 Subject: [PATCH 154/224] More von Sebi Runtime example --- .../vara/feature_perf_precision.py | 37 +++++++++---------- .../perf_tests/feature_perf_cs_collection.py | 25 +++++++++++-- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index a0bac3bf6..758b941fd 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -59,9 +59,9 @@ def perf_prec_workload_commands( """Uniformly select the workloads that should be processed.""" return workload_commands(project, binary, [ WorkloadCategory.EXAMPLE - ]) + workload_commands(project, binary, [ - WorkloadCategory.SMALL - ]) + workload_commands(project, binary, [WorkloadCategory.MEDIUM]) + ]) + workload_commands( + project, binary, [WorkloadCategory.SMALL] + ) # + workload_commands(project, binary, [WorkloadCategory.MEDIUM]) def select_project_binaries(project: VProject) -> tp.List[ProjectBinaryWrapper]: @@ -229,6 +229,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: with local.env( VARA_TRACE_FILE=local_tracefile_path ): + # TODO: figure out how to handle 
this pb_cmd = prj_command.command.as_plumbum( project=self.project ) @@ -747,16 +748,14 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) with local.env(VARA_TRACE_FILE=fake_tracefile_path): - pb_cmd = prj_command.command.as_plumbum( - project=self.project - ) print(f"Running example {prj_command.command.label}") - timed_pb_cmd = time["-v", "-o", time_report_file, "--", - pb_cmd] - with cleanup(prj_command): - timed_pb_cmd(retcode=self._binary.valid_exit_codes) + pb_cmd = prj_command.command.as_plumbum_wrapped_with( + time["-v", "-o", time_report_file], + project=self.project + ) + pb_cmd(retcode=self._binary.valid_exit_codes) return StepResult.OK @@ -807,6 +806,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) with local.env(VARA_TRACE_FILE=fake_tracefile_path): + # TODO: figure out how to handle this pb_cmd = prj_command.command.as_plumbum( project=self.project ) @@ -1102,17 +1102,14 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: f".{self.__report_file_ending}" ) - pb_cmd = prj_command.command.as_plumbum( - project=self.project - ) - print(f"Running example {prj_command.command.label}") - - # timed_pb_cmd = time["-v", "-o", time_report_file, pb_cmd] - timed_pb_cmd = time["-v", "-o", time_report_file, "--", - pb_cmd] - with cleanup(prj_command): - timed_pb_cmd(retcode=self.__binary.valid_exit_codes) + print(f"Running example {prj_command.command.label}") + pb_cmd = prj_command.command.as_plumbum_wrapped_with( + time["-v", "-o", time_report_file], + project=self.project + ) + + pb_cmd(retcode=self.__binary.valid_exit_codes) return StepResult.OK diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 134709eea..eadba54d6 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -753,12 +753,31 @@ class SynthIPRuntime(VProject): "https://github.com/simonepri/geo-maps/releases/" "download/v0.6.0" }, - files=["countries-land-1km.geo.json", "countries-land-1m.geo.json"] + files=[ + "countries-land-10km.geo.json", "countries-land-1km.geo.json", + "countries-land-1m.geo.json" + ] ) ] WORKLOADS = { WorkloadSet(WorkloadCategory.SMALL): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="countries-land-10km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-10km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ) + ], + WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), @@ -774,11 +793,11 @@ class SynthIPRuntime(VProject): "countries-land-1km.geo.json.compressed" ) ], - WorkloadSet(WorkloadCategory.MEDIUM): [ + WorkloadSet(WorkloadCategory.LARGE): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - label="countries-land-1km", + label="countries-land-1m", creates=[ SourceRoot("geo-maps") / "countries-land-1m.geo.json.compressed" From b4596def7e293fd8d60acd384c7430509f405f27 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 9 Oct 2023 19:23:32 +0200 Subject: [PATCH 155/224] Adds hints for dune --- varats/varats/projects/cpp_projects/dune.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/varats/varats/projects/cpp_projects/dune.py 
b/varats/varats/projects/cpp_projects/dune.py index 37c2a2d71..e0376d5c7 100644 --- a/varats/varats/projects/cpp_projects/dune.py +++ b/varats/varats/projects/cpp_projects/dune.py @@ -33,6 +33,12 @@ class DunePerfRegression(VProject): add `-mllvm --vara-disable-phasar` to the projects `cflags` to disable phasar passes. This will still allow to analyse compile-time variability. + + Might need deps: + * klu + * spqr + * umfpack + * eigen3 """ NAME = 'DunePerfRegression' From f7be54e9622f966184e6951870e769efb4760e3a Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 9 Oct 2023 20:41:52 +0200 Subject: [PATCH 156/224] Adapts workloads for Sebi CS --- .../perf_tests/feature_perf_cs_collection.py | 157 ++++++++++++------ 1 file changed, 109 insertions(+), 48 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index eadba54d6..116e4fa79 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -754,8 +754,8 @@ class SynthIPRuntime(VProject): "download/v0.6.0" }, files=[ - "countries-land-10km.geo.json", "countries-land-1km.geo.json", - "countries-land-1m.geo.json" + "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", + "countries-land-1km.geo.json", "countries-land-1m.geo.json" ] ) ] @@ -765,48 +765,48 @@ class SynthIPRuntime(VProject): VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - label="countries-land-10km", + label="earth-lakes-10km", creates=[ SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" + "earth-lakes-10km.geo.json.compressed" ], requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-10km.geo.json", + "earth-lakes-10km.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" + "earth-lakes-10km.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - label="countries-land-1km", + label="countries-land-10km", creates=[ SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "countries-land-10km.geo.json.compressed" ], requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1km.geo.json", + "countries-land-10km.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "countries-land-10km.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.LARGE): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - label="countries-land-1m", + label="countries-land-1km", creates=[ SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" + "countries-land-1km.geo.json.compressed" ], requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1m.geo.json", + "countries-land-1km.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" + "countries-land-1km.geo.json.compressed" ) ], } @@ -861,39 +861,60 @@ class SynthIPTemplate(VProject): "https://github.com/simonepri/geo-maps/releases/" "download/v0.6.0" }, - files=["countries-land-1km.geo.json", "countries-land-1m.geo.json"] + files=[ + "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", + "countries-land-1km.geo.json", "countries-land-1m.geo.json" + ] ) ] WORKLOADS = { WorkloadSet(WorkloadCategory.SMALL): [ VCommand( - 
SourceRoot("SynthIPTemplate") / RSBinary("Template"), - label="countries-land-1km", + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="earth-lakes-10km", creates=[ SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "earth-lakes-10km.geo.json.compressed" ], - requires_all_patch={"Compress"}, + requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1km.geo.json", + "earth-lakes-10km.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "earth-lakes-10km.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( - SourceRoot("SynthIPTemplate") / RSBinary("Template"), + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="countries-land-10km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-10km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ) + ], + WorkloadSet(WorkloadCategory.LARGE): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), label="countries-land-1km", creates=[ SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" + "countries-land-1km.geo.json.compressed" ], - requires_all_patch={"Compress"}, + requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1m.geo.json", + "countries-land-1km.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" + "countries-land-1km.geo.json.compressed" ) ], } @@ -948,39 +969,60 @@ class SynthIPTemplate2(VProject): "https://github.com/simonepri/geo-maps/releases/" "download/v0.6.0" }, - files=["countries-land-1km.geo.json", "countries-land-1m.geo.json"] + files=[ + "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", + "countries-land-1km.geo.json", "countries-land-1m.geo.json" + ] ) ] WORKLOADS = { WorkloadSet(WorkloadCategory.SMALL): [ VCommand( - SourceRoot("SynthIPTemplate2") / RSBinary("Template2"), - label="countries-land-1km", + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="earth-lakes-10km", creates=[ SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "earth-lakes-10km.geo.json.compressed" ], - requires_all_patch={"Compress"}, + requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1km.geo.json", + "earth-lakes-10km.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "earth-lakes-10km.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( - SourceRoot("SynthIPTemplate2") / RSBinary("Template2"), + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="countries-land-10km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-10km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ) + ], + WorkloadSet(WorkloadCategory.LARGE): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), label="countries-land-1km", creates=[ SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" + "countries-land-1km.geo.json.compressed" ], - requires_all_patch={"Compress"}, + requires_all_args={"-c"}, 
redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1m.geo.json", + "countries-land-1km.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" + "countries-land-1km.geo.json.compressed" ) ], } @@ -1035,41 +1077,60 @@ class SynthIPCombined(VProject): "https://github.com/simonepri/geo-maps/releases/" "download/v0.6.0" }, - files=["countries-land-1km.geo.json", "countries-land-1m.geo.json"] + files=[ + "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", + "countries-land-1km.geo.json", "countries-land-1m.geo.json" + ] ) ] WORKLOADS = { WorkloadSet(WorkloadCategory.SMALL): [ VCommand( - SourceRoot("SynthIPCombined") / RSBinary("Combined"), + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - label="countries-land-1km", + label="earth-lakes-10km", creates=[ SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "earth-lakes-10km.geo.json.compressed" ], requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1km.geo.json", + "earth-lakes-10km.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "earth-lakes-10km.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( - SourceRoot("SynthIPCombined") / RSBinary("Combined"), + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="countries-land-10km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-10km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ) + ], + WorkloadSet(WorkloadCategory.LARGE): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), label="countries-land-1km", creates=[ SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" + "countries-land-1km.geo.json.compressed" ], requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1m.geo.json", + "countries-land-1km.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" + "countries-land-1km.geo.json.compressed" ) ], } From ab0d13b2eb26801c59272b4c200d4918a10c51aa Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 9 Oct 2023 20:44:17 +0200 Subject: [PATCH 157/224] A few dune tweaks --- .../experiments/vara/feature_perf_precision.py | 16 +++++++++++++--- varats/varats/projects/cpp_projects/dune.py | 6 ++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 758b941fd..fc27a1543 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -83,6 +83,13 @@ def get_extra_cflags(project: VProject) -> tp.List[str]: return [] +def get_threshold(project: VProject) -> int: + if project.DOMAIN.value is ProjectDomains.TEST: + return 0 + + return 50 + + class AnalysisProjectStepBase(OutputFolderStep): project: VProject @@ -383,9 +390,12 @@ def setup_actions_for_vara_experiment( project.cflags += experiment.get_vara_feature_cflags(project) - threshold = 0 if project.DOMAIN.value is ProjectDomains.TEST else 100 + threshold = get_threshold(project) project.cflags += experiment.get_vara_tracing_cflags( - instr_type, project=project, instruction_threshold=threshold + instr_type, + project=project, 
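# Assumption: `save_temps=True` instructs the compiler wrapper to keep
# intermediate build artifacts (e.g. LLVM IR) of the instrumented build
# around; the flag's exact semantics are defined by `get_vara_tracing_cflags`.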
+ save_temps=True, + instruction_threshold=threshold ) project.cflags += get_extra_cflags(project) @@ -917,7 +927,7 @@ def setup_actions_for_vara_overhead_experiment( ) -> tp.MutableSequence[actions.Step]: project.cflags += experiment.get_vara_feature_cflags(project) - threshold = 0 if project.DOMAIN.value is ProjectDomains.TEST else 100 + threshold = get_threshold(project) project.cflags += experiment.get_vara_tracing_cflags( instr_type, project=project, instruction_threshold=threshold ) diff --git a/varats/varats/projects/cpp_projects/dune.py b/varats/varats/projects/cpp_projects/dune.py index e0376d5c7..6ad588134 100644 --- a/varats/varats/projects/cpp_projects/dune.py +++ b/varats/varats/projects/cpp_projects/dune.py @@ -16,6 +16,7 @@ BinaryType, ProjectBinaryWrapper, ) +from varats.project.sources import FeatureSource from varats.project.varats_project import VProject from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap @@ -48,12 +49,13 @@ class DunePerfRegression(VProject): SOURCE = [ PaperConfigSpecificGit( project_name='DunePerfRegression', - remote='git@github.com:se-sic/dune-VaRA.git', + remote='https://github.com/se-sic/dune-VaRA.git', local='dune-VaRA', refspec='origin/HEAD', limit=None, shallow=False - ) + ), + FeatureSource() ] CONTAINER = get_base_image(ImageBase.DEBIAN_10) From 8d8d2039ec4a508e4b021c99a92ea2a48a50382f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 9 Oct 2023 21:25:21 +0200 Subject: [PATCH 158/224] Adapt dune to VCommand and disable MPI --- varats/varats/projects/cpp_projects/dune.py | 36 ++++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/varats/varats/projects/cpp_projects/dune.py b/varats/varats/projects/cpp_projects/dune.py index 6ad588134..00f70771f 100644 --- a/varats/varats/projects/cpp_projects/dune.py +++ b/varats/varats/projects/cpp_projects/dune.py @@ -2,7 +2,7 @@ import typing as tp import benchbuild as bb -from benchbuild.command import WorkloadSet, Command, SourceRoot +from benchbuild.command import WorkloadSet, SourceRoot from benchbuild.utils import cmd from benchbuild.utils.revision_ranges import RevisionRange from plumbum import local @@ -17,6 +17,7 @@ ProjectBinaryWrapper, ) from varats.project.sources import FeatureSource +from varats.project.varats_command import VCommand from varats.project.varats_project import VProject from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap @@ -62,13 +63,13 @@ class DunePerfRegression(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('dune-performance-regressions'), label='dune-helloworld' ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson-test'), @@ -79,48 +80,48 @@ class DunePerfRegression(VProject): 'poisson-yasp-Q2-3d.vtu' ] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson-ug-pk-2d'), label='poisson-ug-pk-2d', creates=['poisson-UG-Pk-2d.vtu'] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson-yasp-q1-2d'), label='poisson-yasp-q1-2d', creates=['poisson-yasp-q1-2d.vtu'] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson-yasp-q1-3d'), label='poisson-yasp-q1-3d', creates=['poisson-yasp-q1-3d.vtu'] ), - Command( + VCommand( SourceRoot( 
"dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson-yasp-q2-2d'), label='poisson-yasp-q2-2d', creates=['poisson-yasp-q2-2d.vtu'] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson-yasp-q2-3d'), label='poisson-yasp-q2-3d', creates=['poisson-yasp-q2-3d.vtu'] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('dune_performance_regressions'), label='dune_helloworld' ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson_test'), @@ -131,35 +132,35 @@ class DunePerfRegression(VProject): 'poisson-yasp-Q2-3d.vtu' ] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson_ug_pk_2d'), label='poisson_ug_pk_2d', creates=['poisson-UG-Pk-2d.vtu'] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson_yasp_q1_2d'), label='poisson_yasp_q1_2d', creates=['poisson-yasp-q1-2d.vtu'] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson_yasp_q1_3d'), label='poisson_yasp_q1_3d', creates=['poisson-yasp-q1-3d.vtu'] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson_yasp_q2_2d'), label='poisson_yasp_q2_2d', creates=['poisson-yasp-q2-2d.vtu'] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson_yasp_q2_3d'), @@ -294,7 +295,10 @@ def compile(self) -> None: with local.env( CC=c_compiler, CXX=cxx_compiler, - CMAKE_FLAGS="-DDUNE_ENABLE_PYTHONBINDINGS=OFF" + CMAKE_FLAGS=" ".join([ + "-DDUNE_ENABLE_PYTHONBINDINGS=OFF", + "-DCMAKE_DISABLE_FIND_PACKAGE_MPI=TRUE" + ]) ): dunecontrol = cmd['./dune-common/bin/dunecontrol'] From 3fd73e71f242188d64f9faa9ae490335e2e0af7f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 9 Oct 2023 22:17:12 +0200 Subject: [PATCH 159/224] Larger ring buffer --- varats/varats/experiments/vara/feature_perf_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index fc27a1543..1c004d179 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -285,7 +285,7 @@ def attach_usdt_raw_tracing( bpftrace_script = bpftrace["-o", report_file, "-q", bpftrace_script_location, binary] - bpftrace_script = bpftrace_script.with_env(BPFTRACE_PERF_RB_PAGES=4096) + bpftrace_script = bpftrace_script.with_env(BPFTRACE_PERF_RB_PAGES=8192) # Assertion: Can be run without sudo password prompt. bpftrace_cmd = sudo[bpftrace_script] From 72328aa5d36499f4ff2b3d19d65b21a448affac5 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 10 Oct 2023 08:02:08 +0200 Subject: [PATCH 160/224] Adapts bpftrace exp. 
to new wrapper interface --- varats-core/varats/project/varats_command.py | 14 +++++++-- .../vara/feature_perf_precision.py | 29 +++++++------------ 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py index 254003d41..5d20232af 100644 --- a/varats-core/varats/project/varats_command.py +++ b/varats-core/varats/project/varats_command.py @@ -79,9 +79,19 @@ def as_plumbum(self, **kwargs: tp.Any) -> 'BoundEnvCommand': return cmd def as_plumbum_wrapped_with( - self, cmd: 'BoundEnvCommand', **kwargs: tp.Any + self, cmd: 'BoundEnvCommand', + adapted_binary_location: tp.Optional[Path], **kwargs: tp.Any ) -> 'BoundEnvCommand': - cmd = cmd[super().as_plumbum(**kwargs)] + base_cmd = super().as_plumbum(**kwargs) + + # TODO: maybe we should just provide a callable to modify the original + # command + if adapted_binary_location: + base_cmd.executable = base_cmd.executable.copy( + adapted_binary_location, override=True + ) + + cmd = cmd[base_cmd] if self._redirect_stdin: cmd = cmd < str(self._redirect_stdin.render(**kwargs)) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 1c004d179..778e9b5f4 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -236,13 +236,9 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: with local.env( VARA_TRACE_FILE=local_tracefile_path ): - # TODO: figure out how to handle this pb_cmd = prj_command.command.as_plumbum( project=self.project ) - print( - f"Running example {prj_command.command.label}" - ) adapted_binary_location = Path( non_nfs_tmp_dir @@ -260,6 +256,9 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) with cleanup(prj_command): + print( + f"Running example {prj_command.command.label}" + ) pb_cmd( retcode=self._binary.valid_exit_codes ) @@ -816,34 +815,26 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: ) with local.env(VARA_TRACE_FILE=fake_tracefile_path): - # TODO: figure out how to handle this - pb_cmd = prj_command.command.as_plumbum( - project=self.project - ) - print( - f"Running example {prj_command.command.label}" - ) adapted_binary_location = Path( non_nfs_tmp_dir ) / self._binary.name - # Store binary in a local tmp dir that is not on nfs - pb_cmd.executable = pb_cmd.executable.copy( - adapted_binary_location, override=True + pb_cmd = prj_command.command.as_plumbum_wrapped_with( + time["-v", "-o", time_report_file], + adapted_binary_location, + project=self.project ) - timed_pb_cmd = time["-v", "-o", time_report_file, - "--", pb_cmd] - bpf_runner = RunBPFTracedWorkloads.attach_usdt_raw_tracing( fake_tracefile_path, adapted_binary_location, Path(non_nfs_tmp_dir) ) with cleanup(prj_command): - timed_pb_cmd( - retcode=self._binary.valid_exit_codes + print( + f"Running example {prj_command.command.label}" ) + pb_cmd(retcode=self._binary.valid_exit_codes) # wait for bpf script to exit if bpf_runner: From 929f0ee6850a2ea3e95b9ef28d2d2215f84b1221 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 10 Oct 2023 08:59:00 +0200 Subject: [PATCH 161/224] Adapts bpftrace handling --- varats-core/varats/project/varats_command.py | 17 ++++++++++++----- .../experiments/vara/feature_perf_precision.py | 11 ++++------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py 
index 5d20232af..f63f56dca 100644
--- a/varats-core/varats/project/varats_command.py
+++ b/varats-core/varats/project/varats_command.py
@@ -1,9 +1,11 @@
 """Custom version of benchbuild's Command for use with the VaRA-Tool-Suite."""
 import typing as tp
+from pathlib import Path
 
 from benchbuild.command import Command, ProjectCommand, PathToken
 from benchbuild.utils.cmd import time
 from plumbum import local
+from plumbum.commands.base import BaseCommand
 from plumbum.machines import LocalCommand
 
 from varats.utils.config import get_config_patches
@@ -79,19 +81,24 @@ def as_plumbum(self, **kwargs: tp.Any) -> 'BoundEnvCommand':
         return cmd
 
     def as_plumbum_wrapped_with(
-        self, cmd: 'BoundEnvCommand',
-        adapted_binary_location: tp.Optional[Path], **kwargs: tp.Any
-    ) -> 'BoundEnvCommand':
+        self,
+        wrapper_cmd: tp.Optional['BoundEnvCommand'] = None,
+        adapted_binary_location: tp.Optional[Path] = None,
+        **kwargs: tp.Any
+    ) -> 'BaseCommand':
         base_cmd = super().as_plumbum(**kwargs)
 
         # TODO: maybe we should just provide a callable to modify the original
         # command
         if adapted_binary_location:
-            base_cmd.executable = base_cmd.executable.copy(
+            base_cmd.cmd.executable = base_cmd.cmd.executable.copy(
                 adapted_binary_location, override=True
             )
 
-        cmd = cmd[base_cmd]
+        if wrapper_cmd:
+            cmd = wrapper_cmd[base_cmd]
+        else:
+            cmd = base_cmd
 
         if self._redirect_stdin:
             cmd = cmd < str(self._redirect_stdin.render(**kwargs))

diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py
index 778e9b5f4..1a9a063fc 100644
--- a/varats/varats/experiments/vara/feature_perf_precision.py
+++ b/varats/varats/experiments/vara/feature_perf_precision.py
@@ -236,17 +236,14 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult:
                     with local.env(
                         VARA_TRACE_FILE=local_tracefile_path
                     ):
-                        pb_cmd = prj_command.command.as_plumbum(
-                            project=self.project
-                        )
-
                         adapted_binary_location = Path(
                             non_nfs_tmp_dir
                         ) / self._binary.name
-                        # Store binary in a local tmp dir that is not on nfs
-                        pb_cmd.executable = pb_cmd.executable.copy(
-                            adapted_binary_location, override=True
+                        pb_cmd = prj_command.command.as_plumbum_wrapped_with(
+                            adapted_binary_location=
+                            adapted_binary_location,
+                            project=self.project
                         )
 
                         bpf_runner = bpf_runner = self.attach_usdt_raw_tracing(

From 7deddd986894f2fc51a08d53974d061903d686ea Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Tue, 10 Oct 2023 10:36:11 +0200
Subject: [PATCH 162/224] Fixes parsing errors in json

---
 varats-core/varats/report/tef_report.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/varats-core/varats/report/tef_report.py b/varats-core/varats/report/tef_report.py
index d447133d1..c3a4557b1 100644
--- a/varats-core/varats/report/tef_report.py
+++ b/varats-core/varats/report/tef_report.py
@@ -148,8 +148,29 @@ def stack_frames(self) -> None:
             "Stack frame parsing is currently not implemented!"
         )
 
+    def _patch_errors_from_file(self) -> None:
+        # bpftrace drops events when its ring buffer overflows (cf. the
+        # BPFTRACE_PERF_RB_PAGES bump in PATCH 159) and then writes plain-text
+        # "Lost <n> events" diagnostics into the trace, breaking the JSON.
+        with open(self.path, "r") as f:
+            data = f.read()
+
+        with open(self.path, "w") as f:
+            remove_lost_events = re.compile(r'Lost \d+ events')
+            for line in data.splitlines():
+                if "Lost" in line:
+                    LOG.error(
+                        "Events were lost during tracing, patching json file."
+ ) + line = remove_lost_events.sub("", line) + + f.write(line) + def _parse_json(self) -> None: trace_events: tp.List[TraceEvent] = list() + + self._patch_errors_from_file() + with open(self.path, "rb") as f: parser = ijson.parse(f) trace_event: tp.Dict[str, str] = {} From 5943e769c0ed1d5a9d2dad5b983b233c060a2bbc Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 10 Oct 2023 11:07:14 +0200 Subject: [PATCH 163/224] Fixes print --- .../varats/data/databases/feature_perf_precision_database.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index c6262b32d..9258710df 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -141,9 +141,7 @@ def precise_pim_regression_check( # TODO: check, maybe we need a "very small value cut off" if ttest_res.pvalue < 0.05: - # print( - # f"{self.name} found regression for feature {feature}." - # ) + # print(f"Found regression for feature {feature}.") is_regression = True else: print(f"Could not find feature {feature} in new trace.") From bf17bca23facdf58e5efb6e9816c09964f7e09c9 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 10 Oct 2023 12:34:57 +0200 Subject: [PATCH 164/224] Update workloads after reg. fix --- .../vara/feature_perf_precision.py | 6 +-- .../perf_tests/feature_perf_cs_collection.py | 44 +++++++++++++------ 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 1a9a063fc..956215911 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -59,9 +59,9 @@ def perf_prec_workload_commands( """Uniformly select the workloads that should be processed.""" return workload_commands(project, binary, [ WorkloadCategory.EXAMPLE - ]) + workload_commands( - project, binary, [WorkloadCategory.SMALL] - ) # + workload_commands(project, binary, [WorkloadCategory.MEDIUM]) + ]) + workload_commands(project, binary, [ + WorkloadCategory.SMALL + ]) + workload_commands(project, binary, [WorkloadCategory.MEDIUM]) def select_project_binaries(project: VProject) -> tp.List[ProjectBinaryWrapper]: diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 116e4fa79..361d7e8b4 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -754,59 +754,75 @@ class SynthIPRuntime(VProject): "download/v0.6.0" }, files=[ - "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", - "countries-land-1km.geo.json", "countries-land-1m.geo.json" + "countries-land-10km.geo.json", "countries-land-500m.geo.json", + "countries-land-250m.geo.json", "countries-land-1m.geo.json" ] ) ] WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="countries-land-10km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-10km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ) + ], 
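
The compress and decompress workloads above are wired together purely through
stdin/stdout redirection. As a rough sketch (with placeholder paths standing
in for the rendered SourceRoot tokens), the EXAMPLE compress entry corresponds
to a plumbum invocation along these lines:

    from plumbum import local

    # Placeholder paths; VCommand renders the real ones from SourceRoot.
    runtime = local["SynthIPRuntime/Runtime"]
    in_json = "geo-maps/countries-land-10km.geo.json"
    out_json = "geo-maps/countries-land-10km.geo.json.compressed"

    # redirect_stdin / redirect_stdout map onto plumbum's < and > operators.
    run_cmd = (runtime["-c"] < in_json) > out_json
    run_cmd()  # runs: Runtime -c < countries-land-10km.geo.json > ...compressed
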
WorkloadSet(WorkloadCategory.SMALL): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - label="earth-lakes-10km", + label="countries-land-500m", creates=[ SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json.compressed" + "countries-land-500m.geo.json.compressed" ], requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json", + "countries-land-500m.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json.compressed" + "countries-land-500m.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.MEDIUM): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - label="countries-land-10km", + label="countries-land-250m", creates=[ SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" + "countries-land-250m.geo.json.compressed" ], requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-10km.geo.json", + "countries-land-250m.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" + "countries-land-250m.geo.json.compressed" ) ], WorkloadSet(WorkloadCategory.LARGE): [ VCommand( SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), ConfigParams("-c"), - label="countries-land-1km", + label="countries-land-1m", creates=[ SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "countries-land-1m.geo.json.compressed" ], requires_all_args={"-c"}, redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1km.geo.json", + "countries-land-1m.geo.json", redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" + "countries-land-1m.geo.json.compressed" ) ], } From 6d36d3ed3a6e690c87cdfa802221b578a105f417 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 10 Oct 2023 14:37:07 +0200 Subject: [PATCH 165/224] Generalizes workloads for IP projects --- .../perf_tests/feature_perf_cs_collection.py | 357 +++++++----------- 1 file changed, 140 insertions(+), 217 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 361d7e8b4..9a5810b89 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -4,6 +4,7 @@ import benchbuild as bb from benchbuild.command import Command, SourceRoot, WorkloadSet +from benchbuild.project import Workloads from benchbuild.source import HTTPMultiple from benchbuild.utils.cmd import make, cmake, mkdir from benchbuild.utils.revision_ranges import RevisionRange @@ -729,6 +730,126 @@ def recompile(self) -> None: _do_feature_perf_cs_collection_recompile(self) +IP_WORKLOADS: Workloads = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="countries-land-10km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-10km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ), + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-d"), + label="countries-land-10km", + creates=[ + SourceRoot("geo-maps-compr") / "countries-land-10km.geo.json" + ], + requires_all_args={"-d"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-10km.geo.json.compressed", + 
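# The "-d" (decompress) variants read pre-compressed inputs from the new
# geo-maps-compr data source, so a decompress workload does not require a
# prior compress run to have produced its input.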
redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-10km.geo.json" + ) + ], + WorkloadSet(WorkloadCategory.SMALL): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="countries-land-500m", + creates=[ + SourceRoot("geo-maps") / + "countries-land-500m.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-500m.geo.json", + redirect_stdout=SourceRoot("geo-maps") / + "countries-land-500m.geo.json.compressed" + ), + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-d"), + label="countries-land-500m", + creates=[ + SourceRoot("geo-maps-compr") / "countries-land-500m.geo.json" + ], + requires_all_args={"-d"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-500m.geo.json.compressed", + redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-500m.geo.json" + ) + ], + WorkloadSet(WorkloadCategory.MEDIUM): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="countries-land-250m", + creates=[ + SourceRoot("geo-maps-compr") / + "countries-land-250m.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-250m.geo.json", + redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-250m.geo.json.compressed" + ), + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-d"), + label="countries-land-250m", + creates=[ + SourceRoot("geo-maps-compr") / "countries-land-250m.geo.json" + ], + requires_all_args={"-d"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-250m.geo.json.compressed", + redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-250m.geo.json" + ) + ], + WorkloadSet(WorkloadCategory.LARGE): [ + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-c"), + label="countries-land-1m", + creates=[ + SourceRoot("geo-maps-compr") / + "countries-land-1m.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-1m.geo.json", + redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-1m.geo.json.compressed" + ), + VCommand( + SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), + ConfigParams("-d"), + label="countries-land-1m", + creates=[ + SourceRoot("geo-maps-compr") / "countries-land-1m.geo.json" + ], + requires_all_args={"-d"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-1m.geo.json.compressed", + redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-1m.geo.json" + ) + ], +} + + class SynthIPRuntime(VProject): """Synthetic case-study project for testing flow sensitivity.""" @@ -757,75 +878,24 @@ class SynthIPRuntime(VProject): "countries-land-10km.geo.json", "countries-land-500m.geo.json", "countries-land-250m.geo.json", "countries-land-1m.geo.json" ] - ) + ), + HTTPMultiple( + local="geo-maps-compr", + remote={ + "1.0": + "https://github.com/se-sic/compression-data/raw/master/" + "example_comp/geo-maps/" + }, + files=[ + "countries-land-10km.geo.json.compressed", + "countries-land-1m.geo.json.compressed", + "countries-land-250m.geo.json.compressed", + "countries-land-500m.geo.json.compressed" + ] + ), ] - WORKLOADS = { - WorkloadSet(WorkloadCategory.EXAMPLE): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-10km", - creates=[ - SourceRoot("geo-maps") / - 
"countries-land-10km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-10km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" - ) - ], - WorkloadSet(WorkloadCategory.SMALL): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-500m", - creates=[ - SourceRoot("geo-maps") / - "countries-land-500m.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-500m.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-500m.geo.json.compressed" - ) - ], - WorkloadSet(WorkloadCategory.MEDIUM): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-250m", - creates=[ - SourceRoot("geo-maps") / - "countries-land-250m.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-250m.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-250m.geo.json.compressed" - ) - ], - WorkloadSet(WorkloadCategory.LARGE): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-1m", - creates=[ - SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1m.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1m.geo.json.compressed" - ) - ], - } + WORKLOADS = IP_WORKLOADS @staticmethod def binaries_for_revision( @@ -884,56 +954,7 @@ class SynthIPTemplate(VProject): ) ] - WORKLOADS = { - WorkloadSet(WorkloadCategory.SMALL): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="earth-lakes-10km", - creates=[ - SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json.compressed" - ) - ], - WorkloadSet(WorkloadCategory.MEDIUM): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-10km", - creates=[ - SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-10km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" - ) - ], - WorkloadSet(WorkloadCategory.LARGE): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-1km", - creates=[ - SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" - ) - ], - } + WORKLOADS = IP_WORKLOADS @staticmethod def binaries_for_revision( @@ -992,56 +1013,7 @@ class SynthIPTemplate2(VProject): ) ] - WORKLOADS = { - WorkloadSet(WorkloadCategory.SMALL): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="earth-lakes-10km", - creates=[ - SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json", - 
redirect_stdout=SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json.compressed" - ) - ], - WorkloadSet(WorkloadCategory.MEDIUM): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-10km", - creates=[ - SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-10km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" - ) - ], - WorkloadSet(WorkloadCategory.LARGE): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-1km", - creates=[ - SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" - ) - ], - } + WORKLOADS = IP_WORKLOADS @staticmethod def binaries_for_revision( @@ -1100,56 +1072,7 @@ class SynthIPCombined(VProject): ) ] - WORKLOADS = { - WorkloadSet(WorkloadCategory.SMALL): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="earth-lakes-10km", - creates=[ - SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "earth-lakes-10km.geo.json.compressed" - ) - ], - WorkloadSet(WorkloadCategory.MEDIUM): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-10km", - creates=[ - SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-10km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" - ) - ], - WorkloadSet(WorkloadCategory.LARGE): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-1km", - creates=[ - SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-1km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-1km.geo.json.compressed" - ) - ], - } + WORKLOADS = IP_WORKLOADS @staticmethod def binaries_for_revision( From 407aa08945a0d1f1345818b6dfabd9fa5da32736 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 10 Oct 2023 15:30:13 +0200 Subject: [PATCH 166/224] Refactor workload handling for SynthIPs --- .../perf_tests/feature_perf_cs_collection.py | 288 ++++++++++-------- 1 file changed, 167 insertions(+), 121 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 9a5810b89..929d76561 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -730,124 +730,128 @@ def recompile(self) -> None: _do_feature_perf_cs_collection_recompile(self) -IP_WORKLOADS: Workloads = { - WorkloadSet(WorkloadCategory.EXAMPLE): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-10km", - creates=[ - SourceRoot("geo-maps") / +def get_ip_workloads(project_source_name: str, 
binary_name: str) -> Workloads: + return { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot(project_source_name) / RSBinary(binary_name), + ConfigParams("-c"), + label="countries-land-10km", + creates=[ + SourceRoot("geo-maps") / + "countries-land-10km.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-10km.geo.json", + redirect_stdout=SourceRoot("geo-maps") / "countries-land-10km.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-10km.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-10km.geo.json.compressed" - ), - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-d"), - label="countries-land-10km", - creates=[ - SourceRoot("geo-maps-compr") / "countries-land-10km.geo.json" - ], - requires_all_args={"-d"}, - redirect_stdin=SourceRoot("geo-maps-compr") / - "countries-land-10km.geo.json.compressed", - redirect_stdout=SourceRoot("geo-maps-compr") / - "countries-land-10km.geo.json" - ) - ], - WorkloadSet(WorkloadCategory.SMALL): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-500m", - creates=[ - SourceRoot("geo-maps") / + ), + VCommand( + SourceRoot(project_source_name) / RSBinary(binary_name), + ConfigParams("-d"), + label="countries-land-10km", + creates=[ + SourceRoot("geo-maps-compr") / + "countries-land-10km.geo.json" + ], + requires_all_args={"-d"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-10km.geo.json.compressed", + redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-10km.geo.json" + ) + ], + WorkloadSet(WorkloadCategory.SMALL): [ + VCommand( + SourceRoot(project_source_name) / RSBinary(binary_name), + ConfigParams("-c"), + label="countries-land-500m", + creates=[ + SourceRoot("geo-maps") / + "countries-land-500m.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-500m.geo.json", + redirect_stdout=SourceRoot("geo-maps") / "countries-land-500m.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps") / - "countries-land-500m.geo.json", - redirect_stdout=SourceRoot("geo-maps") / - "countries-land-500m.geo.json.compressed" - ), - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-d"), - label="countries-land-500m", - creates=[ - SourceRoot("geo-maps-compr") / "countries-land-500m.geo.json" - ], - requires_all_args={"-d"}, - redirect_stdin=SourceRoot("geo-maps-compr") / - "countries-land-500m.geo.json.compressed", - redirect_stdout=SourceRoot("geo-maps-compr") / - "countries-land-500m.geo.json" - ) - ], - WorkloadSet(WorkloadCategory.MEDIUM): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-250m", - creates=[ - SourceRoot("geo-maps-compr") / + ), + VCommand( + SourceRoot(project_source_name) / RSBinary(binary_name), + ConfigParams("-d"), + label="countries-land-500m", + creates=[ + SourceRoot("geo-maps-compr") / + "countries-land-500m.geo.json" + ], + requires_all_args={"-d"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-500m.geo.json.compressed", + redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-500m.geo.json" + ) + ], + WorkloadSet(WorkloadCategory.MEDIUM): [ + VCommand( + SourceRoot(project_source_name) / RSBinary(binary_name), + ConfigParams("-c"), + 
label="countries-land-250m", + creates=[ + SourceRoot("geo-maps-compr") / + "countries-land-250m.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-250m.geo.json", + redirect_stdout=SourceRoot("geo-maps") / "countries-land-250m.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps-compr") / - "countries-land-250m.geo.json", - redirect_stdout=SourceRoot("geo-maps-compr") / - "countries-land-250m.geo.json.compressed" - ), - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-d"), - label="countries-land-250m", - creates=[ - SourceRoot("geo-maps-compr") / "countries-land-250m.geo.json" - ], - requires_all_args={"-d"}, - redirect_stdin=SourceRoot("geo-maps-compr") / - "countries-land-250m.geo.json.compressed", - redirect_stdout=SourceRoot("geo-maps-compr") / - "countries-land-250m.geo.json" - ) - ], - WorkloadSet(WorkloadCategory.LARGE): [ - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-c"), - label="countries-land-1m", - creates=[ - SourceRoot("geo-maps-compr") / + ), + VCommand( + SourceRoot(project_source_name) / RSBinary(binary_name), + ConfigParams("-d"), + label="countries-land-250m", + creates=[ + SourceRoot("geo-maps-compr") / + "countries-land-250m.geo.json" + ], + requires_all_args={"-d"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-250m.geo.json.compressed", + redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-250m.geo.json" + ) + ], + WorkloadSet(WorkloadCategory.LARGE): [ + VCommand( + SourceRoot(project_source_name) / RSBinary(binary_name), + ConfigParams("-c"), + label="countries-land-1m", + creates=[ + SourceRoot("geo-maps-compr") / + "countries-land-1m.geo.json.compressed" + ], + requires_all_args={"-c"}, + redirect_stdin=SourceRoot("geo-maps") / + "countries-land-1m.geo.json", + redirect_stdout=SourceRoot("geo-maps") / "countries-land-1m.geo.json.compressed" - ], - requires_all_args={"-c"}, - redirect_stdin=SourceRoot("geo-maps-compr") / - "countries-land-1m.geo.json", - redirect_stdout=SourceRoot("geo-maps-compr") / - "countries-land-1m.geo.json.compressed" - ), - VCommand( - SourceRoot("SynthIPRuntime") / RSBinary("Runtime"), - ConfigParams("-d"), - label="countries-land-1m", - creates=[ - SourceRoot("geo-maps-compr") / "countries-land-1m.geo.json" - ], - requires_all_args={"-d"}, - redirect_stdin=SourceRoot("geo-maps-compr") / - "countries-land-1m.geo.json.compressed", - redirect_stdout=SourceRoot("geo-maps-compr") / - "countries-land-1m.geo.json" - ) - ], -} + ), + VCommand( + SourceRoot(project_source_name) / RSBinary(binary_name), + ConfigParams("-d"), + label="countries-land-1m", + creates=[ + SourceRoot("geo-maps-compr") / "countries-land-1m.geo.json" + ], + requires_all_args={"-d"}, + redirect_stdin=SourceRoot("geo-maps-compr") / + "countries-land-1m.geo.json.compressed", + redirect_stdout=SourceRoot("geo-maps-compr") / + "countries-land-1m.geo.json" + ) + ], + } class SynthIPRuntime(VProject): @@ -895,7 +899,7 @@ class SynthIPRuntime(VProject): ), ] - WORKLOADS = IP_WORKLOADS + WORKLOADS = get_ip_workloads("SynthIPRuntime", "Runtime") @staticmethod def binaries_for_revision( @@ -951,10 +955,24 @@ class SynthIPTemplate(VProject): "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", "countries-land-1km.geo.json", "countries-land-1m.geo.json" ] - ) + ), + HTTPMultiple( + local="geo-maps-compr", + remote={ + "1.0": + 
"https://github.com/se-sic/compression-data/raw/master/" + "example_comp/geo-maps/" + }, + files=[ + "countries-land-10km.geo.json.compressed", + "countries-land-1m.geo.json.compressed", + "countries-land-250m.geo.json.compressed", + "countries-land-500m.geo.json.compressed" + ] + ), ] - WORKLOADS = IP_WORKLOADS + WORKLOADS = get_ip_workloads("SynthIPTemplate", "Template") @staticmethod def binaries_for_revision( @@ -1010,10 +1028,24 @@ class SynthIPTemplate2(VProject): "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", "countries-land-1km.geo.json", "countries-land-1m.geo.json" ] - ) + ), + HTTPMultiple( + local="geo-maps-compr", + remote={ + "1.0": + "https://github.com/se-sic/compression-data/raw/master/" + "example_comp/geo-maps/" + }, + files=[ + "countries-land-10km.geo.json.compressed", + "countries-land-1m.geo.json.compressed", + "countries-land-250m.geo.json.compressed", + "countries-land-500m.geo.json.compressed" + ] + ), ] - WORKLOADS = IP_WORKLOADS + WORKLOADS = get_ip_workloads("SynthIPTemplate2", "Template2") @staticmethod def binaries_for_revision( @@ -1069,10 +1101,24 @@ class SynthIPCombined(VProject): "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", "countries-land-1km.geo.json", "countries-land-1m.geo.json" ] - ) + ), + HTTPMultiple( + local="geo-maps-compr", + remote={ + "1.0": + "https://github.com/se-sic/compression-data/raw/master/" + "example_comp/geo-maps/" + }, + files=[ + "countries-land-10km.geo.json.compressed", + "countries-land-1m.geo.json.compressed", + "countries-land-250m.geo.json.compressed", + "countries-land-500m.geo.json.compressed" + ] + ), ] - WORKLOADS = IP_WORKLOADS + WORKLOADS = get_ip_workloads("SynthIPCombined", "Combined") @staticmethod def binaries_for_revision( From d11700532e2b7c76b4a0ff253e9b11617c5a3e81 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 10 Oct 2023 15:31:56 +0200 Subject: [PATCH 167/224] Disable warnings --- varats/varats/experiments/vara/feature_perf_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 956215911..c80e1bc8a 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -279,7 +279,7 @@ def attach_usdt_raw_tracing( bpftrace_script_location = non_nfs_tmp_dir / "RawUsdtTefMarker.bt" cp(orig_bpftrace_script_location, bpftrace_script_location) - bpftrace_script = bpftrace["-o", report_file, "-q", + bpftrace_script = bpftrace["-o", report_file, "--no-warnings", "-q", bpftrace_script_location, binary] bpftrace_script = bpftrace_script.with_env(BPFTRACE_PERF_RB_PAGES=8192) From de4cf20cc10beb7dada7e774119267c2c0619c88 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 10 Oct 2023 20:13:48 +0200 Subject: [PATCH 168/224] Fixes wrong sourceroots --- .../varats/projects/perf_tests/feature_perf_cs_collection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 929d76561..52108a5a2 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -798,7 +798,7 @@ def get_ip_workloads(project_source_name: str, binary_name: str) -> Workloads: ConfigParams("-c"), label="countries-land-250m", creates=[ - SourceRoot("geo-maps-compr") / + 
SourceRoot("geo-maps") / "countries-land-250m.geo.json.compressed" ], requires_all_args={"-c"}, @@ -828,7 +828,7 @@ def get_ip_workloads(project_source_name: str, binary_name: str) -> Workloads: ConfigParams("-c"), label="countries-land-1m", creates=[ - SourceRoot("geo-maps-compr") / + SourceRoot("geo-maps") / "countries-land-1m.geo.json.compressed" ], requires_all_args={"-c"}, From cf89b119b3f3633009ea6f442f0ed5c968339a9f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 10 Oct 2023 23:29:50 +0200 Subject: [PATCH 169/224] Adds field sense --- .../perf_tests/feature_perf_cs_collection.py | 140 ++++++++++-------- 1 file changed, 79 insertions(+), 61 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 52108a5a2..ce51d9bed 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -170,67 +170,6 @@ def recompile(self) -> None: _do_feature_perf_cs_collection_recompile(self) -class SynthSAFieldSensitivity(VProject): - """Synthetic case-study project for testing field sensitivity.""" - - NAME = 'SynthSAFieldSensitivity' - GROUP = 'perf_tests' - DOMAIN = ProjectDomains.TEST - - SOURCE = [ - bb.source.Git( - remote="https://github.com/se-sic/FeaturePerfCSCollection.git", - local="SynthSAFieldSensitivity", - refspec="origin/HEAD", - limit=None, - shallow=False, - version_filter=project_filter_generator("SynthSAFieldSensitivity") - ), - FeatureSource() - ] - - WORKLOADS = { - WorkloadSet(WorkloadCategory.EXAMPLE): [ - VCommand( - SourceRoot("SynthSAFieldSensitivity") / RSBinary("FieldSense"), - ConfigParams(), - label="FieldSense-no-input" - ) - ] - } - - CONTAINER = get_base_image(ImageBase.DEBIAN_12) - - @staticmethod - def binaries_for_revision( - revision: ShortCommitHash # pylint: disable=W0613 - ) -> tp.List[ProjectBinaryWrapper]: - binary_map = RevisionBinaryMap( - get_local_project_git_path(SynthSAFieldSensitivity.NAME) - ) - - binary_map.specify_binary( - "build/bin/FieldSense", - BinaryType.EXECUTABLE, - only_valid_in=RevisionRange("0a9216d769", "master") - ) - - return binary_map[revision] - - def run_tests(self) -> None: - pass - - def compile(self) -> None: - """Compile the project.""" - _do_feature_perf_cs_collection_compile( - self, "FPCSC_ENABLE_PROJECT_SYNTHSAFIELDSENSITIVITY" - ) - - def recompile(self) -> None: - """Recompile the project.""" - _do_feature_perf_cs_collection_recompile(self) - - class SynthSAFlowSensitivity(VProject): """Synthetic case-study project for testing flow sensitivity.""" @@ -1144,3 +1083,82 @@ def compile(self) -> None: def recompile(self) -> None: """Recompile the project.""" _do_feature_perf_cs_collection_recompile(self) + + +class SynthSAFieldSensitivity(VProject): + """Synthetic case-study project for testing field sensitivity.""" + + NAME = 'SynthSAFieldSensitivity' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthSAFieldSensitivity", + refspec="origin/HEAD", + limit=None, + shallow=False, + version_filter=project_filter_generator("SynthSAFieldSensitivity") + ), + FeatureSource(), + HTTPMultiple( + local="geo-maps", + remote={ + "1.0": + "https://github.com/simonepri/geo-maps/releases/" + "download/v0.6.0" + }, + files=[ + "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", + "countries-land-1km.geo.json", 
"countries-land-1m.geo.json" + ] + ), + HTTPMultiple( + local="geo-maps-compr", + remote={ + "1.0": + "https://github.com/se-sic/compression-data/raw/master/" + "example_comp/geo-maps/" + }, + files=[ + "countries-land-10km.geo.json.compressed", + "countries-land-1m.geo.json.compressed", + "countries-land-250m.geo.json.compressed", + "countries-land-500m.geo.json.compressed" + ] + ), + ] + + WORKLOADS = get_ip_workloads("SynthSAFieldSensitivity", "FieldSense") + + CONTAINER = get_base_image(ImageBase.DEBIAN_12) + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthSAFieldSensitivity.NAME) + ) + + binary_map.specify_binary( + "build/bin/FieldSense", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("0a9216d769", "master") + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHSAFIELDSENSITIVITY" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) From ec4b87ad938b06f5a1e3aa2347baed6f92bbc84d Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Wed, 11 Oct 2023 10:42:58 +0200 Subject: [PATCH 170/224] Fixes SynthIP bug --- .../perf_tests/feature_perf_cs_collection.py | 172 +++++------------- 1 file changed, 41 insertions(+), 131 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index ce51d9bed..df1e3b735 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -4,7 +4,7 @@ import benchbuild as bb from benchbuild.command import Command, SourceRoot, WorkloadSet -from benchbuild.project import Workloads +from benchbuild.project import Workloads, Sources from benchbuild.source import HTTPMultiple from benchbuild.utils.cmd import make, cmake, mkdir from benchbuild.utils.revision_ranges import RevisionRange @@ -793,6 +793,41 @@ def get_ip_workloads(project_source_name: str, binary_name: str) -> Workloads: } +def get_ip_data_sources() -> tp.List[Sources]: + # TODO: fix typing in benchbuild + return tp.cast( + tp.List[Sources], [ + HTTPMultiple( + local="geo-maps", + remote={ + "1.0": + "https://github.com/simonepri/geo-maps/releases/" + "download/v0.6.0" + }, + files=[ + "countries-land-10km.geo.json", + "countries-land-500m.geo.json", + "countries-land-250m.geo.json", "countries-land-1m.geo.json" + ] + ), + HTTPMultiple( + local="geo-maps-compr", + remote={ + "1.0": + "https://github.com/se-sic/compression-data/raw/master/" + "example_comp/geo-maps/" + }, + files=[ + "countries-land-10km.geo.json.compressed", + "countries-land-1m.geo.json.compressed", + "countries-land-250m.geo.json.compressed", + "countries-land-500m.geo.json.compressed" + ] + ), + ] + ) + + class SynthIPRuntime(VProject): """Synthetic case-study project for testing flow sensitivity.""" @@ -810,32 +845,7 @@ class SynthIPRuntime(VProject): version_filter=project_filter_generator("SynthIPRuntime") ), FeatureSource(), - HTTPMultiple( - local="geo-maps", - remote={ - "1.0": - "https://github.com/simonepri/geo-maps/releases/" - "download/v0.6.0" - }, - files=[ - "countries-land-10km.geo.json", "countries-land-500m.geo.json", - "countries-land-250m.geo.json", 
"countries-land-1m.geo.json" - ] - ), - HTTPMultiple( - local="geo-maps-compr", - remote={ - "1.0": - "https://github.com/se-sic/compression-data/raw/master/" - "example_comp/geo-maps/" - }, - files=[ - "countries-land-10km.geo.json.compressed", - "countries-land-1m.geo.json.compressed", - "countries-land-250m.geo.json.compressed", - "countries-land-500m.geo.json.compressed" - ] - ), + *get_ip_data_sources(), ] WORKLOADS = get_ip_workloads("SynthIPRuntime", "Runtime") @@ -883,32 +893,7 @@ class SynthIPTemplate(VProject): version_filter=project_filter_generator("SynthIPTemplate") ), FeatureSource(), - HTTPMultiple( - local="geo-maps", - remote={ - "1.0": - "https://github.com/simonepri/geo-maps/releases/" - "download/v0.6.0" - }, - files=[ - "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", - "countries-land-1km.geo.json", "countries-land-1m.geo.json" - ] - ), - HTTPMultiple( - local="geo-maps-compr", - remote={ - "1.0": - "https://github.com/se-sic/compression-data/raw/master/" - "example_comp/geo-maps/" - }, - files=[ - "countries-land-10km.geo.json.compressed", - "countries-land-1m.geo.json.compressed", - "countries-land-250m.geo.json.compressed", - "countries-land-500m.geo.json.compressed" - ] - ), + *get_ip_data_sources(), ] WORKLOADS = get_ip_workloads("SynthIPTemplate", "Template") @@ -956,32 +941,7 @@ class SynthIPTemplate2(VProject): version_filter=project_filter_generator("SynthIPTemplate2") ), FeatureSource(), - HTTPMultiple( - local="geo-maps", - remote={ - "1.0": - "https://github.com/simonepri/geo-maps/releases/" - "download/v0.6.0" - }, - files=[ - "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", - "countries-land-1km.geo.json", "countries-land-1m.geo.json" - ] - ), - HTTPMultiple( - local="geo-maps-compr", - remote={ - "1.0": - "https://github.com/se-sic/compression-data/raw/master/" - "example_comp/geo-maps/" - }, - files=[ - "countries-land-10km.geo.json.compressed", - "countries-land-1m.geo.json.compressed", - "countries-land-250m.geo.json.compressed", - "countries-land-500m.geo.json.compressed" - ] - ), + *get_ip_data_sources(), ] WORKLOADS = get_ip_workloads("SynthIPTemplate2", "Template2") @@ -1029,32 +989,7 @@ class SynthIPCombined(VProject): version_filter=project_filter_generator("SynthIPCombined") ), FeatureSource(), - HTTPMultiple( - local="geo-maps", - remote={ - "1.0": - "https://github.com/simonepri/geo-maps/releases/" - "download/v0.6.0" - }, - files=[ - "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", - "countries-land-1km.geo.json", "countries-land-1m.geo.json" - ] - ), - HTTPMultiple( - local="geo-maps-compr", - remote={ - "1.0": - "https://github.com/se-sic/compression-data/raw/master/" - "example_comp/geo-maps/" - }, - files=[ - "countries-land-10km.geo.json.compressed", - "countries-land-1m.geo.json.compressed", - "countries-land-250m.geo.json.compressed", - "countries-land-500m.geo.json.compressed" - ] - ), + *get_ip_data_sources(), ] WORKLOADS = get_ip_workloads("SynthIPCombined", "Combined") @@ -1102,32 +1037,7 @@ class SynthSAFieldSensitivity(VProject): version_filter=project_filter_generator("SynthSAFieldSensitivity") ), FeatureSource(), - HTTPMultiple( - local="geo-maps", - remote={ - "1.0": - "https://github.com/simonepri/geo-maps/releases/" - "download/v0.6.0" - }, - files=[ - "earth-lakes-10km.geo.json", "countries-land-10km.geo.json", - "countries-land-1km.geo.json", "countries-land-1m.geo.json" - ] - ), - HTTPMultiple( - local="geo-maps-compr", - remote={ - "1.0": - 
"https://github.com/se-sic/compression-data/raw/master/" - "example_comp/geo-maps/" - }, - files=[ - "countries-land-10km.geo.json.compressed", - "countries-land-1m.geo.json.compressed", - "countries-land-250m.geo.json.compressed", - "countries-land-500m.geo.json.compressed" - ] - ), + *get_ip_data_sources(), ] WORKLOADS = get_ip_workloads("SynthSAFieldSensitivity", "FieldSense") From 9d0361929b369da13a8163726ced68acc7668f50 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Wed, 11 Oct 2023 11:10:19 +0200 Subject: [PATCH 171/224] Adds special handling for LocalCommands --- varats-core/varats/project/varats_command.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py index f63f56dca..f0e938b5b 100644 --- a/varats-core/varats/project/varats_command.py +++ b/varats-core/varats/project/varats_command.py @@ -91,9 +91,14 @@ def as_plumbum_wrapped_with( # TODO: maybe we should just provide a callable to modify the original # command if adapted_binary_location: - base_cmd.cmd.executable = base_cmd.cmd.executable.copy( - adapted_binary_location, override=True - ) + if isinstance(base_cmd, LocalCommand): + base_cmd.executable = base_cmd.executable.copy( + adapted_binary_location, override=True + ) + else: + base_cmd.cmd.executable = base_cmd.cmd.executable.copy( + adapted_binary_location, override=True + ) if wrapper_cmd: cmd = wrapper_cmd[base_cmd] From 932b02b3e51f157c3cf7db0ae3b7f1d834ecb48f Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 11 Oct 2023 14:34:36 +0200 Subject: [PATCH 172/224] - Added suggestions from code review --- varats/varats/projects/cpp_projects/hyteg.py | 25 +++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index fcd08994b..985e7643e 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -18,6 +18,20 @@ class HyTeg(VProject): + """ + C++ framework for large scale high performance finite element simulations + based on (but not limited to) matrix-free geometric multigrid. + + Note: + Currently HyTeg CANNOT be compiled with the Phasar passes activated + in vara. + Trying to do so will crash the compiler + + If you use Dune with an experiment that uses the vara compiler, + add `-mllvm --vara-disable-phasar` to the projects `cflags` to + disable phasar passes. + This will still allow to analyse compile-time variability. 
+ """ NAME = 'HyTeg' GROUP = 'cpp_projects' DOMAIN = ProjectDomains.TEST @@ -37,7 +51,7 @@ class HyTeg(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ Command( - SourceRoot("HyTeG/build/apps/profiling") / + SourceRoot("HyTeG") / "build" / "apps" / "profiling" / RSBinary('ProfilingApp'), label='ProfilingApp' ) @@ -65,10 +79,6 @@ def compile(self) -> None: mkdir("-p", hyteg_source / "build") - # Currently Phasar passes crash the compiler - # This limits us to analysing compile time variability - self.cflags += ["-mllvm", "--vara-disable-phasar"] - cc_compiler = bb.compiler.cc(self) cxx_compiler = bb.compiler.cxx(self) @@ -83,7 +93,10 @@ def compile(self) -> None: bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) def recompile(self) -> None: - pass + hyteg_source = local.path(self.source_of(self.primary_source)) + + with local.cwd(hyteg_source / "build" / "apps" / "profiling"): + bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) def run_tests(self) -> None: pass From 9c98e59f5e9ded73342878e0bbd9d6e6597fa48f Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 11 Oct 2023 14:37:24 +0200 Subject: [PATCH 173/224] - Added suggestions from code review --- varats-core/varats/project/project_domain.py | 1 + varats/varats/projects/cpp_projects/hyteg.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/varats-core/varats/project/project_domain.py b/varats-core/varats/project/project_domain.py index d7108ecaf..071ed9605 100644 --- a/varats-core/varats/project/project_domain.py +++ b/varats-core/varats/project/project_domain.py @@ -18,6 +18,7 @@ class ProjectDomains(Enum): EDITOR = "Editor" FILE_FORMAT = "File format" HW_EMULATOR = "Hardware emulator" + HPC = "High Performance Applications" PARSER = "Parser" PLANNING = "Planning" PROG_LANG = "Programming language" diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 985e7643e..9e7b9a7ae 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -5,9 +5,9 @@ from benchbuild.utils.cmd import make, cmake, mkdir from benchbuild.utils.revision_ranges import SingleRevision from benchbuild.utils.settings import get_number_of_jobs -from experiment.workload_util import WorkloadCategory, RSBinary from plumbum import local +from varats.experiment.workload_util import WorkloadCategory, RSBinary from varats.paper.paper_config import PaperConfigSpecificGit from varats.project.project_domain import ProjectDomains from varats.project.project_util import get_local_project_git_path, BinaryType @@ -34,7 +34,7 @@ class HyTeg(VProject): """ NAME = 'HyTeg' GROUP = 'cpp_projects' - DOMAIN = ProjectDomains.TEST + DOMAIN = ProjectDomains.HPC SOURCE = [ PaperConfigSpecificGit( From ebd6767f011913855780a86a73c27c1dfa035b03 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 11 Oct 2023 16:49:18 +0200 Subject: [PATCH 174/224] - Pylint and MyPy --- varats/varats/projects/cpp_projects/hyteg.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 9e7b9a7ae..3ea9e323a 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -1,3 +1,4 @@ +"""Adds the HyTeg framework as a project to VaRA-TS.""" import typing as tp import benchbuild as bb @@ -10,7 +11,11 @@ from varats.experiment.workload_util import WorkloadCategory, RSBinary from varats.paper.paper_config import 
PaperConfigSpecificGit from varats.project.project_domain import ProjectDomains -from varats.project.project_util import get_local_project_git_path, BinaryType +from varats.project.project_util import ( + get_local_project_git_path, + BinaryType, + ProjectBinaryWrapper, +) from varats.project.sources import FeatureSource from varats.project.varats_project import VProject from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap @@ -75,6 +80,7 @@ def binaries_for_revision( return binaries[revision] def compile(self) -> None: + """Compile HyTeg with irrelevant settings disabled.""" hyteg_source = local.path(self.source_of(self.primary_source)) mkdir("-p", hyteg_source / "build") @@ -93,6 +99,7 @@ def compile(self) -> None: bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) def recompile(self) -> None: + """Recompiles HyTeg e.g. after a patch has been applied.""" hyteg_source = local.path(self.source_of(self.primary_source)) with local.cwd(hyteg_source / "build" / "apps" / "profiling"): From 24655c61c7617745ef11b7945a8526bc4ef35dc3 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 12 Oct 2023 11:25:46 +0200 Subject: [PATCH 175/224] Adapt instr. threshold for sebi CS --- varats/varats/experiments/vara/feature_perf_precision.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index c80e1bc8a..14288567a 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -85,6 +85,13 @@ def get_extra_cflags(project: VProject) -> tp.List[str]: def get_threshold(project: VProject) -> int: if project.DOMAIN.value is ProjectDomains.TEST: + if project.name in [ + "SynthSAFieldSensitivity", "SynthIPRuntime", "SynthIPTemplate", + "SynthIPTemplate2", "SynthIPCombined" + ]: + print("Don't instrument everything") + return 20 + return 0 return 50 From 6909287e7a26c1853607923be6db3f293a0aee8e Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 12 Oct 2023 14:41:47 +0200 Subject: [PATCH 176/224] Fixes threshold checking problem --- varats/varats/experiments/vara/feature_experiment.py | 7 ++++++- varats/varats/experiments/vara/feature_perf_precision.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/varats/varats/experiments/vara/feature_experiment.py b/varats/varats/experiments/vara/feature_experiment.py index 9926e6040..8f96019e1 100644 --- a/varats/varats/experiments/vara/feature_experiment.py +++ b/varats/varats/experiments/vara/feature_experiment.py @@ -183,18 +183,23 @@ def get_vara_tracing_cflags( Returns: list of tracing specific cflags """ c_flags = [] + if instr_type != FeatureInstrType.NONE: c_flags += ["-fsanitize=vara", f"-fvara-instr={instr_type.value}"] + c_flags += [ "-flto", "-fuse-ld=lld", "-flegacy-pass-manager", "-fno-omit-frame-pointer" ] - if instruction_threshold is not None: + + if instruction_threshold is None: # For test projects, do not exclude small regions if project is not None and project.domain == ProjectDomains.TEST: instruction_threshold = 1 + if instruction_threshold is not None: c_flags += [f"-fvara-instruction-threshold={instruction_threshold}"] + if save_temps: c_flags += ["-Wl,-plugin-opt=save-temps"] diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 14288567a..7a4eb4e96 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ 
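The threshold plumbing in the two patches above is easy to misread, so here is a condensed, pure-Python restatement of the fixed get_vara_tracing_cflags control flow: a missing threshold is defaulted to 1 for test projects *before* the flag is emitted, so small regions stay instrumented there. This is a sketch, not the full function (the real code also handles save-temps and instrumentation type NONE):

def sketch_tracing_cflags(instr_value, threshold=None, is_test_project=False):
    c_flags = ["-fsanitize=vara", f"-fvara-instr={instr_value}", "-flto"]
    # Default a missing threshold for test projects first ...
    if threshold is None and is_test_project:
        threshold = 1
    # ... then emit the flag whenever a threshold is known.
    if threshold is not None:
        c_flags.append(f"-fvara-instruction-threshold={threshold}")
    return c_flags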
b/varats/varats/experiments/vara/feature_perf_precision.py @@ -84,7 +84,7 @@ def get_extra_cflags(project: VProject) -> tp.List[str]: def get_threshold(project: VProject) -> int: - if project.DOMAIN.value is ProjectDomains.TEST: + if project.DOMAIN is ProjectDomains.TEST: if project.name in [ "SynthSAFieldSensitivity", "SynthIPRuntime", "SynthIPTemplate", "SynthIPTemplate2", "SynthIPCombined" From dea50eeb6f03ec4ee07ae0f616751cf6018605fd Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 12 Oct 2023 23:02:29 +0200 Subject: [PATCH 177/224] Implements loc calculation for dune --- .../varats/tables/feature_perf_precision.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 671a97164..13493bc02 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -559,6 +559,27 @@ def _calc_folder_locs(repo_path: Path, rev_range: str, folder: str) -> int: return loc + @staticmethod + def _calc_folder_locs_dune(repo_path: Path, rev_range: str) -> int: + dune_sub_projects = [ + "dune-alugrid", "dune-common", "dune-functions", "dune-geometry", + "dune-grid", "dune-istl", "dune-localfunctions", + "dune-multidomaingrid", "dune-pdelab", "dune-typetree", + "dune-uggrid" + ] + total_locs = 0 + + total_locs += calc_repo_loc(repo_path, rev_range) + + for sub_project in dune_sub_projects: + sub_project_path = repo_path / sub_project + # TODO: get sub_rpoject hashes + locs = calc_repo_loc(sub_project_path, "HEAD") + # print(f"Calculated {locs} for {sub_project_path}") + total_locs += locs + + return total_locs + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: case_studies = get_loaded_paper_config().get_all_case_studies() profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] @@ -581,6 +602,8 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: locs = self._calc_folder_locs( project_git_path, rev.hash, src_folder ) + elif case_study.project_cls.NAME == "DunePerfRegression": + locs = self._calc_folder_locs_dune(project_git_path, rev.hash) else: locs = calc_repo_loc(project_git_path, rev.hash) From 8ad5975effd0b2a9df4d4889a86076bf26e21430 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 17 Oct 2023 14:28:35 +0200 Subject: [PATCH 178/224] Small fixes and error reduction --- .../data/databases/feature_perf_precision_database.py | 10 ++++++++-- varats/varats/plots/feature_perf_precision.py | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 9258710df..7567e37d5 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -68,12 +68,13 @@ def get_matching_event( open_events.remove(event) return event - LOG.error( + LOG.debug( f"Could not find matching start for Event {repr(closing_event)}." 
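The one-token fix at the top of this hunk (`project.DOMAIN.value is ProjectDomains.TEST` becoming `project.DOMAIN is ProjectDomains.TEST`) deserves spelling out: the old line compared an enum *value* (a string) against an enum *member* with identity, which can never hold, so the TEST branch was dead code. A tiny stand-in enum demonstrates the difference:

from enum import Enum


class Domain(Enum):
    TEST = "Test"


# Comparing the value "Test" with the member via `is` is always False:
assert (Domain.TEST.value is Domain.TEST) is False
# Comparing members directly, as the fixed line does, works as intended:
assert Domain.TEST is Domain.TEST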
) return None + found_missing_open_event = False for trace_event in tef_report.trace_events: if trace_event.category == "Feature": if trace_event.event_type == TraceEventType.DURATION_EVENT_BEGIN: @@ -83,6 +84,7 @@ def get_matching_event( elif trace_event.event_type == TraceEventType.DURATION_EVENT_END: opening_event = get_matching_event(open_events, trace_event) if not opening_event: + found_missing_open_event = True continue end_timestamp = trace_event.timestamp @@ -119,7 +121,11 @@ def get_matching_event( ) if open_events: - LOG.error(f"Not all events have been closed: {open_events}.") + LOG.error("Not all events have been correctly closed.") + LOG.debug(f"Events = {open_events}.") + + if found_missing_open_event: + LOG.error("Not all events have been correctly opened.") return feature_performances diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 554f7310f..4d69df87c 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -139,7 +139,7 @@ def plot(self, view_mode: bool) -> None: x="Profiler", y="value", hue="metric", - jitter=0.1, + jitter=0.15, dodge=True, linewidth=0.5, marker='x', @@ -147,7 +147,7 @@ def plot(self, view_mode: bool) -> None: mcolors.CSS4_COLORS['dimgrey'], mcolors.CSS4_COLORS['darkgrey'] ], - size=4, + size=7, ax=ax ) From f569a36a57e3f7cafb61f304151c82ca70ea8e34 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 17 Oct 2023 19:58:13 +0200 Subject: [PATCH 179/224] Hyteg fixes --- varats/varats/projects/cpp_projects/hyteg.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 3ea9e323a..20f1de139 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -17,6 +17,7 @@ ProjectBinaryWrapper, ) from varats.project.sources import FeatureSource +from varats.project.varats_command import VCommand from varats.project.varats_project import VProject from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap from varats.utils.settings import bb_cfg @@ -55,8 +56,8 @@ class HyTeg(VProject): WORKLOADS = { WorkloadSet(WorkloadCategory.EXAMPLE): [ - Command( - SourceRoot("HyTeG") / "build" / "apps" / "profiling" / + VCommand( + SourceRoot("HyTeg") / "build" / "apps" / "profiling" / RSBinary('ProfilingApp'), label='ProfilingApp' ) @@ -70,7 +71,7 @@ def binaries_for_revision( binaries = RevisionBinaryMap(get_local_project_git_path(HyTeg.NAME)) binaries.specify_binary( - "build/apps/profiling/ProfilingApp", + "ProfilingApp", BinaryType.EXECUTABLE, only_valid_in=SingleRevision( "f4711dadc3f61386e6ccdc704baa783253332db2" From 80fc3d00a077609f20ab666e1ef2605339954fcb Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Wed, 18 Oct 2023 11:39:18 +0200 Subject: [PATCH 180/224] Adapts instr threshold --- varats/varats/experiments/vara/feature_perf_precision.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 7a4eb4e96..606ce51fc 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -90,11 +90,11 @@ def get_threshold(project: VProject) -> int: "SynthIPTemplate2", "SynthIPCombined" ]: print("Don't instrument everything") - return 20 + return 10 return 0 - return 50 + 
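Patch 178 above softens the begin/end pairing of TEF duration events: an end without a matching begin is now counted instead of aborting the parse. A simplified, self-contained version of that pairing logic (the real parser also matches on UUIDs and feature categories):

def pair_duration_events(trace_events):
    # Keep begun events open, pop the matching entry when an end arrives,
    # and count unmatched ends rather than failing the whole report.
    open_events, spans, missing_opens = [], [], 0
    for kind, name, timestamp in trace_events:
        if kind == "B":
            open_events.append((name, timestamp))
            continue
        match = next((ev for ev in open_events if ev[0] == name), None)
        if match is None:
            missing_opens += 1
            continue
        open_events.remove(match)
        spans.append((name, match[1], timestamp))
    return spans, open_events, missing_opens


# pair_duration_events([("B", "FeatureA", 0), ("E", "FeatureA", 42)])
# -> ([("FeatureA", 0, 42)], [], 0)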
return 100


 class AnalysisProjectStepBase(OutputFolderStep):

From 359c5dfa27f3979197be537b5c86974d3966aa8d Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Wed, 18 Oct 2023 11:41:05 +0200
Subject: [PATCH 181/224] Adapts workload selection

---
 .../vara/feature_perf_precision.py | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py
index 606ce51fc..98d4d9c72 100644
--- a/varats/varats/experiments/vara/feature_perf_precision.py
+++ b/varats/varats/experiments/vara/feature_perf_precision.py
@@ -57,11 +57,22 @@ def perf_prec_workload_commands(
     project: VProject, binary: ProjectBinaryWrapper
 ) -> tp.List[ProjectCommand]:
     """Uniformly select the workloads that should be processed."""
-    return workload_commands(project, binary, [
-        WorkloadCategory.EXAMPLE
-    ]) + workload_commands(project, binary, [
-        WorkloadCategory.SMALL
-    ]) + workload_commands(project, binary, [WorkloadCategory.MEDIUM])
+
+    wl_commands = []
+
+    if not project.name.startswith(
+        "SynthIP"
+    ) and project.name != "SynthSAFieldSensitivity":
+        # Example commands from these CS are too "fast"
+        wl_commands += workload_commands(
+            project, binary, [WorkloadCategory.EXAMPLE]
+        )
+
+    wl_commands += workload_commands(project, binary, [WorkloadCategory.SMALL])
+
+    wl_commands += workload_commands(project, binary, [WorkloadCategory.MEDIUM])
+
+    return wl_commands


 def select_project_binaries(project: VProject) -> tp.List[ProjectBinaryWrapper]:

From 88caccaef473d16dbd2aff2baf4d357d453fc6a2 Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Wed, 18 Oct 2023 12:09:02 +0200
Subject: [PATCH 182/224] Introduces new CS

---
 .../perf_tests/feature_perf_cs_collection.py | 233 ++++++++++++++++++
 1 file changed, 233 insertions(+)

diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py
index fda625872..33316127d 100644
--- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py
+++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py
@@ -1130,3 +1130,236 @@ def compile(self) -> None:
     def recompile(self) -> None:
         """Recompile the project."""
         _do_feature_perf_cs_collection_recompile(self)
+
+
+class SynthCTTraitBased(VProject):
+    """Synthetic case-study project for compile time variability using
+    traits."""
+
+    NAME = 'SynthCTTraitBased'
+    GROUP = 'perf_tests'
+    DOMAIN = ProjectDomains.TEST
+
+    SOURCE = [
+        bb.source.Git(
+            remote="https://github.com/se-sic/FeaturePerfCSCollection.git",
+            local="SynthCTTraitBased",
+            refspec="origin/f-SynthCompileTimeCS",
+            limit=None,
+            shallow=False,
+            version_filter=project_filter_generator("SynthCTTraitBased")
+        ),
+        FeatureSource()
+    ]
+
+    WORKLOADS = {
+        WorkloadSet(WorkloadCategory.EXAMPLE): [
+            VCommand(
+                SourceRoot("SynthCTTraitBased") / RSBinary("CTTraitBased"),
+                label="CompileTime-TraitBased"
+            )
+        ]
+    }
+
+    @staticmethod
+    def binaries_for_revision(
+        revision: ShortCommitHash  # pylint: disable=W0613
+    ) -> tp.List[ProjectBinaryWrapper]:
+        binary_map = RevisionBinaryMap(
+            get_local_project_git_path(SynthCTTraitBased.NAME)
+        )
+
+        binary_map.specify_binary(
+            "build/bin/CTTraitBased",
+            BinaryType.EXECUTABLE,
+            only_valid_in=RevisionRange("a4a133a186", "HEAD")
+        )
+
+        return binary_map[revision]
+
+    def run_tests(self) -> None:
+        pass
+
+    def compile(self) -> None:
+        """Compile the project."""
+        _do_feature_perf_cs_collection_compile(
+            self,
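The workload-selection policy introduced in patch 181 above can be restated as pure data, which makes the special case easy to test. A minimal sketch (category names stand in for the WorkloadCategory members):

def sketch_selected_categories(project_name):
    # Every project gets SMALL and MEDIUM workloads; EXAMPLE workloads are
    # skipped for the tiny synthetic projects whose example runs finish too
    # quickly to yield usable measurements.
    categories = ["SMALL", "MEDIUM"]
    if not (project_name.startswith("SynthIP") or
            project_name == "SynthSAFieldSensitivity"):
        categories.insert(0, "EXAMPLE")
    return categories


assert sketch_selected_categories("SynthIPRuntime") == ["SMALL", "MEDIUM"]
assert sketch_selected_categories("HyTeg")[0] == "EXAMPLE"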
"FPCSC_ENABLE_PROJECT_SYNTHCTTRAITBASED" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + + +class SynthCTPolicies(VProject): + """Synthetic case-study project for compile time variability using + policies.""" + + NAME = 'SynthCTPolicies' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthCTPolicies", + refspec="origin/f-SynthCompileTimeCS", + limit=None, + shallow=False, + version_filter=project_filter_generator("SynthCTPolicies") + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot("SynthCTPolicies") / RSBinary("CTPolicies"), + label="CompileTime-Policies" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthCTPolicies.NAME) + ) + + binary_map.specify_binary( + "build/bin/CTPolicies", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("a4a133a186", "HEAD") + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHCTPOLICIES" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + + +class SynthCTCRTP(VProject): + """Synthetic case-study project for compile time variability using CRTP.""" + + NAME = 'SynthCTCRTP' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local=NAME, + refspec="origin/f-SynthCompileTimeCS", + limit=None, + shallow=False, + version_filter=project_filter_generator(NAME) + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot(NAME) / RSBinary("CTCRTP"), label="CompileTime-CRTP" + ) + ] + } + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthCTCRTP.NAME) + ) + + binary_map.specify_binary( + "build/bin/CTCRTP", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("a4a133a186", "HEAD") + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHCTCRTP" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + + +class SynthCTTemplateSpecialization(VProject): + """Synthetic case-study project for compile time variability using template + specialization.""" + + NAME = 'SynthCTTemplateSpecialization' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local=NAME, + refspec="origin/f-SynthCompileTimeCS", + limit=None, + shallow=False, + version_filter=project_filter_generator(NAME) + ), + FeatureSource() + ] + + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot(NAME) / RSBinary("CTTemplateSpecialization"), + label="CompileTime-Template-Specialization" + ) + ] + } + + @staticmethod + def 
binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + binary_map = RevisionBinaryMap( + get_local_project_git_path(SynthCTTemplateSpecialization.NAME) + ) + + binary_map.specify_binary( + "build/bin/CTTemplateSpecialization", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("a4a133a186", "HEAD") + ) + + return binary_map[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHCTSPECIALIZATION" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) From 1a773cac4b9fee0b559bf367252b0cc767fe6bda Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Wed, 18 Oct 2023 15:52:48 +0200 Subject: [PATCH 183/224] Fixes Synth commit ranges --- .../perf_tests/feature_perf_cs_collection.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 33316127d..7c1b20e00 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -1143,7 +1143,7 @@ class SynthCTTraitBased(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthCTTraitBased", - refspec="origin/f-SynthCompileTimeCS", + refspec="origin/HEAD", limit=None, shallow=False, version_filter=project_filter_generator("SynthCTTraitBased") @@ -1171,7 +1171,7 @@ def binaries_for_revision( binary_map.specify_binary( "build/bin/CTTraitBased", BinaryType.EXECUTABLE, - only_valid_in=RevisionRange("a4a133a186", "HEAD") + only_valid_in=RevisionRange("6d50a6efd5", "master") ) return binary_map[revision] @@ -1202,7 +1202,7 @@ class SynthCTPolicies(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthCTPolicies", - refspec="origin/f-SynthCompileTimeCS", + refspec="origin/HEAD", limit=None, shallow=False, version_filter=project_filter_generator("SynthCTPolicies") @@ -1230,7 +1230,7 @@ def binaries_for_revision( binary_map.specify_binary( "build/bin/CTPolicies", BinaryType.EXECUTABLE, - only_valid_in=RevisionRange("a4a133a186", "HEAD") + only_valid_in=RevisionRange("6d50a6efd5", "master") ) return binary_map[revision] @@ -1260,7 +1260,7 @@ class SynthCTCRTP(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local=NAME, - refspec="origin/f-SynthCompileTimeCS", + refspec="origin/HEAD", limit=None, shallow=False, version_filter=project_filter_generator(NAME) @@ -1287,7 +1287,7 @@ def binaries_for_revision( binary_map.specify_binary( "build/bin/CTCRTP", BinaryType.EXECUTABLE, - only_valid_in=RevisionRange("a4a133a186", "HEAD") + only_valid_in=RevisionRange("6d50a6efd5", "master") ) return binary_map[revision] @@ -1318,7 +1318,7 @@ class SynthCTTemplateSpecialization(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local=NAME, - refspec="origin/f-SynthCompileTimeCS", + refspec="origin/HEAD", limit=None, shallow=False, version_filter=project_filter_generator(NAME) @@ -1346,7 +1346,7 @@ def binaries_for_revision( binary_map.specify_binary( "build/bin/CTTemplateSpecialization", BinaryType.EXECUTABLE, - only_valid_in=RevisionRange("a4a133a186", "HEAD") + only_valid_in=RevisionRange("6d50a6efd5", 
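Every binaries_for_revision implementation above follows the same registration/lookup pattern: declare once which binaries exist in which revision window, then index the map with a concrete revision. Isolated, with the paths and hashes taken from the patches above (this assumes a local checkout of the project exists):

from benchbuild.utils.revision_ranges import RevisionRange

from varats.project.project_util import BinaryType, get_local_project_git_path
from varats.utils.git_util import RevisionBinaryMap, ShortCommitHash

binary_map = RevisionBinaryMap(get_local_project_git_path("SynthCTCRTP"))
binary_map.specify_binary(
    "build/bin/CTCRTP",
    BinaryType.EXECUTABLE,
    only_valid_in=RevisionRange("6d50a6efd5", "master")
)
# Indexing yields the binaries valid at that commit (empty if out of range).
binaries = binary_map[ShortCommitHash("6d50a6efd5")]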
"master") ) return binary_map[revision] From 3da6c136aeb26a5d5c019be3a90e05009e39fe62 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Wed, 18 Oct 2023 16:34:19 +0200 Subject: [PATCH 184/224] Fixes command --- varats/varats/projects/cpp_projects/dune.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/projects/cpp_projects/dune.py b/varats/varats/projects/cpp_projects/dune.py index fcc50994b..4d3f58a91 100644 --- a/varats/varats/projects/cpp_projects/dune.py +++ b/varats/varats/projects/cpp_projects/dune.py @@ -115,7 +115,7 @@ class DunePerfRegression(VProject): label='poisson_yasp_q2_3d', creates=['poisson-yasp-q2-3d.vtu'] ), - Command( + VCommand( SourceRoot( "dune-VaRA/dune-performance-regressions/build-cmake/src" ) / RSBinary('poisson_alugrid'), From d2f6b5b43cbd15042519b64141e2c306faf733c6 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 20 Oct 2023 08:34:14 +0200 Subject: [PATCH 185/224] Straiten out checking --- .../feature_perf_precision_database.py | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 7567e37d5..fa93ff63d 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -143,6 +143,10 @@ def precise_pim_regression_check( continue new_values = current_pim[feature] + if np.mean(old_values) < 20 and np.mean(new_values) < 20: + # TODO: adapt this to a relative value + continue + ttest_res = ttest_ind(old_values, new_values) # TODO: check, maybe we need a "very small value cut off" @@ -152,8 +156,8 @@ def precise_pim_regression_check( else: print(f"Could not find feature {feature} in new trace.") # TODO: how to handle this? - raise NotImplementedError() - is_regression = True + # raise NotImplementedError() + # is_regression = True return is_regression @@ -171,12 +175,17 @@ def cliffs_delta_pim_regression_check( continue new_values = current_pim[feature] + # if np.mean(old_values) < 20 and np.mean(new_values) < 20: + # # TODO: adapt this to a relative value + # continue + d, res = cliffs_delta(old_values, new_values) # print(f"{d=}, {res=}") # if d > 0.70 or d < -0.7: - if res == "large": + # if res == "large": + if d > 0.7 or d < -0.7: # print( # f"{self.name} found regression for feature {feature}." # ) @@ -184,8 +193,8 @@ def cliffs_delta_pim_regression_check( else: print(f"Could not find feature {feature} in new trace.") # TODO: how to handle this? - raise NotImplementedError() - is_regression = True + # raise NotImplementedError() + # is_regression = True return is_regression @@ -199,23 +208,25 @@ def sum_pim_regression_check( old_values for feature, old_values in baseline_pim.items() if feature != "Base" ] - print(f"{baseline_pim_totals=}") current_pim_totals: tp.List[tp.List[int]] = [ current_values for feature, current_values in current_pim.items() if feature != "Base" ] - print(f"{current_pim_totals=}") baseline_pim_total: tp.List[int] = [ sum(values) for values in zip(*baseline_pim_totals) ] - print(f"{baseline_pim_total=}") current_pim_total: tp.List[int] = [ sum(values) for values in zip(*current_pim_totals) ] - print(f"{current_pim_total=}") - # TODO: does not work for large numbers + if not baseline_pim_total and not current_pim_total: + # How do we get here? 
+ return False + + # d, res = cliffs_delta(baseline_pim_total, current_pim_total) + # # return res == "large" + # return d > 0.6 or d < -0.6 return ttest_ind(baseline_pim_total, current_pim_total).pvalue < 0.05 @@ -226,6 +237,7 @@ def pim_regression_check( """Compares two pims and determines if there was a regression between the baseline and current.""" # return cliffs_delta_pim_regression_check(baseline_pim, current_pim) + # return sum_pim_regression_check(baseline_pim, current_pim) return precise_pim_regression_check(baseline_pim, current_pim) @@ -457,13 +469,18 @@ def get_regressing_config_ids_gt( ) == np.mean(new_time.measurements_wall_clock_time): gt[config_id] = False else: - # TODO: fix to use same check as profilers + # d, res = cliffs_delta( + # old_time.measurements_wall_clock_time, + # new_time.measurements_wall_clock_time + # ) - # TODO: double check ttest handling ttest_res = ttest_ind( old_time.measurements_wall_clock_time, new_time.measurements_wall_clock_time ) + + # if res == "large": + # if d > 0.7 or d < -0.7: if ttest_res.pvalue < 0.05: gt[config_id] = True else: @@ -521,6 +538,7 @@ def compute_profiler_predictions( return None try: + #print(f"{report_files[0]}") result_dict[config_id] = profiler.is_regression( report_files[0], patch_name ) @@ -531,6 +549,7 @@ def compute_profiler_predictions( ) print(exception) print(traceback.format_exc()) + # raise exception return result_dict From 8fab50f1b7af47761158404e65330379ed11e823 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 23 Oct 2023 09:41:20 +0200 Subject: [PATCH 186/224] Adapts hyteg setup --- varats-core/varats/utils/git_commands.py | 6 +++++- varats/varats/projects/cpp_projects/hyteg.py | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/varats-core/varats/utils/git_commands.py b/varats-core/varats/utils/git_commands.py index eeb7b9111..3eb89b839 100644 --- a/varats-core/varats/utils/git_commands.py +++ b/varats-core/varats/utils/git_commands.py @@ -52,11 +52,15 @@ def init_all_submodules(folder: Path) -> None: git("-C", folder.absolute(), "submodule", "init") -def update_all_submodules(folder: Path, recursive: bool = True) -> None: +def update_all_submodules( + folder: Path, recursive: bool = True, init: bool = False +) -> None: """Updates all submodules.""" git_params = ["submodule", "update"] if recursive: git_params.append("--recursive") + if init: + git_params.append("--init") git("-C", folder, git_params) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 20f1de139..e3af6bd36 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -19,6 +19,7 @@ from varats.project.sources import FeatureSource from varats.project.varats_command import VCommand from varats.project.varats_project import VProject +from varats.utils.git_commands import init_all_submodules, update_all_submodules from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap from varats.utils.settings import bb_cfg @@ -45,7 +46,7 @@ class HyTeg(VProject): SOURCE = [ PaperConfigSpecificGit( project_name="HyTeg", - remote="git@github.com:se-sic/hyteg-VaRA.git", + remote="https://github.com/se-sic/hyteg-VaRA.git", local="HyTeg", refspec="origin/HEAD", limit=None, @@ -86,6 +87,8 @@ def compile(self) -> None: mkdir("-p", hyteg_source / "build") + update_all_submodules(hyteg_source, recursive=True, init=True) + cc_compiler = bb.compiler.cc(self) cxx_compiler = bb.compiler.cxx(self) From 
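Stripped of the cut-offs and commented-out variants, the regression checks in patch 185 reduce to a two-sample t-test over the per-run measurements. A self-contained sketch of that statistical core, with the same equal-means shortcut the code uses to avoid degenerate samples:

import numpy as np
from scipy.stats import ttest_ind


def mean_shift_regression(old, new, alpha=0.05):
    # Flag a regression when the two wall-clock samples differ
    # significantly under a two-sample t-test.
    if np.mean(old) == np.mean(new):
        return False
    return ttest_ind(old, new).pvalue < alpha


assert mean_shift_regression([100, 101, 99], [150, 152, 149])
assert not mean_shift_regression([100, 101, 99], [100, 101, 99])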
f581984acf5e75170d92912223b730ef45d7c7b4 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Wed, 25 Oct 2023 09:53:10 +0200 Subject: [PATCH 187/224] Disable phasar for hyteg --- varats/varats/experiments/vara/feature_perf_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 98d4d9c72..b5af04943 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -87,7 +87,7 @@ def select_project_binaries(project: VProject) -> tp.List[ProjectBinaryWrapper]: def get_extra_cflags(project: VProject) -> tp.List[str]: - if project.name == "DunePerfRegression": + if project.name in ["DunePerfRegression", "HyTeg"]: # Disable phasar for dune as the analysis cannot handle dunes size return ["-mllvm", "--vara-disable-phasar"] From 5669bd17e77d6eedade479873c504b56061bd381 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Wed, 25 Oct 2023 09:55:12 +0200 Subject: [PATCH 188/224] Checkout submodules for HyTeg --- varats-core/varats/utils/git_commands.py | 6 +++++- varats/varats/projects/cpp_projects/hyteg.py | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/varats-core/varats/utils/git_commands.py b/varats-core/varats/utils/git_commands.py index eeb7b9111..3eb89b839 100644 --- a/varats-core/varats/utils/git_commands.py +++ b/varats-core/varats/utils/git_commands.py @@ -52,11 +52,15 @@ def init_all_submodules(folder: Path) -> None: git("-C", folder.absolute(), "submodule", "init") -def update_all_submodules(folder: Path, recursive: bool = True) -> None: +def update_all_submodules( + folder: Path, recursive: bool = True, init: bool = False +) -> None: """Updates all submodules.""" git_params = ["submodule", "update"] if recursive: git_params.append("--recursive") + if init: + git_params.append("--init") git("-C", folder, git_params) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 5de8f13b4..ed9e388ed 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -19,6 +19,7 @@ from varats.project.sources import FeatureSource from varats.project.varats_command import VCommand from varats.project.varats_project import VProject +from varats.utils.git_commands import init_all_submodules, update_all_submodules from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap from varats.utils.settings import bb_cfg @@ -86,6 +87,8 @@ def compile(self) -> None: mkdir("-p", hyteg_source / "build") + update_all_submodules(hyteg_source, recursive=True, init=True) + cc_compiler = bb.compiler.cc(self) cxx_compiler = bb.compiler.cxx(self) From ff7f3038387e8e55880f18e7a6b34936cee1adc6 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Wed, 25 Oct 2023 12:21:44 +0200 Subject: [PATCH 189/224] Apply suggestions from code review --- varats/varats/projects/cpp_projects/hyteg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index ed9e388ed..6f2c8a5e1 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -19,7 +19,7 @@ from varats.project.sources import FeatureSource from varats.project.varats_command import VCommand from varats.project.varats_project import VProject -from varats.utils.git_commands import 
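The `init` flag added to update_all_submodules in the patches above simply extends the git argument list. The equivalent plain-git command line it assembles, as a runnable sketch:

from pathlib import Path


def submodule_update_cmdline(folder: Path, recursive=True, init=False):
    # Mirrors update_all_submodules: `git -C <folder> submodule update
    # [--recursive] [--init]`.
    git_params = ["submodule", "update"]
    if recursive:
        git_params.append("--recursive")
    if init:
        git_params.append("--init")
    return ["git", "-C", str(folder), *git_params]


assert submodule_update_cmdline(Path("HyTeg"), init=True)[-1] == "--init"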
init_all_submodules, update_all_submodules +from varats.utils.git_commands import update_all_submodules from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap from varats.utils.settings import bb_cfg From b303116150436bf60f0bbf118a9dc7b237bd5a51 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Mon, 30 Oct 2023 20:10:22 +0100 Subject: [PATCH 190/224] - Adapt HyTeg to be able to compile with external Eigen directory set via environment variables --- varats/varats/projects/cpp_projects/hyteg.py | 34 ++++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 6f2c8a5e1..fd18ccb91 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -1,9 +1,11 @@ """Adds the HyTeg framework as a project to VaRA-TS.""" +import logging +import os import typing as tp import benchbuild as bb from benchbuild.command import WorkloadSet, SourceRoot -from benchbuild.utils.cmd import make, cmake, mkdir +from benchbuild.utils.cmd import ninja, cmake, mkdir from benchbuild.utils.revision_ranges import SingleRevision from benchbuild.utils.settings import get_number_of_jobs from plumbum import local @@ -19,10 +21,12 @@ from varats.project.sources import FeatureSource from varats.project.varats_command import VCommand from varats.project.varats_project import VProject -from varats.utils.git_commands import update_all_submodules +from varats.utils.git_commands import init_all_submodules, update_all_submodules from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap from varats.utils.settings import bb_cfg +LOG = logging.getLogger(__name__) + class HyTeg(VProject): """ @@ -92,22 +96,32 @@ def compile(self) -> None: cc_compiler = bb.compiler.cc(self) cxx_compiler = bb.compiler.cxx(self) + cmake_args = [ + "-G", "Ninja", "..", "-DWALBERLA_BUILD_WITH_MPI=OFF", + "-DHYTEG_BUILD_DOC=OFF" + ] + + if (eigen_path := os.getenv("EIGEN_PATH")): + cmake_args.append(f"-DEIGEN_DIR={eigen_path}") + else: + LOG.warning( + "EIGEN_PATH environment variable not set! This will cause compilation errors when using " + "configurations" + ) + with local.cwd(hyteg_source / "build"): with local.env(CC=str(cc_compiler), CXX=str(cxx_compiler)): - bb.watch(cmake)( - "..", "-DWALBERLA_BUILD_WITH_MPI=OFF", - "-DHYTEG_BUILD_DOC=OFF" - ) + bb.watch(cmake)(*cmake_args) - with local.cwd(hyteg_source / "build" / "apps" / "profiling"): - bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) + with local.cwd(hyteg_source / "build"): + bb.watch(ninja)("ProfilingApp") def recompile(self) -> None: """Recompiles HyTeg e.g. 
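Patch 190 above makes the Eigen location configurable through the environment. How the hook is meant to be used, with a purely illustrative path (any comma-free Eigen checkout works):

import os

# Set before benchbuild invokes HyTeg.compile(); exporting it in the shell
# or in the slurm script has the same effect.
os.environ["EIGEN_PATH"] = "/opt/deps/eigen"

cmake_args = ["-G", "Ninja", "..", "-DWALBERLA_BUILD_WITH_MPI=OFF"]
# The walrus operator fetches and tests in one step; unset or empty values
# fall through to the warning branch in the patch above.
if (eigen_path := os.getenv("EIGEN_PATH")):
    cmake_args.append(f"-DEIGEN_DIR={eigen_path}")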
after a patch has been applied.""" hyteg_source = local.path(self.source_of(self.primary_source)) - with local.cwd(hyteg_source / "build" / "apps" / "profiling"): - bb.watch(make)("-j", get_number_of_jobs(bb_cfg())) + with local.cwd(hyteg_source / "build"): + bb.watch(ninja)("ProfilingApp") def run_tests(self) -> None: pass From 775a8fdadc5fdac6c53a6729e51602da588d20f3 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Mon, 30 Oct 2023 20:20:34 +0100 Subject: [PATCH 191/224] - Added information on special handling with HyTeg and configurations --- varats/varats/projects/cpp_projects/hyteg.py | 22 ++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index fd18ccb91..599473e24 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -33,15 +33,29 @@ class HyTeg(VProject): C++ framework for large scale high performance finite element simulations based on (but not limited to) matrix-free geometric multigrid. - Note: - Currently HyTeg CANNOT be compiled with the Phasar passes activated - in vara. - Trying to do so will crash the compiler + Notes: + 1. + Currently, HyTeg CANNOT be compiled with the Phasar passes activated + in vara. Trying to do so will crash the compiler If you use Dune with an experiment that uses the vara compiler, add `-mllvm --vara-disable-phasar` to the projects `cflags` to disable phasar passes. This will still allow to analyse compile-time variability. + + 2. + Due to the way that benchbuild generates the build folder names when running experiments in different + configurations, HyTeg currently DOES NOT work out of the box when creating a case study with multiple + configurations. This is due to benchbuild creating a temporary folder name with a comma in it to separate + the revision and configuration id. + This comma will be misinterpreted when the path for the eigen library is passed onto the linker. + + There is a limited workaround for this: + 1. Copy the eigen library revision that you want HyTeg to use to some other accessible location (That has + no comma in its absolute path) + 2. Set the environment variable EIGEN_PATH to point to the absolute path of that directory + - This can be achieved by either EXPORT-ing it manually, adding it to your .benchbuild.yml configuration or + (when running with slurm) adding the export to your slurm scripts """ NAME = 'HyTeg' GROUP = 'cpp_projects' From 930b87079a649c7a6d07b743e15c931a946b7e8c Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Mon, 30 Oct 2023 20:34:07 +0100 Subject: [PATCH 192/224] - Pre-commit --- varats/varats/projects/cpp_projects/hyteg.py | 29 ++++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 599473e24..18d6aa869 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -44,18 +44,23 @@ class HyTeg(VProject): This will still allow to analyse compile-time variability. 2. - Due to the way that benchbuild generates the build folder names when running experiments in different - configurations, HyTeg currently DOES NOT work out of the box when creating a case study with multiple - configurations. This is due to benchbuild creating a temporary folder name with a comma in it to separate - the revision and configuration id. 
- This comma will be misinterpreted when the path for the eigen library is passed onto the linker. + Due to the way that benchbuild generates the build folder names when + running experiments in different configurations, HyTeg currently DOES + NOT work out of the box when creating a case study with multiple + configurations. This is due to benchbuild creating a temporary folder + name with a comma in it to separate the revision and configuration + id. + This comma will be misinterpreted when the path for the eigen library + is passed onto the linker. There is a limited workaround for this: - 1. Copy the eigen library revision that you want HyTeg to use to some other accessible location (That has - no comma in its absolute path) - 2. Set the environment variable EIGEN_PATH to point to the absolute path of that directory - - This can be achieved by either EXPORT-ing it manually, adding it to your .benchbuild.yml configuration or - (when running with slurm) adding the export to your slurm scripts + 1. Copy the eigen library revision that you want HyTeg to use to some + other accessible location (That has no comma in its absolute path) + 2. Set the environment variable EIGEN_PATH to point to the absolute + path of that directory + - This can be achieved by either EXPORT-ing it manually, adding it + to your .benchbuild.yml configuration or (when running with slurm) + adding the export to your slurm scripts """ NAME = 'HyTeg' GROUP = 'cpp_projects' @@ -119,8 +124,8 @@ def compile(self) -> None: cmake_args.append(f"-DEIGEN_DIR={eigen_path}") else: LOG.warning( - "EIGEN_PATH environment variable not set! This will cause compilation errors when using " - "configurations" + "EIGEN_PATH environment variable not set! This will cause" + " compilation errors when using configurations" ) with local.cwd(hyteg_source / "build"): From 34ede35a114a53e20338fe99a493b7b91cd7081a Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 31 Oct 2023 14:07:04 +0100 Subject: [PATCH 193/224] Apply suggestions from code review --- varats/varats/projects/cpp_projects/hyteg.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 18d6aa869..62fee6190 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -21,7 +21,6 @@ from varats.project.sources import FeatureSource from varats.project.varats_command import VCommand from varats.project.varats_project import VProject -from varats.utils.git_commands import init_all_submodules, update_all_submodules from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap from varats.utils.settings import bb_cfg @@ -110,8 +109,6 @@ def compile(self) -> None: mkdir("-p", hyteg_source / "build") - update_all_submodules(hyteg_source, recursive=True, init=True) - cc_compiler = bb.compiler.cc(self) cxx_compiler = bb.compiler.cxx(self) From dab6199b202c0e0f81e6a19f5ce35159837320eb Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 14 Nov 2023 09:58:18 +0100 Subject: [PATCH 194/224] Switch to new disable compiler flag --- varats/varats/experiments/vara/feature_perf_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index b5af04943..04df6f8c1 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -89,7 
+89,7 @@ def select_project_binaries(project: VProject) -> tp.List[ProjectBinaryWrapper]: def get_extra_cflags(project: VProject) -> tp.List[str]: if project.name in ["DunePerfRegression", "HyTeg"]: # Disable phasar for dune as the analysis cannot handle dunes size - return ["-mllvm", "--vara-disable-phasar"] + return ["-fvara-disable-phasar"] return [] From 0c9df7e7d8f6935395cfea4977356d247ae28072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Tue, 14 Nov 2023 10:00:43 +0100 Subject: [PATCH 195/224] Adds synthetic feature projects --- .../perf_tests/feature_perf_cs_collection.py | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 247b7491b..853efee77 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -1309,3 +1309,97 @@ def compile(self) -> None: def recompile(self) -> None: """Recompile the project.""" _do_feature_perf_cs_collection_recompile(self) + + +class SynthFeatureLargeConfigSpace(VProject): + """Synthetic case-study project for testing flow sensitivity.""" + + NAME = 'SynthFeatureLargeConfigSpace' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthFeatureLargeConfigSpace", + refspec="origin/f-FeatureCaseStudies", + limit=None, + shallow=False, + version_filter=project_filter_generator( + "SynthFeatureLargeConfigSpace" + ) + ), + FeatureSource() + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + return RevisionBinaryMap( + get_local_project_git_path(SynthFeatureLargeConfigSpace.NAME) + ).specify_binary( + "build/bin/LargeConfigSpace", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("6863c78c24", "HEAD") + )[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHFEATURELARGECONFIGSPACE" + ) + + def recompile(self) -> None: + """Recompile the project.""" + _do_feature_perf_cs_collection_recompile(self) + + +class SynthFeatureRestrictedConfigSpace(VProject): + """Synthetic case-study project for testing flow sensitivity.""" + + NAME = 'SynthFeatureRestrictedConfigSpace' + GROUP = 'perf_tests' + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + bb.source.Git( + remote="https://github.com/se-sic/FeaturePerfCSCollection.git", + local="SynthFeatureRestrictedConfigSpace", + refspec="origin/f-FeatureCaseStudies", + limit=None, + shallow=False, + version_filter=project_filter_generator( + "SynthFeatureRestrictedConfigSpace" + ) + ), + FeatureSource() + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash # pylint: disable=W0613 + ) -> tp.List[ProjectBinaryWrapper]: + return RevisionBinaryMap( + get_local_project_git_path(SynthFeatureRestrictedConfigSpace.NAME) + ).specify_binary( + "build/bin/RestrictedConfigSpace", + BinaryType.EXECUTABLE, + only_valid_in=RevisionRange("6863c78c24", "HEAD") + )[revision] + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Compile the project.""" + _do_feature_perf_cs_collection_compile( + self, "FPCSC_ENABLE_PROJECT_SYNTHFEATURERESTRICTEDCONFIGSPACE" + ) + + def recompile(self) -> None: + """Recompile 
the project.""" + _do_feature_perf_cs_collection_recompile(self) From 5d3b8cab7bd95feddcce71de74ca7c1ae01c8457 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 14 Nov 2023 10:47:39 +0100 Subject: [PATCH 196/224] Fixes feature interaction name cleaning for pim traces --- .../feature_perf_precision_database.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index fa93ff63d..3db01a046 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -29,19 +29,21 @@ LOG = logging.getLogger(__name__) -def get_interactions_from_fr_string(interactions: str) -> str: +def get_interactions_from_fr_string(interactions: str, sep: str = ",") -> str: """Convert the feature strings in a TEFReport from FR(x,y) to x*y, similar to the format used by SPLConqueror.""" interactions = ( interactions.replace("FR", "").replace("(", "").replace(")", "") ) - interactions_list = interactions.split(",") + interactions_list = interactions.split(sep) + + # Features cannot interact with itself, so remove duplicates + interactions_list = list(set(interactions_list)) + # Ignore interactions with base, but do not remove base if it's the only # feature if "Base" in interactions_list and len(interactions_list) > 1: interactions_list.remove("Base") - # Features cannot interact with itself, so remove duplicastes - interactions_list = list(set(interactions_list)) interactions_str = "*".join(interactions_list) @@ -336,7 +338,8 @@ def __aggregate_pim_data(reports) -> tp.DefaultDict[str, tp.List[int]]: name = get_interactions_from_fr_string( old_pim_report._translate_interaction( region_inter.interaction - ) + ), + sep="*" ) per_report_acc_pim[name] += region_inter.time @@ -363,9 +366,9 @@ def is_regression( raise NotImplementedError() new_acc_pim = self.__aggregate_pim_data(opt_mr.reports()) - except Exception as e: + except Exception as exc: print(f"FAILURE: Report parsing failed: {report_path}") - print(e) + print(exc) return False return pim_regression_check(old_acc_pim, new_acc_pim) From 9f3a9120d36aafda0ec266c991fa264bed5ebb76 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 14 Nov 2023 10:48:31 +0100 Subject: [PATCH 197/224] Catch exceptions and report errors instead of failing --- .../feature_perf_precision_database.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 3db01a046..a65d76cea 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -156,7 +156,11 @@ def precise_pim_regression_check( # print(f"Found regression for feature {feature}.") is_regression = True else: - print(f"Could not find feature {feature} in new trace.") + if np.mean(old_values) > 20: + print( + f"Could not find feature {feature} in new trace. " + f"({np.mean(old_values)}us lost)" + ) # TODO: how to handle this? 
# raise NotImplementedError() # is_regression = True @@ -429,7 +433,12 @@ def get_patch_names(case_study: CaseStudy) -> tp.List[str]: return [] # TODO: fix to prevent double loading - time_reports = fpp.MPRTimeReportAggregate(report_files[0].full_path()) + try: + time_reports = fpp.MPRTimeReportAggregate(report_files[0].full_path()) + except: + print(f"Could not load report from: {report_files[0]}") + return [] + return time_reports.get_patch_names() @@ -709,9 +718,12 @@ def load_precision_data(case_studies, profilers) -> pd.DataFrame: rev = case_study.revisions[0] project_name = case_study.project_name - ground_truth = get_regressing_config_ids_gt( - project_name, case_study, rev, patch_name - ) + try: + ground_truth = get_regressing_config_ids_gt( + project_name, case_study, rev, patch_name + ) + except: + continue for profiler in profilers: new_row = { From a28b3fb75e4355ab806d131abc8d7c0110dd47f1 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 14 Nov 2023 10:49:53 +0100 Subject: [PATCH 198/224] Adds raw memory stats to table --- .../varats/tables/feature_perf_precision.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 13493bc02..b587b0943 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -420,7 +420,7 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: columns='Profiler', values=[ 'precision', 'recall', 'overhead_time_rel', - 'overhead_memory_rel' + 'overhead_memory_rel', 'overhead_memory' ] ) @@ -430,7 +430,8 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: # print(f"pivot_df=\n{pivot_df}") columns = [ - 'precision', 'recall', 'overhead_time_rel', 'overhead_memory_rel' + 'precision', 'recall', 'overhead_time_rel', 'overhead_memory_rel', + 'overhead_memory' ] pivot_df = pivot_df.reindex([ (prof.name, c) for prof in profilers for c in columns @@ -446,12 +447,14 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: # Rename columns overhead_time_c_name = "$\Delta$ Time $(\%)$" overhead_memory_c_name = "$\Delta$ Mem $(\%)$" + overhead_memory_val_c_name = "$\Delta$ Mem $(Kbyte)$" pivot_df = pivot_df.rename( columns={ "precision": "Precision", "recall": "Recall", "overhead_time_rel": overhead_time_c_name, "overhead_memory_rel": overhead_memory_c_name, + "overhead_memory": overhead_memory_val_c_name, } ) @@ -463,7 +466,10 @@ def add_extras(doc: Document) -> None: doc.packages.append(Package("amssymb")) if table_format.is_latex(): - style.format(precision=2) + mv_columns = [ + (prof.name, overhead_memory_val_c_name) for prof in profilers + ] + style.format({col: "{:.0f}" for col in mv_columns}, precision=2) ryg_map = plt.get_cmap('RdYlGn') ryg_map = cmap_map(lambda x: x / 1.2 + 0.2, ryg_map) @@ -500,8 +506,18 @@ def add_extras(doc: Document) -> None: vmax=100.0, ) + # style.background_gradient( + # cmap=gray_map, + # subset=[(prof.name, overhead_memory_val_c_name) + # for prof in profilers], + # vmin=0.0, + # # vmax=100.0, + # ) + kwargs["convert_css"] = True - kwargs["column_format"] = "l" + "".join(["rrrr" for _ in profilers]) + kwargs["column_format"] = "l" + "".join([ + "rrrrr" for _ in profilers + ]) kwargs["hrules"] = True kwargs["multicol_align"] = "c" From 57a8c1db99df24018450ad633ff9f00cf7f01810 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 14 Nov 2023 10:50:33 +0100 Subject: [PATCH 
199/224] Better debug printing --- varats/varats/plots/feature_perf_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 4d69df87c..eeed847c2 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -59,7 +59,7 @@ def plot(self, view_mode: bool) -> None: df = load_precision_data(case_studies, profilers) # df = pd.concat([df, pd.DataFrame(get_fake_prec_rows())]) df.sort_values(["CaseStudy"], inplace=True) - print(f"{df=}") + print(f"df=\n{df.to_string()}") grid = multivariate_grid( df, From cd38950c0373ab86bc3f8d1014bce64b4ffdb2a1 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 14 Nov 2023 10:50:51 +0100 Subject: [PATCH 200/224] Report parse errors --- varats-core/varats/report/tef_report.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/varats-core/varats/report/tef_report.py b/varats-core/varats/report/tef_report.py index c3a4557b1..84b38733e 100644 --- a/varats-core/varats/report/tef_report.py +++ b/varats-core/varats/report/tef_report.py @@ -130,7 +130,11 @@ def infer_name(self, name_id: int) -> str: def __init__(self, path: Path) -> None: super().__init__(path) self.__name_id_mapper: TEFReport.NameIDMapper = TEFReport.NameIDMapper() - self._parse_json() + try: + self._parse_json() + except Exception as e: + print(f"Could not parse file: {self.path}") + raise e # Parsing stackFrames is currently not implemented # x = data["stackFrames"] From 3620593f752805e2ab48ce8203a4ca0c61a047ad Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 14 Nov 2023 14:24:28 +0100 Subject: [PATCH 201/224] Adds workloads to new CSs --- .../perf_tests/feature_perf_cs_collection.py | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 853efee77..07e1fadb0 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -1322,7 +1322,7 @@ class SynthFeatureLargeConfigSpace(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthFeatureLargeConfigSpace", - refspec="origin/f-FeatureCaseStudies", + refspec="origin/HEAD", limit=None, shallow=False, version_filter=project_filter_generator( @@ -1332,6 +1332,17 @@ class SynthFeatureLargeConfigSpace(VProject): FeatureSource() ] + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot("SynthFeatureLargeConfigSpace") / + RSBinary("LargeConfigSpace"), + ConfigParams(), + label="RestrictedConfigSpace-no-input" + ) + ] + } + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -1369,7 +1380,7 @@ class SynthFeatureRestrictedConfigSpace(VProject): bb.source.Git( remote="https://github.com/se-sic/FeaturePerfCSCollection.git", local="SynthFeatureRestrictedConfigSpace", - refspec="origin/f-FeatureCaseStudies", + refspec="origin/HEAD", limit=None, shallow=False, version_filter=project_filter_generator( @@ -1379,6 +1390,17 @@ class SynthFeatureRestrictedConfigSpace(VProject): FeatureSource() ] + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot("SynthFeatureRestrictedConfigSpace") / + RSBinary("RestrictedConfigSpace"), + ConfigParams(), + 
label="RestrictedConfigSpace-no-input" + ) + ] + } + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 From c897595e10fc139be1ebc87e501c93ddb626a04f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 16 Nov 2023 22:57:53 +0100 Subject: [PATCH 202/224] Min reg on BB side + cut off for regressions --- .../feature_perf_precision_database.py | 79 ++++++++++++++++--- 1 file changed, 66 insertions(+), 13 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index a65d76cea..63cdfcc1b 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -132,9 +132,33 @@ def get_matching_event( return feature_performances +def is_feature_relevant( + old_measurements, + new_measurements, + rel_cut_off: float = 0.01, + abs_cut_off: int = 20 +): + """Check if a feature can be ignored for regression checking as it's time + measurements seem not relevant.""" + old_mean = np.mean(old_measurements) + new_mean = np.mean(new_measurements) + + if old_mean < abs_cut_off and new_mean < abs_cut_off: + return False + + old_rel_cut_off = old_mean * rel_cut_off + abs_mean_diff = abs(old_mean - new_mean) + if abs_mean_diff < old_rel_cut_off: + return False + + return True + + def precise_pim_regression_check( baseline_pim: tp.DefaultDict[str, tp.List[int]], - current_pim: tp.DefaultDict[str, tp.List[int]] + current_pim: tp.DefaultDict[str, tp.List[int]], + rel_cut_off: float = 0.01, + abs_cut_off: int = 20 ) -> bool: is_regression = False @@ -145,8 +169,11 @@ def precise_pim_regression_check( continue new_values = current_pim[feature] - if np.mean(old_values) < 20 and np.mean(new_values) < 20: - # TODO: adapt this to a relative value + + # Skip features that seem not to be relevant for regressions testing + if not is_feature_relevant( + old_values, new_values, rel_cut_off, abs_cut_off + ): continue ttest_res = ttest_ind(old_values, new_values) @@ -170,7 +197,9 @@ def precise_pim_regression_check( def cliffs_delta_pim_regression_check( baseline_pim: tp.DefaultDict[str, tp.List[int]], - current_pim: tp.DefaultDict[str, tp.List[int]] + current_pim: tp.DefaultDict[str, tp.List[int]], + rel_cut_off: float = 0.01, + abs_cut_off: int = 20 ) -> bool: is_regression = False @@ -181,9 +210,11 @@ def cliffs_delta_pim_regression_check( continue new_values = current_pim[feature] - # if np.mean(old_values) < 20 and np.mean(new_values) < 20: - # # TODO: adapt this to a relative value - # continue + + if not is_feature_relevant( + old_values, new_values, rel_cut_off, abs_cut_off + ): + continue d, res = cliffs_delta(old_values, new_values) @@ -207,7 +238,9 @@ def cliffs_delta_pim_regression_check( def sum_pim_regression_check( baseline_pim: tp.DefaultDict[str, tp.List[int]], - current_pim: tp.DefaultDict[str, tp.List[int]] + current_pim: tp.DefaultDict[str, tp.List[int]], + rel_cut_off: float = 0.01, + abs_cut_off: int = 20 ) -> bool: # TODO: add some tests baseline_pim_totals: tp.List[tp.List[int]] = [ @@ -449,7 +482,7 @@ def get_regressing_config_ids_gt( """Computes the baseline data, i.e., the config ids where a regression was identified.""" - gt: tp.Dict[int, bool] = {} + ground_truth: tp.Dict[int, bool] = {} for config_id in case_study.get_config_ids_for_revision(rev): report_files = get_processed_revisions_files( @@ -477,9 +510,21 @@ def get_regressing_config_ids_gt( if not new_time: return None + 
# TODO: what baseline cutoff to choose? + # req_diff = np.mean(old_time.measurements_wall_clock_time) * 0.03 + req_diff = 0.1 if np.mean(old_time.measurements_wall_clock_time ) == np.mean(new_time.measurements_wall_clock_time): - gt[config_id] = False + ground_truth[config_id] = False + elif abs( + np.mean(old_time.measurements_wall_clock_time) - + np.mean(new_time.measurements_wall_clock_time) + ) < req_diff: + # if project_name == "DunePerfRegression": + # print( + # f"No {patch_name=}_{config_id=} -> {old_time.measurements_wall_clock_time=} || {new_time.measurements_wall_clock_time=}" + # ) + ground_truth[config_id] = False else: # d, res = cliffs_delta( # old_time.measurements_wall_clock_time, @@ -494,11 +539,19 @@ def get_regressing_config_ids_gt( # if res == "large": # if d > 0.7 or d < -0.7: if ttest_res.pvalue < 0.05: - gt[config_id] = True + # if project_name == "DunePerfRegression": + # print( + # f"Reg {patch_name=}_{config_id=} -> {old_time.measurements_wall_clock_time=} || {new_time.measurements_wall_clock_time=}" + # ) + ground_truth[config_id] = True else: - gt[config_id] = False + # if project_name == "DunePerfRegression": + # print( + # f"No {patch_name=}_{config_id=} -> {old_time.measurements_wall_clock_time=} || {new_time.measurements_wall_clock_time=}" + # ) + ground_truth[config_id] = False - return gt + return ground_truth def map_to_positive_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: From 28ac7b6dcde18ca9fe4d34e8d9748a616609f58f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 16 Nov 2023 23:11:14 +0100 Subject: [PATCH 203/224] Instrument all HyTeg regions --- varats/varats/experiments/vara/feature_perf_precision.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 04df6f8c1..3ff17ce5c 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -105,6 +105,9 @@ def get_threshold(project: VProject) -> int: return 0 + if project.name in ["HyTeg"]: + return 0 + return 100 From 438e8fdc4a04f6a25bb9b972633abc244f551cfc Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 08:34:53 +0100 Subject: [PATCH 204/224] Final fixes for plots and tables --- .../data/databases/feature_perf_precision_database.py | 6 ++---- varats/varats/plots/feature_perf_precision.py | 7 +++---- varats/varats/tables/feature_perf_precision.py | 5 ++++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 63cdfcc1b..f9ec6022e 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -161,6 +161,7 @@ def precise_pim_regression_check( abs_cut_off: int = 20 ) -> bool: is_regression = False + abs_cut_off = 100 for feature, old_values in baseline_pim.items(): if feature in current_pim: @@ -178,12 +179,10 @@ def precise_pim_regression_check( ttest_res = ttest_ind(old_values, new_values) - # TODO: check, maybe we need a "very small value cut off" if ttest_res.pvalue < 0.05: - # print(f"Found regression for feature {feature}.") is_regression = True else: - if np.mean(old_values) > 20: + if np.mean(old_values) > abs_cut_off: print( f"Could not find feature {feature} in new trace. 
" f"({np.mean(old_values)}us lost)" @@ -603,7 +602,6 @@ def compute_profiler_predictions( return None try: - #print(f"{report_files[0]}") result_dict[config_id] = profiler.is_regression( report_files[0], patch_name ) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index eeed847c2..fa3b43672 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -112,7 +112,7 @@ def plot(self, view_mode: bool) -> None: value_name="value" ) - colors = sns.color_palette("Paired", len(case_studies) * 2) + colors = sns.color_palette("Paired", len(profilers) * 2) _, axes = plt.subplots(ncols=len(profilers), nrows=1, sharey=True) for idx, profiler in enumerate(profilers): @@ -129,7 +129,6 @@ def plot(self, view_mode: bool) -> None: cut=0, split=True, palette=color_slice, - alpha=.25, linewidth=1, ax=ax ) @@ -144,8 +143,8 @@ def plot(self, view_mode: bool) -> None: linewidth=0.5, marker='x', palette=[ - mcolors.CSS4_COLORS['dimgrey'], - mcolors.CSS4_COLORS['darkgrey'] + mcolors.XKCD_COLORS['xkcd:dark grey'], + mcolors.CSS4_COLORS['dimgrey'] ], size=7, ax=ax diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index b587b0943..aff36c4bb 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -407,7 +407,6 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: overhead_df['overhead_memory_rel'].replace([np.inf, -np.inf], np.nan, inplace=True) - print(f"{overhead_df=}") # Merge with precision data merged_df = pd.merge( @@ -615,6 +614,10 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: locs: int if case_study.project_cls.DOMAIN == ProjectDomains.TEST: src_folder = f'projects/{project_name}' + if src_folder.endswith( + "projects/SynthCTTemplateSpecialization" + ): + src_folder = "projects/SynthCTSpecialization" locs = self._calc_folder_locs( project_git_path, rev.hash, src_folder ) From de76d14f8de5ca0bc9ee64a96f3a5fbc4477b2f4 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 08:49:03 +0100 Subject: [PATCH 205/224] Clean up left over code --- .../feature_perf_precision_database.py | 51 +++++++------------ 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index f9ec6022e..50be9c737 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -217,16 +217,15 @@ def cliffs_delta_pim_regression_check( d, res = cliffs_delta(old_values, new_values) - # print(f"{d=}, {res=}") - - # if d > 0.70 or d < -0.7: # if res == "large": - if d > 0.7 or d < -0.7: - # print( - # f"{self.name} found regression for feature {feature}." - # ) + if abs(d) > 0.7: is_regression = True else: + if np.mean(old_values) > abs_cut_off: + print( + f"Could not find feature {feature} in new trace. " + f"({np.mean(old_values)}us lost)" + ) print(f"Could not find feature {feature} in new trace.") # TODO: how to handle this? 
# raise NotImplementedError() @@ -241,7 +240,6 @@ def sum_pim_regression_check( rel_cut_off: float = 0.01, abs_cut_off: int = 20 ) -> bool: - # TODO: add some tests baseline_pim_totals: tp.List[tp.List[int]] = [ old_values for feature, old_values in baseline_pim.items() if feature != "Base" @@ -262,9 +260,11 @@ def sum_pim_regression_check( # How do we get here? return False - # d, res = cliffs_delta(baseline_pim_total, current_pim_total) - # # return res == "large" - # return d > 0.6 or d < -0.6 + mean_baseline = np.mean(baseline_pim_total) + mean_diff = abs(mean_baseline - np.mean(current_pim_total)) + if mean_diff < abs_cut_off or mean_diff < mean_baseline * rel_cut_off: + return False + return ttest_ind(baseline_pim_total, current_pim_total).pvalue < 0.05 @@ -504,13 +504,11 @@ def get_regressing_config_ids_gt( time_reports = fpp.MPRTimeReportAggregate(report_files[0].full_path()) old_time = time_reports.get_baseline_report() - # new_time = time_reports.get_new_report() new_time = time_reports.get_report_for_patch(patch_name) if not new_time: return None - # TODO: what baseline cutoff to choose? - # req_diff = np.mean(old_time.measurements_wall_clock_time) * 0.03 + # Cut off regressions smaller than 100ms req_diff = 0.1 if np.mean(old_time.measurements_wall_clock_time ) == np.mean(new_time.measurements_wall_clock_time): @@ -519,35 +517,16 @@ def get_regressing_config_ids_gt( np.mean(old_time.measurements_wall_clock_time) - np.mean(new_time.measurements_wall_clock_time) ) < req_diff: - # if project_name == "DunePerfRegression": - # print( - # f"No {patch_name=}_{config_id=} -> {old_time.measurements_wall_clock_time=} || {new_time.measurements_wall_clock_time=}" - # ) ground_truth[config_id] = False else: - # d, res = cliffs_delta( - # old_time.measurements_wall_clock_time, - # new_time.measurements_wall_clock_time - # ) - ttest_res = ttest_ind( old_time.measurements_wall_clock_time, new_time.measurements_wall_clock_time ) - # if res == "large": - # if d > 0.7 or d < -0.7: if ttest_res.pvalue < 0.05: - # if project_name == "DunePerfRegression": - # print( - # f"Reg {patch_name=}_{config_id=} -> {old_time.measurements_wall_clock_time=} || {new_time.measurements_wall_clock_time=}" - # ) ground_truth[config_id] = True else: - # if project_name == "DunePerfRegression": - # print( - # f"No {patch_name=}_{config_id=} -> {old_time.measurements_wall_clock_time=} || {new_time.measurements_wall_clock_time=}" - # ) ground_truth[config_id] = False return ground_truth @@ -584,6 +563,10 @@ def compute_profiler_predictions( result_dict: tp.Dict[int, bool] = {} for config_id in config_ids: + print( + f"Compute profiler predictions:\n profiler={profiler.name} - " + f"{project_name=} - {patch_name} - {config_id=}" + ) report_files = get_processed_revisions_files( project_name, profiler.experiment, @@ -774,6 +757,7 @@ def load_precision_data(case_studies, profilers) -> pd.DataFrame: project_name, case_study, rev, patch_name ) except: + # TODO: ??? 
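+                # NOTE: this bare `except` silently drops a case study
+                # whenever its ground truth fails to compute; catching the
+                # concrete error type would make such losses visible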
continue for profiler in profilers: @@ -789,7 +773,6 @@ def load_precision_data(case_studies, profilers) -> pd.DataFrame: if ground_truth else -1 } - # TODO: multiple patch cycles predicted = compute_profiler_predictions( profiler, project_name, case_study, case_study.get_config_ids_for_revision(rev), patch_name From 530d5e11a645ebdc6297ea04921156d5706633c3 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 08:54:07 +0100 Subject: [PATCH 206/224] Fixes print --- varats/varats/data/databases/feature_perf_precision_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 50be9c737..62d8e4775 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -564,7 +564,7 @@ def compute_profiler_predictions( result_dict: tp.Dict[int, bool] = {} for config_id in config_ids: print( - f"Compute profiler predictions:\n profiler={profiler.name} - " + f"Compute profiler predictions: profiler={profiler.name} - " f"{project_name=} - {patch_name} - {config_id=}" ) report_files = get_processed_revisions_files( From 6b48c93896b6bfe9927bcb48f173347f53941329 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 09:02:59 +0100 Subject: [PATCH 207/224] Bump matplotlib and seaborn versions --- requirements.txt | 4 ++-- varats/setup.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9bf3280be..7bbc7f53e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ ijson>=3.1.4 Jinja2>=3.1.2 jupyter>=1.0.0 kaleido>=0.2.1 -matplotlib>=3.7.1 +matplotlib>=3.8.2 networkx>=3.0 numpy>=1.24.2 packaging>=20.1 @@ -29,7 +29,7 @@ PyYAML>=6.0 requests>=2.28.2 rich>=12.6 scikit-learn>=1.2.2 -seaborn>=0.12.2 +seaborn>=0.13.0 tabulate>=0.9 types-PyYAML types-requests diff --git a/varats/setup.py b/varats/setup.py index b0f917fc0..5c30c0314 100644 --- a/varats/setup.py +++ b/varats/setup.py @@ -23,7 +23,7 @@ "graphviz>=0.14.2", "Jinja2>=3.1.2", "kaleido>=0.2.1", - "matplotlib>=3.7.1", + "matplotlib>=3.8.2", "networkx>=3.0", "numpy>=1.24.2", "packaging>=20.1", @@ -40,7 +40,7 @@ "PyYAML>=6.0", "rich>=12.6", "scikit-learn>=1.2.2", - "seaborn>=0.12.2", + "seaborn>=0.13.0", "tabulate>=0.9", "varats-core>=13.0.5", "wllvm>=1.3.1", From 5e2efb136d8e9d3272171d2e023aaad34d24d6f1 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 09:06:01 +0100 Subject: [PATCH 208/224] Cleans up tef report changes --- varats-core/varats/report/tef_report.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/varats-core/varats/report/tef_report.py b/varats-core/varats/report/tef_report.py index 84b38733e..9fcc6ee28 100644 --- a/varats-core/varats/report/tef_report.py +++ b/varats-core/varats/report/tef_report.py @@ -52,6 +52,8 @@ class TraceEvent(): """Represents a trace event that was captured during the analysis of a target program.""" + __uuid: int + def __init__( self, json_trace_event: tp.Dict[str, tp.Any], name_id: int, name_id_mapper: 'TEFReport.NameIDMapper' @@ -67,9 +69,9 @@ def __init__( self.__tid = int(json_trace_event["tid"]) if "UUID" in json_trace_event: - self.__uuid: int = int(json_trace_event["UUID"]) + self.__uuid = int(json_trace_event["UUID"]) elif "ID" in json_trace_event: - self.__uuid: int = int(json_trace_event["ID"]) + self.__uuid = 
int(json_trace_event["ID"]) else: LOG.critical("Could not parse UUID/ID from trace event") self.__uuid: int = 0 @@ -152,7 +154,7 @@ def stack_frames(self) -> None: "Stack frame parsing is currently not implemented!" ) - def _patch_errors_from_file(self): + def _patch_errors_from_file(self) -> None: with open(self.path, "r") as f: data = f.read() @@ -168,7 +170,7 @@ def _patch_errors_from_file(self): f.write(line) def _parse_json(self) -> None: - trace_events: tp.List[TraceEvent] = list() + trace_events: tp.List[TraceEvent] = [] self._patch_errors_from_file() @@ -207,10 +209,6 @@ def _parse_json(self) -> None: self.__trace_events: tp.List[TraceEvent] = trace_events -def extract_feature_data(): - pass - - class TEFReportAggregate( ReportAggregate[TEFReport], shorthand=TEFReport.SHORTHAND + ReportAggregate.SHORTHAND, From fa1dcb5bc35d0c6ae63c7fb59592141122711728 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 09:34:50 +0100 Subject: [PATCH 209/224] More clean up --- .../varats/experiment/experiment_util.py | 2 +- .../feature_perf_precision_database.py | 135 ++++++++++-------- .../varats/tables/feature_perf_precision.py | 5 - 3 files changed, 75 insertions(+), 67 deletions(-) diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index 73521b867..457c2e694 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -523,7 +523,7 @@ def call_with_output_folder(self, tmp_dir: Path) -> StepResult: ) -class ZippedExperimentSteps(MultiStep[ZippedStepTy]): +class ZippedExperimentSteps(MultiStep[ZippedStepTy]): # type: ignore """Runs multiple actions, providing them a shared tmp folder that afterwards is zipped into an archive.""" diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 62d8e4775..3abfd90f8 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -7,15 +7,19 @@ import numpy as np import pandas as pd -from cliffs_delta import cliffs_delta +from cliffs_delta import cliffs_delta # type: ignore from scipy.stats import ttest_ind import varats.experiments.vara.feature_perf_precision as fpp from varats.data.metrics import ConfusionMatrix +from varats.data.reports.performance_influence_trace_report import ( + PerfInfluenceTraceReport, +) from varats.experiments.vara.feature_experiment import FeatureExperiment from varats.paper.case_study import CaseStudy from varats.paper_mgmt.case_study import get_case_study_file_name_filter -from varats.report.gnu_time_report import TimeReportAggregate, TimeReport +from varats.report.gnu_time_report import TimeReportAggregate +from varats.report.multi_patch_report import MultiPatchReport from varats.report.report import BaseReport, ReportFilepath from varats.report.tef_report import ( TEFReport, @@ -60,7 +64,7 @@ def get_feature_performance_from_tef_report( def get_matching_event( open_events: tp.List[TraceEvent], closing_event: TraceEvent - ): + ) -> tp.Optional[TraceEvent]: for event in open_events: if ( event.uuid == closing_event.uuid and @@ -137,7 +141,7 @@ def is_feature_relevant( new_measurements, rel_cut_off: float = 0.01, abs_cut_off: int = 20 -): +) -> bool: """Check if a feature can be ignored for regression checking as it's time measurements seem not relevant.""" old_mean = np.mean(old_measurements) @@ -160,6 +164,8 @@ def 
precise_pim_regression_check(
     rel_cut_off: float = 0.01,
     abs_cut_off: int = 20
 ) -> bool:
+    """Compute if there was a regression in one of the feature terms of the
+    model between the current and the baseline, using an independent
+    two-sample t-test."""
     is_regression = False
     abs_cut_off = 100

@@ -200,6 +206,8 @@ def cliffs_delta_pim_regression_check(
     rel_cut_off: float = 0.01,
     abs_cut_off: int = 20
 ) -> bool:
+    """Compute if there was a regression in one of the feature terms of the
+    model between the current and the baseline, using Cliff's delta."""
     is_regression = False

@@ -240,6 +248,12 @@ def sum_pim_regression_check(
     rel_cut_off: float = 0.01,
     abs_cut_off: int = 20
 ) -> bool:
+    """
+    Compute if there was a regression in the sum of the features in the model
+    between the current and the baseline.
+
+    The comparison is done with an independent two-sample t-test.
+    """
     baseline_pim_totals: tp.List[tp.List[int]] = [
         old_values for feature, old_values in baseline_pim.items()
         if feature != "Base"
@@ -332,7 +346,7 @@ def is_regression(
         self, report_path: ReportFilepath, patch_name: str
     ) -> bool:
         """Checks if there was a regression between the old and new data."""
-        multi_report = fpp.MultiPatchReport(
+        multi_report = MultiPatchReport(
             report_path.full_path(), TEFReportAggregate
         )

@@ -366,7 +380,9 @@ def __init__(self) -> None:
         )

     @staticmethod
-    def __aggregate_pim_data(reports) -> tp.DefaultDict[str, tp.List[int]]:
+    def __aggregate_pim_data(
+        reports: tp.List[PerfInfluenceTraceReport]
+    ) -> tp.DefaultDict[str, tp.List[int]]:
         acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list)
         for old_pim_report in reports:
             per_report_acc_pim: tp.DefaultDict[str, int] = defaultdict(int)
@@ -388,24 +404,19 @@ def is_regression(
         self, report_path: ReportFilepath, patch_name: str
     ) -> bool:
         """Checks if there was a regression between the old and new data."""
-        multi_report = fpp.MultiPatchReport(
+        multi_report = MultiPatchReport(
             report_path.full_path(), fpp.PerfInfluenceTraceReportAggregate
         )

-        try:
-            old_acc_pim = self.__aggregate_pim_data(
-                multi_report.get_baseline_report().reports()
-            )
+        old_acc_pim = self.__aggregate_pim_data(
+            multi_report.get_baseline_report().reports()
+        )

-            opt_mr = multi_report.get_report_for_patch(patch_name)
-            if not opt_mr:
-                raise NotImplementedError()
+        opt_mr = multi_report.get_report_for_patch(patch_name)
+        if not opt_mr:
+            raise NotImplementedError()

-            new_acc_pim = self.__aggregate_pim_data(opt_mr.reports())
-        except Exception as exc:
-            print(f"FAILURE: Report parsing failed: {report_path}")
-            print(exc)
-            return False
+        new_acc_pim = self.__aggregate_pim_data(opt_mr.reports())

         return pim_regression_check(old_acc_pim, new_acc_pim)

@@ -423,7 +434,7 @@ def is_regression(
         self, report_path: ReportFilepath, patch_name: str
     ) -> bool:
         """Checks if there was a regression between the old and new data."""
-        multi_report = fpp.MultiPatchReport(
+        multi_report = MultiPatchReport(
             report_path.full_path(), TEFReportAggregate
         )

@@ -595,79 +606,79 @@ def compute_profiler_predictions(
             )
             print(exception)
             print(traceback.format_exc())
+            # TODO: clean up
             # raise exception

     return result_dict


 class OverheadData:
+    """Data class to store the collected overhead data and provide high-level
+    operations on it."""

     def __init__(
-        self, profiler, mean_time: tp.Dict[int, float],
-        mean_memory: tp.Dict[int, float], major_page_faults: tp.Dict[int,
-                                                                     float],
-        minor_page_faults: tp.Dict[int, float], fs_inputs: tp.Dict[int, float],
-        fs_outputs: tp.Dict[int,
float] + self, mean_time: tp.Dict[int, float], mean_memory: tp.Dict[int, float], + major_page_faults: tp.Dict[int, + float], minor_page_faults: tp.Dict[int, + float], + fs_inputs: tp.Dict[int, float], fs_outputs: tp.Dict[int, float] ) -> None: - self.__profiler = profiler - self.__mean_time: tp.Dict[int, float] = mean_time - self.__mean_memory: tp.Dict[int, float] = mean_memory - self.__mean_major_page_faults: tp.Dict[int, float] = major_page_faults - self.__mean_minor_page_faults: tp.Dict[int, float] = minor_page_faults - self.__mean_fs_inputs: tp.Dict[int, float] = fs_inputs - self.__mean_fs_outputs: tp.Dict[int, float] = fs_outputs + self._mean_time: tp.Dict[int, float] = mean_time + self._mean_memory: tp.Dict[int, float] = mean_memory + self._mean_major_page_faults: tp.Dict[int, float] = major_page_faults + self._mean_minor_page_faults: tp.Dict[int, float] = minor_page_faults + self._mean_fs_inputs: tp.Dict[int, float] = fs_inputs + self._mean_fs_outputs: tp.Dict[int, float] = fs_outputs def mean_time(self) -> float: - return float(np.mean(list(self.__mean_time.values()))) + return float(np.mean(list(self._mean_time.values()))) def mean_memory(self) -> float: - return float(np.mean(list(self.__mean_memory.values()))) + return float(np.mean(list(self._mean_memory.values()))) def mean_major_page_faults(self) -> float: - return float(np.mean(list(self.__mean_major_page_faults.values()))) + return float(np.mean(list(self._mean_major_page_faults.values()))) def mean_minor_page_faults(self) -> float: - return float(np.mean(list(self.__mean_minor_page_faults.values()))) + return float(np.mean(list(self._mean_minor_page_faults.values()))) def mean_fs_inputs(self) -> float: - return float(np.mean(list(self.__mean_fs_inputs.values()))) + return float(np.mean(list(self._mean_fs_inputs.values()))) def mean_fs_outputs(self) -> float: - return float(np.mean(list(self.__mean_fs_outputs.values()))) + return float(np.mean(list(self._mean_fs_outputs.values()))) def config_wise_time_diff(self, other: 'OverheadData') -> tp.Dict[int, float]: - return self.__config_wise(self.__mean_time, other.__mean_time) + return self.__config_wise(self._mean_time, other._mean_time) def config_wise_memory_diff(self, other: 'OverheadData') -> tp.Dict[int, float]: - return self.__config_wise(self.__mean_memory, other.__mean_memory) + return self.__config_wise(self._mean_memory, other._mean_memory) def config_wise_major_page_faults_diff( self, other: 'OverheadData' ) -> tp.Dict[int, float]: return self.__config_wise( - self.__mean_major_page_faults, other.__mean_major_page_faults + self._mean_major_page_faults, other._mean_major_page_faults ) def config_wise_minor_page_faults_diff( self, other: 'OverheadData' ) -> tp.Dict[int, float]: return self.__config_wise( - self.__mean_minor_page_faults, other.__mean_minor_page_faults + self._mean_minor_page_faults, other._mean_minor_page_faults ) def config_wise_fs_inputs_diff( self, other: 'OverheadData' ) -> tp.Dict[int, float]: - return self.__config_wise(self.__mean_fs_inputs, other.__mean_fs_inputs) + return self.__config_wise(self._mean_fs_inputs, other._mean_fs_inputs) def config_wise_fs_outputs_diff( self, other: 'OverheadData' ) -> tp.Dict[int, float]: - return self.__config_wise( - self.__mean_fs_outputs, other.__mean_fs_outputs - ) + return self.__config_wise(self._mean_fs_outputs, other._mean_fs_outputs) @staticmethod def __config_wise( @@ -686,6 +697,7 @@ def __config_wise( def compute_overhead_data( profiler: Profiler, case_study: CaseStudy, rev: FullCommitHash ) -> 
tp.Optional['OverheadData']:
+    """Computes overhead data for a given case study."""

     mean_time: tp.Dict[int, float] = {}
     mean_memory: tp.Dict[int, float] = {}
@@ -738,27 +750,24 @@ def compute_overhead_data(
         )
         return None

-    # print(f"{mean_time=}")
     return OverheadData(
-        profiler, mean_time, mean_memory, mean_major_page_faults,
+        mean_time, mean_memory, mean_major_page_faults,
         mean_minor_page_faults, mean_fs_inputs, mean_fs_outputs
     )


-def load_precision_data(case_studies, profilers) -> pd.DataFrame:
+def load_precision_data(
+    case_studies: tp.List[CaseStudy], profilers: tp.List[Profiler]
+) -> pd.DataFrame:
+    """Loads precision measurement data for the given case studies and computes
+    precision and recall for the different profilers."""
     table_rows_plot = []
     for case_study in case_studies:
         for patch_name in get_patch_names(case_study):
             rev = case_study.revisions[0]
             project_name = case_study.project_name

-            try:
-                ground_truth = get_regressing_config_ids_gt(
-                    project_name, case_study, rev, patch_name
-                )
-            except:
-                # TODO: ???
-                continue
+            ground_truth = get_regressing_config_ids_gt(
+                project_name, case_study, rev, patch_name
+            )

             for profiler in profilers:
                 new_row = {
@@ -806,6 +815,8 @@ def load_precision_data(case_studies, profilers) -> pd.DataFrame:


 def load_overhead_data(case_studies, profilers) -> pd.DataFrame:
+    """Loads overhead measurement data for the given case studies and computes
+    overhead metrics that were introduced by the different profilers."""
     table_rows = []

     for case_study in case_studies:
@@ -852,12 +863,14 @@ def load_overhead_data(case_studies, profilers) -> pd.DataFrame:
             memory_diff = profiler_overhead.config_wise_memory_diff(
                 overhead_ground_truth
             )
-            major_page_faults_diff = profiler_overhead.config_wise_major_page_faults_diff(
-                overhead_ground_truth
-            )
-            minor_page_faults_diff = profiler_overhead.config_wise_minor_page_faults_diff(
-                overhead_ground_truth
-            )
+            major_page_faults_diff = \
+                profiler_overhead.config_wise_major_page_faults_diff(
+                    overhead_ground_truth
+                )
+            minor_page_faults_diff = \
+                profiler_overhead.config_wise_minor_page_faults_diff(
+                    overhead_ground_truth
+                )
             fs_inputs_diff = profiler_overhead.config_wise_fs_inputs_diff(
                 overhead_ground_truth
             )

diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py
index aff36c4bb..25af1fcc4 100644
--- a/varats/varats/tables/feature_perf_precision.py
+++ b/varats/varats/tables/feature_perf_precision.py
@@ -423,11 +423,8 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
             ]
         )

-        # print(f"pivot_df=\n{pivot_df}")
-        # print(f"{pivot_df.columns=}")
         pivot_df = pivot_df.swaplevel(0, 1, 1).sort_index(axis=1)

-        # print(f"pivot_df=\n{pivot_df}")
         columns = [
             'precision', 'recall', 'overhead_time_rel', 'overhead_memory_rel',
             'overhead_memory'
         ]
@@ -438,8 +435,6 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
                                            axis=1)
         print(f"pivot_df=\n{pivot_df}")

-        # print(f"{pivot_df.columns=}")
-
         pivot_df.loc["Total"] = pivot_df.mean()
         print(f"pivot_df=\n{pivot_df}")

From a3354d472314a20ad2e3e07a53e0ccabdbd7fad5 Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Mon, 27 Nov 2023 10:09:43 +0100
Subject: [PATCH 210/224] More clean up

---
 .../feature_perf_precision_database.py        | 28 ++++---
 varats/varats/plots/feature_perf_precision.py | 79 ++++++++-----------
 2 files changed, 50 insertions(+), 57 deletions(-)

diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py
index 3abfd90f8..9417593c1 100644
---
a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -14,6 +14,7 @@ from varats.data.metrics import ConfusionMatrix from varats.data.reports.performance_influence_trace_report import ( PerfInfluenceTraceReport, + PerfInfluenceTraceReportAggregate, ) from varats.experiments.vara.feature_experiment import FeatureExperiment from varats.paper.case_study import CaseStudy @@ -137,8 +138,8 @@ def get_matching_event( def is_feature_relevant( - old_measurements, - new_measurements, + old_measurements: tp.List[int], + new_measurements: tp.List[int], rel_cut_off: float = 0.01, abs_cut_off: int = 20 ) -> bool: @@ -223,10 +224,10 @@ def cliffs_delta_pim_regression_check( ): continue - d, res = cliffs_delta(old_values, new_values) + cdelta_val, _ = cliffs_delta(old_values, new_values) # if res == "large": - if abs(d) > 0.7: + if abs(cdelta_val) > 0.7: is_regression = True else: if np.mean(old_values) > abs_cut_off: @@ -288,8 +289,6 @@ def pim_regression_check( ) -> bool: """Compares two pims and determines if there was a regression between the baseline and current.""" - # return cliffs_delta_pim_regression_check(baseline_pim, current_pim) - # return sum_pim_regression_check(baseline_pim, current_pim) return precise_pim_regression_check(baseline_pim, current_pim) @@ -405,7 +404,7 @@ def is_regression( ) -> bool: """Checks if there was a regression between the old an new data.""" multi_report = MultiPatchReport( - report_path.full_path(), fpp.PerfInfluenceTraceReportAggregate + report_path.full_path(), PerfInfluenceTraceReportAggregate ) old_acc_pim = self.__aggregate_pim_data( @@ -458,6 +457,7 @@ def is_regression( def get_patch_names(case_study: CaseStudy) -> tp.List[str]: + """Looks up all patch names from the given case study.""" report_files = get_processed_revisions_files( case_study.project_name, fpp.BlackBoxBaselineRunner, @@ -559,10 +559,12 @@ class Baseline(Profiler): def __init__(self) -> None: super().__init__( "Base", fpp.BlackBoxBaselineRunner, fpp.BlackBoxOverheadBaseline, - fpp.TimeReportAggregate + TimeReportAggregate ) - def is_regression(self, report_path: ReportFilepath) -> bool: + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: raise NotImplementedError() @@ -756,7 +758,9 @@ def compute_overhead_data( ) -def load_precision_data(case_studies, profilers) -> pd.DataFrame: +def load_precision_data( + case_studies: tp.List[CaseStudy], profilers: tp.List[Profiler] +) -> pd.DataFrame: """Loads precision measurement data for the given cases studies and computes precision and recall for the different profilers.""" table_rows_plot = [] @@ -814,7 +818,9 @@ def load_precision_data(case_studies, profilers) -> pd.DataFrame: return pd.DataFrame(table_rows_plot) -def load_overhead_data(case_studies, profilers) -> pd.DataFrame: +def load_overhead_data( + case_studies: tp.List[CaseStudy], profilers: tp.List[Profiler] +) -> pd.DataFrame: """Loads overhead measurement data for the given cases studies and computes overhead metrics that where introduced by the different profilers.""" table_rows = [] diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index fa3b43672..b5ac14d58 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -20,35 +20,15 @@ ) from varats.paper.paper_config import get_loaded_paper_config from varats.plot.plot import Plot -from varats.plot.plots import 
PlotConfig, PlotGenerator +from varats.plot.plots import PlotGenerator from varats.plots.scatter_plot_utils import multivariate_grid from varats.utils.exceptions import UnsupportedOperation from varats.utils.git_util import FullCommitHash -def get_fake_prec_rows() -> tp.List[tp.Any]: - fake_rows = [] - fake_prof = [("prof1", 10), ("prof2", 42)] - for prof, seed in fake_prof: - random.seed(seed) - for _ in range(0, 3): - x = random.random() - y = random.random() - new_fake_row = { - 'CaseStudy': "fake", - 'Patch': "fpatch", - 'Configs': 42, - 'RegressedConfigs': 21, - 'precision': x, - 'recall': y, - 'Profiler': prof - } - fake_rows.append(new_fake_row) - - return fake_rows - - class PerfPrecisionPlot(Plot, plot_name='fperf_precision'): + """Precision plot that plots the precision and recall values of different + profilers.""" def plot(self, view_mode: bool) -> None: case_studies = get_loaded_paper_config().get_all_case_studies() @@ -57,9 +37,7 @@ def plot(self, view_mode: bool) -> None: # Data aggregation df = pd.DataFrame() df = load_precision_data(case_studies, profilers) - # df = pd.concat([df, pd.DataFrame(get_fake_prec_rows())]) df.sort_values(["CaseStudy"], inplace=True) - print(f"df=\n{df.to_string()}") grid = multivariate_grid( df, @@ -89,6 +67,7 @@ def calc_missing_revisions( class PerfPrecisionPlotGenerator( PlotGenerator, generator_name="fperf-precision", options=[] ): + """Generates precision plot.""" def generate(self) -> tp.List[Plot]: @@ -96,6 +75,8 @@ def generate(self) -> tp.List[Plot]: class PerfPrecisionDistPlot(Plot, plot_name='fperf_precision_dist'): + """Precision plot that plots the precision and recall distributions of + different profilers.""" def plot(self, view_mode: bool) -> None: case_studies = get_loaded_paper_config().get_all_case_studies() @@ -174,6 +155,7 @@ def calc_missing_revisions( class PerfProfDistPlotGenerator( PlotGenerator, generator_name="fperf-precision-dist", options=[] ): + """Generates performance distribution plot.""" def generate(self) -> tp.List[Plot]: @@ -181,19 +163,25 @@ def generate(self) -> tp.List[Plot]: class PerfOverheadPlot(Plot, plot_name='fperf_overhead'): + """Performance overhead plot that shows the pareto front of the different + performance metrics.""" def plot(self, view_mode: bool) -> None: # -- Configure plot -- plot_metric = [ ("Time", "overhead_time_rel"), ("Memory", "overhead_memory_rel"), - #("Major Page Faults", "overhead_major_page_faults_rel"), - #("Minor Page Faults", "overhead_minor_page_faults_rel"), - #("Filesystem Inputs", "overhead_fs_inputs_rel"), - #("Filesystem Outputs", "overhead_fs_outputs_rel"), ] + extra_metrics = False + if extra_metrics: + plot_metric.extend([ + ("Major Page Faults", "overhead_major_page_faults_rel"), + ("Minor Page Faults", "overhead_minor_page_faults_rel"), + ("Filesystem Inputs", "overhead_fs_inputs_rel"), + ("Filesystem Outputs", "overhead_fs_outputs_rel"), + ]) + target_row = "f1_score" - # target_row = "precision" # Load data case_studies = get_loaded_paper_config().get_all_case_studies() @@ -212,10 +200,8 @@ def plot(self, view_mode: bool) -> None: 'recall': 'mean', 'f1_score': 'mean' }) - print(f"precision_df=\n{precision_df}") overhead_df = load_overhead_data(case_studies, profilers) - print(f"{overhead_df=}") overhead_df['overhead_time_rel'] = overhead_df['time'] / ( overhead_df['time'] - overhead_df['overhead_time'] ) * 100 @@ -265,14 +251,9 @@ def plot(self, view_mode: bool) -> None: np.nan, inplace=True) - print(f"other_df=\n{overhead_df}") - merged_df = pd.merge( 
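        # pd.merge defaults to an inner join, so only (CaseStudy, Profiler)
        # pairs that have both precision and overhead data survive here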
precision_df, overhead_df, on=["CaseStudy", "Profiler"] ) - print(f"merged_df=\n{merged_df}") - - # print(f"{self.plot_config.width()}") rows = 1 _, axes = plt.subplots( @@ -285,7 +266,6 @@ def plot(self, view_mode: bool) -> None: axes ) else: - print(f"{axes=}") if rows == 1: axes_list = list(axes) else: @@ -300,7 +280,7 @@ def plot(self, view_mode: bool) -> None: def do_single_plot( self, x_values, target_row, merged_df, plot_extra_name, ax ) -> None: - # ax = + """Plot a single overhead metric.""" sns.scatterplot( merged_df, x=x_values, @@ -353,9 +333,8 @@ def do_single_plot( ]].groupby('Profiler').agg(['mean', 'std']) prof_df.fillna(0, inplace=True) - print(f"{prof_df=}") pareto_front = self.plot_pareto_frontier( - prof_df[x_values]['mean'], prof_df[target_row]['mean'], maxX=False + prof_df[x_values]['mean'], prof_df[target_row]['mean'], max_x=False ) pf_x = [pair[0] for pair in pareto_front] @@ -400,14 +379,21 @@ def do_single_plot( zorder=1 ) - def plot_pareto_frontier(self, Xs, Ys, maxX=True, maxY=True): + def plot_pareto_frontier( + self, + x_values: tp.List[float], + y_values: tp.List[float], + max_x: bool = True, + max_y: bool = True + ): """Pareto frontier selection process.""" - sorted_list = sorted([[Xs[i], Ys[i]] for i in range(len(Xs))], - reverse=maxX) - print(f"{sorted_list=}") + sorted_list = sorted([ + [x_values[i], y_values[i]] for i in range(len(x_values)) + ], + reverse=max_x) pareto_front = [sorted_list[0]] for pair in sorted_list[1:]: - if maxY: + if max_y: if pair[1] >= pareto_front[-1][1]: if pair[0] == pareto_front[-1][0]: # If both points, have the same x-values, we should @@ -435,6 +421,7 @@ def calc_missing_revisions( class PerfOverheadPlotGenerator( PlotGenerator, generator_name="fperf-overhead", options=[] ): + """Generates overhead plot.""" def generate(self) -> tp.List[Plot]: return [PerfOverheadPlot(self.plot_config, **self.plot_kwargs)] From 13a227051688cacdf5847ff34fe02216bf918859 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 10:44:46 +0100 Subject: [PATCH 211/224] More clean up --- .../vara/feature_perf_precision.py | 72 +++---- varats/varats/plots/feature_perf_precision.py | 1 - .../varats/tables/feature_perf_precision.py | 202 +++--------------- 3 files changed, 59 insertions(+), 216 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 3ff17ce5c..8fad1ef31 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -11,12 +11,11 @@ from benchbuild.command import cleanup, ProjectCommand from benchbuild.environments.domain.declarative import ContainerImage from benchbuild.utils import actions -from benchbuild.utils.actions import StepResult, Clean -from benchbuild.utils.cmd import time, rm, cp, numactl, sudo, bpftrace, perf +from benchbuild.utils.actions import StepResult +from benchbuild.utils.cmd import time, cp, sudo, bpftrace from plumbum import local, BG from plumbum.commands.modifiers import Future -from varats.containers.containers import get_base_image, ImageBase from varats.data.reports.performance_influence_trace_report import ( PerfInfluenceTraceReportAggregate, ) @@ -87,6 +86,7 @@ def select_project_binaries(project: VProject) -> tp.List[ProjectBinaryWrapper]: def get_extra_cflags(project: VProject) -> tp.List[str]: + """Get additional cflags for some projects.""" if project.name in ["DunePerfRegression", "HyTeg"]: # Disable phasar for dune as the 
analysis cannot handle Dune's size
         return ["-fvara-disable-phasar"]

@@ -95,6 +95,7 @@ def get_extra_cflags(project: VProject) -> tp.List[str]:


 def get_threshold(project: VProject) -> int:
+    """Get the project-specific instrumentation threshold."""
     if project.DOMAIN is ProjectDomains.TEST:
         if project.name in [
             "SynthSAFieldSensitivity", "SynthIPRuntime", "SynthIPTemplate",
@@ -112,6 +113,7 @@ def get_threshold(project: VProject) -> int:


 class AnalysisProjectStepBase(OutputFolderStep):
+    """Base class for project steps."""

     project: VProject

@@ -137,6 +139,7 @@ def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
 class MPRTimeReportAggregate(
     MultiPatchReport[TimeReportAggregate], shorthand="MPRTRA", file_type=".zip"
 ):
+    """Multi-patch wrapper report for time aggregates."""

     def __init__(self, path: Path) -> None:
         super().__init__(path, TimeReportAggregate)

@@ -145,6 +148,7 @@ def __init__(self, path: Path) -> None:
 class MPRTEFAggregate(
     MultiPatchReport[TEFReportAggregate], shorthand="MPRTEFA", file_type=".zip"
 ):
+    """Multi-patch wrapper report for TEF aggregates."""

     def __init__(self, path: Path) -> None:
         super().__init__(path, TEFReportAggregate)

@@ -153,6 +157,7 @@ def __init__(self, path: Path) -> None:
 class MPRPIMAggregate(
     MultiPatchReport[TEFReportAggregate], shorthand="MPRPIMA", file_type=".zip"
 ):
+    """Multi-patch wrapper report for PIM aggregates."""

     def __init__(self, path: Path) -> None:
         # TODO: clean up report handling, we currently parse it as a TEFReport

@@ -168,16 +173,6 @@ class RunGenTracedWorkloads(AnalysisProjectStepBase):  # type: ignore

     project: VProject

-    def __init__(
-        self,
-        project: VProject,
-        binary: ProjectBinaryWrapper,
-        file_name: str,
-        report_file_ending: str = "json",
-        reps=REPS
-    ):
-        super().__init__(project, binary, file_name, report_file_ending, reps)
-
     def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
         return self.run_traced_code(tmp_dir)

@@ -221,16 +216,6 @@ class RunBPFTracedWorkloads(AnalysisProjectStepBase):  # type: ignore

     project: VProject

-    def __init__(
-        self,
-        project: VProject,
-        binary: ProjectBinaryWrapper,
-        file_name: str,
-        report_file_ending: str = "json",
-        reps=REPS
-    ):
-        super().__init__(project, binary, file_name, report_file_ending, reps)
-
     def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
         return self.run_traced_code(tmp_dir)

@@ -261,21 +246,24 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult:
                             non_nfs_tmp_dir
                         ) / self._binary.name

-                        pb_cmd = prj_command.command.as_plumbum_wrapped_with(
-                            adapted_binary_location=
-                            adapted_binary_location,
-                            project=self.project
-                        )
+                        pb_cmd = \
+                            prj_command.command.as_plumbum_wrapped_with(
+                                adapted_binary_location=
+                                adapted_binary_location,
+                                project=self.project
+                            )

-                        bpf_runner = bpf_runner = self.attach_usdt_raw_tracing(
-                            local_tracefile_path,
-                            adapted_binary_location,
-                            Path(non_nfs_tmp_dir)
-                        )
+                        bpf_runner = \
+                            self.attach_usdt_raw_tracing(
+                                local_tracefile_path,
+                                adapted_binary_location,
+                                Path(non_nfs_tmp_dir)
+                            )

                         with cleanup(prj_command):
                             print(
-                                f"Running example {prj_command.command.label}"
+                                "Running example "
+                                f"{prj_command.command.label}"
                             )
                             pb_cmd(
                                 retcode=self._binary.valid_exit_codes

@@ -306,7 +294,6 @@ def attach_usdt_raw_tracing(
         # Assertion: Can be run without sudo password prompt.
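        # (A passwordless-sudo rule for bpftrace, e.g. installed via
        # /etc/sudoers.d, is assumed on the measurement machine; otherwise
        # the password prompt would stall the run.)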
bpftrace_cmd = sudo[bpftrace_script] - # bpftrace_cmd = numactl["--cpunodebind=0", "--membind=0", bpftrace_cmd] bpftrace_runner = bpftrace_cmd & BG # give bpftrace time to start up, requires more time than regular USDT @@ -323,16 +310,6 @@ class RunBCCTracedWorkloads(AnalysisProjectStepBase): # type: ignore project: VProject - def __init__( - self, - project: VProject, - binary: ProjectBinaryWrapper, - file_name: str, - report_file_ending: str = "json", - reps=REPS - ): - super().__init__(project, binary, file_name, report_file_ending, reps) - def call_with_output_folder(self, tmp_dir: Path) -> StepResult: return self.run_traced_code(tmp_dir) @@ -392,7 +369,6 @@ def attach_usdt_bcc(report_file: Path, binary: Path) -> Future: "--executable", binary] print(f"{bcc_cmd=}") bcc_cmd = sudo[bcc_cmd] - # bcc_cmd = numactl["--cpunodebind=0", "--membind=0", bcc_cmd] bcc_runner = bcc_cmd & BG sleep(3) # give bcc script time to start up @@ -404,6 +380,7 @@ def setup_actions_for_vara_experiment( instr_type: FeatureInstrType, analysis_step: tp.Type[AnalysisProjectStepBase] ) -> tp.MutableSequence[actions.Step]: + """Sets up actions for a given perf precision experiment.""" project.cflags += experiment.get_vara_feature_cflags(project) @@ -445,7 +422,7 @@ def setup_actions_for_vara_experiment( get_current_config_id(project) ) - patch_provider = PatchProvider.get_provider_for_project(project) + patch_provider = PatchProvider.get_provider_for_project(type(project)) patches = patch_provider.get_patches_for_revision( ShortCommitHash(project.version_of_primary) )[IDENTIFIER_PATCH_TAG] @@ -934,6 +911,7 @@ def setup_actions_for_vara_overhead_experiment( instr_type: FeatureInstrType, analysis_step: tp.Type[AnalysisProjectStepBase] ) -> tp.MutableSequence[actions.Step]: + """Sets up actions for a given perf overhead experiment.""" project.cflags += experiment.get_vara_feature_cflags(project) threshold = get_threshold(project) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index b5ac14d58..4114da190 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -1,5 +1,4 @@ """Module for the FeaturePerfPrecision plots.""" -import random import typing as tp from itertools import chain diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 25af1fcc4..74597a411 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -3,11 +3,12 @@ import typing as tp from pathlib import Path -import matplotlib.colors as colors import matplotlib.pyplot as plt import numpy as np import pandas as pd -from plumbum import local, TF, RETCODE +from benchbuild.utils.cmd import git +from matplotlib import colors +from plumbum import local from pylatex import Document, Package from varats.data.databases.feature_perf_precision_database import ( @@ -33,42 +34,41 @@ from varats.table.table import Table from varats.table.table_utils import dataframe_to_table from varats.table.tables import TableFormat, TableGenerator -from varats.utils.git_util import calc_repo_loc, ChurnConfig, git +from varats.utils.git_util import calc_repo_loc, ChurnConfig -def cmap_map(function, cmap): +def cmap_map(function, cmap: colors.Colormap) -> colors.LinearSegmentedColormap: """ Applies function (which should operate on vectors of shape 3: [r, g, b]), on colormap cmap. This routine will break any discontinuous points in a colormap. 
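    For example, cmap_map(lambda x: x / 2 + 0.5, matplotlib.cm.jet) would
    lighten every color in the map.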
""" - cdict = cmap._segmentdata + c_dict = cmap._segmentdata step_dict = {} # Firt get the list of points where the segments start or end for key in ('red', 'green', 'blue'): - step_dict[key] = list(map(lambda x: x[0], cdict[key])) + step_dict[key] = list(map(lambda x: x[0], c_dict[key])) step_list = sum(step_dict.values(), []) step_list = np.array(list(set(step_list))) # Then compute the LUT, and apply the function to the LUT reduced_cmap = lambda step: np.array(cmap(step)[0:3]) - old_LUT = np.array(list(map(reduced_cmap, step_list))) - new_LUT = np.array(list(map(function, old_LUT))) + old_lut = np.array(list(map(reduced_cmap, step_list))) + new_lut = np.array(list(map(function, old_lut))) # Now try to make a minimal segment definition of the new LUT - cdict = {} + c_dict = {} for i, key in enumerate(['red', 'green', 'blue']): this_cdict = {} for j, step in enumerate(step_list): if step in step_dict[key]: - this_cdict[step] = new_LUT[j, i] - elif new_LUT[j, i] != old_LUT[j, i]: - this_cdict[step] = new_LUT[j, i] + this_cdict[step] = new_lut[j, i] + elif new_lut[j, i] != old_lut[j, i]: + this_cdict[step] = new_lut[j, i] colorvector = list(map(lambda x: x + (x[1],), this_cdict.items())) colorvector.sort() - cdict[key] = colorvector + c_dict[key] = colorvector - import matplotlib - return matplotlib.colors.LinearSegmentedColormap('colormap', cdict, 1024) + return colors.LinearSegmentedColormap('colormap', c_dict, 1024) class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): @@ -76,6 +76,7 @@ class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): measurement approaches.""" def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + """Setup performance precision table.""" case_studies = get_loaded_paper_config().get_all_case_studies() profilers: tp.List[Profiler] = [VXray(), PIMTracer()] @@ -172,10 +173,9 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: print(f"{colum_setup=}") df.columns = pd.MultiIndex.from_tuples(colum_setup) - # Table config - print(f"{df=}") + # Table config style: pd.io.formats.style.Styler = df.style kwargs: tp.Dict[str, tp.Any] = {} if table_format.is_latex(): @@ -220,147 +220,25 @@ def generate(self) -> tp.List[Table]: ] -class FeaturePerfOverheadTable(Table, table_name="fperf_overhead"): - """Table that compares overhead of different feature performance measurement - approaches.""" - - def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: - case_studies = get_loaded_paper_config().get_all_case_studies() - profilers: tp.List[Profiler] = [VXray(), PIMTracer()] - - # Data aggregation - df = pd.DataFrame() - table_rows = [] - - for case_study in case_studies: - rev = case_study.revisions[0] - project_name = case_study.project_name - - overhead_ground_truth = OverheadData.compute_overhead_data( - Baseline(), case_study, rev - ) - if not overhead_ground_truth: - print( - f"No baseline data for {case_study.project_name}, skipping" - ) - continue - - new_row = { - 'CaseStudy': project_name, - 'WithoutProfiler_mean_time': overhead_ground_truth.mean_time(), - 'WithoutProfiler_mean_ctx': overhead_ground_truth.mean_ctx() - } - - for profiler in profilers: - profiler_overhead = OverheadData.compute_overhead_data( - profiler, case_study, rev - ) - if profiler_overhead: - time_diff = profiler_overhead.config_wise_time_diff( - overhead_ground_truth - ) - ctx_diff = profiler_overhead.config_wise_ctx_diff( - overhead_ground_truth - ) - print(f"{time_diff=}") - new_row[f"{profiler.name}_time_mean"] 
= np.mean( - list(time_diff.values()) - ) - new_row[f"{profiler.name}_time_std"] = np.std( - list(time_diff.values()) - ) - new_row[f"{profiler.name}_time_max"] = np.max( - list(time_diff.values()) - ) - new_row[f"{profiler.name}_ctx_mean"] = np.mean( - list(ctx_diff.values()) - ) - new_row[f"{profiler.name}_ctx_std"] = np.std( - list(ctx_diff.values()) - ) - new_row[f"{profiler.name}_ctx_max"] = np.max( - list(ctx_diff.values()) - ) - else: - new_row[f"{profiler.name}_time_mean"] = np.nan - new_row[f"{profiler.name}_time_std"] = np.nan - new_row[f"{profiler.name}_time_max"] = np.nan - - new_row[f"{profiler.name}_ctx_mean"] = np.nan - new_row[f"{profiler.name}_ctx_std"] = np.nan - new_row[f"{profiler.name}_ctx_max"] = np.nan - - table_rows.append(new_row) - # df.append(new_row, ignore_index=True) - - df = pd.concat([df, pd.DataFrame(table_rows)]) - df.sort_values(["CaseStudy"], inplace=True) - print(f"{df=}") - - colum_setup = [('', '', 'CaseStudy'), ('Baseline', 'time', 'mean'), - ('Baseline', 'ctx', 'mean')] - for profiler in profilers: - colum_setup.append((profiler.name, 'time', 'mean')) - colum_setup.append((profiler.name, 'time', 'std')) - colum_setup.append((profiler.name, 'time', 'max')) - - colum_setup.append((profiler.name, 'ctx', 'mean')) - colum_setup.append((profiler.name, 'ctx', 'std')) - colum_setup.append((profiler.name, 'ctx', 'max')) - - print(f"{colum_setup=}") - df.columns = pd.MultiIndex.from_tuples(colum_setup) - - # Table config - - print(f"{df=}") - - style: pd.io.formats.style.Styler = df.style - kwargs: tp.Dict[str, tp.Any] = {} - if table_format.is_latex(): - kwargs["hrules"] = True - column_format = "l|rr" - for _ in profilers: - column_format += "|rrrrrr" - kwargs["column_format"] = column_format - kwargs["multicol_align"] = "|c" - kwargs["caption" - ] = """This table depicts the overhead measurement data. -For each case study, we show on the left the mean time it took to execute it without instrumentation (Baseline). -To the right of the baseline, we show for each profiler the induced overhead. -""" - style.format(precision=2) - style.hide() - - def add_extras(doc: Document) -> None: - doc.packages.append(Package("amsmath")) - doc.packages.append(Package("amssymb")) - - return dataframe_to_table( - df, - table_format, - style=style, - wrap_table=wrap_table, - wrap_landscape=True, - document_decorator=add_extras, - **kwargs - ) - - -class FeaturePerfOverheadTableGenerator( - TableGenerator, generator_name="fperf-overhead", options=[] -): - """Generator for `FeaturePerfOverheadTable`.""" - - def generate(self) -> tp.List[Table]: - return [ - FeaturePerfOverheadTable(self.table_config, **self.table_kwargs) - ] +def truncate_colormap( + cmap: colors.Colormap, + minval: float = 0.0, + maxval: float = 1.0, + n: int = 100 +) -> colors.LinearSegmentedColormap: + """ + Truncates a given color map to a specific range and number of elements. 
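+
+    For example, truncate_colormap(plt.get_cmap('RdYlGn'), 0.2, 0.8) would
+    keep only the middle band of the red-yellow-green map.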
+ Args: + cmap: the original colormap + minval: smallest color value + maxval: largest color value + n: number of colors that should be in the map -def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100): + Returns: color map truncated to the given parameters + """ new_cmap = colors.LinearSegmentedColormap.from_list( - 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval), + f"trunc({cmap.name},{minval:.2f},{maxval:.2f})", cmap(np.linspace(minval, maxval, n)) ) return new_cmap @@ -371,6 +249,7 @@ class FeaturePerfOverheadComparisionTable(Table, table_name="fperf_overhead"): approaches.""" def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + """Setup performance overhead comparision table.""" case_studies = get_loaded_paper_config().get_all_case_studies() profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] @@ -388,7 +267,6 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: 'recall': 'mean', 'f1_score': 'mean' }) - print(f"precision_df=\n{precision_df}") overhead_df = load_overhead_data(case_studies, profilers) overhead_df = overhead_df[[ @@ -412,7 +290,6 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: merged_df = pd.merge( precision_df, overhead_df, on=["CaseStudy", "Profiler"] ) - print(f"merged_df=\n{merged_df}") pivot_df = merged_df.pivot( index='CaseStudy', @@ -433,10 +310,8 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: (prof.name, c) for prof in profilers for c in columns ], axis=1) - print(f"pivot_df=\n{pivot_df}") pivot_df.loc["Total"] = pivot_df.mean() - print(f"pivot_df=\n{pivot_df}") # Rename columns overhead_time_c_name = "$\Delta$ Time $(\%)$" @@ -500,14 +375,6 @@ def add_extras(doc: Document) -> None: vmax=100.0, ) - # style.background_gradient( - # cmap=gray_map, - # subset=[(prof.name, overhead_memory_val_c_name) - # for prof in profilers], - # vmin=0.0, - # # vmax=100.0, - # ) - kwargs["convert_css"] = True kwargs["column_format"] = "l" + "".join([ "rrrrr" for _ in profilers @@ -543,7 +410,6 @@ class FeaturePerfMetricsOverviewTable(Table, table_name="fperf_overview"): """Table showing some general information about feature performance case studies.""" - # TODO: refactor out @staticmethod def _calc_folder_locs(repo_path: Path, rev_range: str, folder: str) -> int: churn_config = ChurnConfig.create_c_style_languages_config() From 7fd6936f17933b0aa8cc20be2bde45110de0b691 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 12:26:43 +0100 Subject: [PATCH 212/224] Cleans up more mypy/pylint issues --- varats-core/varats/experiment/steps/patch.py | 2 +- .../feature_perf_precision_database.py | 3 +- .../vara/feature_perf_precision.py | 60 +++++++++++-------- varats/varats/plots/feature_perf_precision.py | 27 +++++---- varats/varats/projects/cpp_projects/hyteg.py | 4 +- .../varats/tables/feature_perf_precision.py | 36 +++++++---- 6 files changed, 78 insertions(+), 54 deletions(-) diff --git a/varats-core/varats/experiment/steps/patch.py b/varats-core/varats/experiment/steps/patch.py index e03fd63a3..bbaf96aff 100644 --- a/varats-core/varats/experiment/steps/patch.py +++ b/varats-core/varats/experiment/steps/patch.py @@ -48,7 +48,7 @@ class RevertPatch(actions.ProjectStep): NAME = "REVERT_PATCH" DESCRIPTION = "Revert a Git patch from a project." 
- def __init__(self, project, patch): + def __init__(self, project: VProject, patch: Patch) -> None: super().__init__(project) self.__patch = patch diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 9417593c1..68366cf3f 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -280,7 +280,8 @@ def sum_pim_regression_check( if mean_diff < abs_cut_off or mean_diff < mean_baseline * rel_cut_off: return False - return ttest_ind(baseline_pim_total, current_pim_total).pvalue < 0.05 + u_test = ttest_ind(baseline_pim_total, current_pim_total) + return u_test.pvalue < 0.05 # type: ignore def pim_regression_check( diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 8fad1ef31..ba170d0cf 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -124,7 +124,7 @@ def __init__( file_name: str, report_file_ending: str = "json", reps=REPS - ): + ) -> None: super().__init__(project=project) self._binary = binary self._report_file_ending = report_file_ending @@ -375,10 +375,15 @@ def attach_usdt_bcc(report_file: Path, binary: Path) -> Future: return bcc_runner +AnalysisProjectStepBaseTy = tp.TypeVar( + "AnalysisProjectStepBaseTy", bound=AnalysisProjectStepBase +) + + def setup_actions_for_vara_experiment( experiment: FeatureExperiment, project: VProject, instr_type: FeatureInstrType, - analysis_step: tp.Type[AnalysisProjectStepBase] + analysis_step: tp.Type[AnalysisProjectStepBaseTy] ) -> tp.MutableSequence[actions.Step]: """Sets up actions for a given perf precision experiment.""" @@ -570,7 +575,7 @@ def __init__( file_name: str, report_file_ending: str = "txt", reps=REPS - ): + ) -> None: super().__init__(project=project) self.__binary = binary self.__report_file_ending = report_file_ending @@ -602,10 +607,11 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: print(f"Running example {prj_command.command.label}") with cleanup(prj_command): - pb_cmd = prj_command.command.as_plumbum_wrapped_with( - time["-v", "-o", time_report_file], - project=self.project - ) + pb_cmd = \ + prj_command.command.as_plumbum_wrapped_with( + time["-v", "-o", time_report_file], + project=self.project + ) pb_cmd(retcode=self.__binary.valid_exit_codes) return StepResult.OK @@ -722,7 +728,7 @@ def __init__( file_name: str, report_file_ending: str = "txt", reps=REPS - ): + ) -> None: super().__init__(project, binary, file_name, report_file_ending, reps) def call_with_output_folder(self, tmp_dir: Path) -> StepResult: @@ -755,10 +761,11 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: print(f"Running example {prj_command.command.label}") with cleanup(prj_command): - pb_cmd = prj_command.command.as_plumbum_wrapped_with( - time["-v", "-o", time_report_file], - project=self.project - ) + pb_cmd = \ + prj_command.command.as_plumbum_wrapped_with( + time["-v", "-o", time_report_file], + project=self.project + ) pb_cmd(retcode=self._binary.valid_exit_codes) return StepResult.OK @@ -779,7 +786,7 @@ def __init__( file_name: str, report_file_ending: str = "txt", reps=REPS - ): + ) -> None: super().__init__(project, binary, file_name, report_file_ending, reps) def call_with_output_folder(self, tmp_dir: Path) -> StepResult: @@ -814,20 +821,23 @@ def run_traced_code(self, tmp_dir: Path) 
-> StepResult: non_nfs_tmp_dir ) / self._binary.name - pb_cmd = prj_command.command.as_plumbum_wrapped_with( - time["-v", "-o", time_report_file], - adapted_binary_location, - project=self.project - ) + pb_cmd = \ + prj_command.command.as_plumbum_wrapped_with( + time["-v", "-o", time_report_file], + adapted_binary_location, + project=self.project + ) - bpf_runner = RunBPFTracedWorkloads.attach_usdt_raw_tracing( - fake_tracefile_path, adapted_binary_location, - Path(non_nfs_tmp_dir) - ) + bpf_runner = \ + RunBPFTracedWorkloads.attach_usdt_raw_tracing( + fake_tracefile_path, adapted_binary_location, + Path(non_nfs_tmp_dir) + ) with cleanup(prj_command): print( - f"Running example {prj_command.command.label}" + "Running example " + f"{prj_command.command.label}" ) pb_cmd(retcode=self._binary.valid_exit_codes) @@ -853,7 +863,7 @@ def __init__( file_name: str, report_file_ending: str = "txt", reps=REPS - ): + ) -> None: super().__init__(project, binary, file_name, report_file_ending, reps) def call_with_output_folder(self, tmp_dir: Path) -> StepResult: @@ -1073,7 +1083,7 @@ def __init__( binary: ProjectBinaryWrapper, report_file_ending: str = "txt", reps=REPS - ): + ) -> None: super().__init__(project=project) self.__binary = binary self.__report_file_ending = report_file_ending diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 4114da190..4367fbd30 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd import seaborn as sns +from matplotlib.axes import Axes from matplotlib.text import Text from varats.data.databases.feature_perf_precision_database import ( @@ -277,12 +278,13 @@ def plot(self, view_mode: bool) -> None: ) def do_single_plot( - self, x_values, target_row, merged_df, plot_extra_name, ax + self, x_values_name: str, target_row: str, merged_df: pd.DataFrame, + plot_extra_name: str, ax: Axes ) -> None: """Plot a single overhead metric.""" sns.scatterplot( merged_df, - x=x_values, + x=x_values_name, y=target_row, hue="Profiler", style='CaseStudy', @@ -291,8 +293,8 @@ def do_single_plot( ax=ax ) + text_obj: Text for text_obj in ax.legend().get_texts(): - text_obj: Text text_obj.set_fontsize("xx-large") if text_obj.get_text() == "Profiler": @@ -313,7 +315,7 @@ def do_single_plot( x_limit = max( np.max( np.nan_to_num( - merged_df[x_values], + merged_df[x_values_name], copy=True, nan=0.0, posinf=0.0, @@ -328,20 +330,23 @@ def do_single_plot( ax.yaxis.labelpad = 10 ax.xaxis.labelpad = 20 - prof_df = merged_df[['Profiler', 'precision', x_values, 'f1_score' - ]].groupby('Profiler').agg(['mean', 'std']) + prof_df = merged_df[[ + 'Profiler', 'precision', x_values_name, 'f1_score' + ]].groupby('Profiler').agg(['mean', 'std']) prof_df.fillna(0, inplace=True) pareto_front = self.plot_pareto_frontier( - prof_df[x_values]['mean'], prof_df[target_row]['mean'], max_x=False + prof_df[x_values_name]['mean'], + prof_df[target_row]['mean'], + max_x=False ) pf_x = [pair[0] for pair in pareto_front] pf_y = [pair[1] for pair in pareto_front] - x_loc = prof_df[x_values]['mean'] + x_loc = prof_df[x_values_name]['mean'] y_loc = prof_df[target_row]['mean'] - x_error = prof_df[x_values]['std'] + x_error = prof_df[x_values_name]['std'] y_error = prof_df[target_row]['std'] ax.errorbar( @@ -359,7 +364,7 @@ def do_single_plot( sns.scatterplot( prof_df, - x=(x_values, 'mean'), + x=(x_values_name, 'mean'), y=(target_row, 'mean'), hue="Profiler", 
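The runner steps above all repeat one wrapping idiom: the workload command is nested inside GNU time so that wall-clock time and resource usage land in a report file. A rough standalone sketch of that idiom, assuming the GNU time binary is on the PATH and with echo standing in for a real workload:

from plumbum.cmd import echo, time

# Executes: time -v -o report.txt echo some-workload
# -v selects the verbose output format, -o writes it to the given file.
wrapped_cmd = time["-v", "-o", "report.txt", echo["some-workload"]]
wrapped_cmd()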
ax=ax, @@ -384,7 +389,7 @@ def plot_pareto_frontier( y_values: tp.List[float], max_x: bool = True, max_y: bool = True - ): + ) -> tp.List[tp.List[float]]: """Pareto frontier selection process.""" sorted_list = sorted([ [x_values[i], y_values[i]] for i in range(len(x_values)) diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 18d6aa869..ecd90ca5b 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -7,7 +7,6 @@ from benchbuild.command import WorkloadSet, SourceRoot from benchbuild.utils.cmd import ninja, cmake, mkdir from benchbuild.utils.revision_ranges import SingleRevision -from benchbuild.utils.settings import get_number_of_jobs from plumbum import local from varats.experiment.workload_util import WorkloadCategory, RSBinary @@ -21,9 +20,8 @@ from varats.project.sources import FeatureSource from varats.project.varats_command import VCommand from varats.project.varats_project import VProject -from varats.utils.git_commands import init_all_submodules, update_all_submodules +from varats.utils.git_commands import update_all_submodules from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap -from varats.utils.settings import bb_cfg LOG = logging.getLogger(__name__) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 74597a411..3f785ed36 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -27,7 +27,6 @@ load_overhead_data, ) from varats.data.metrics import ConfusionMatrix -from varats.paper.case_study import CaseStudy from varats.paper.paper_config import get_loaded_paper_config from varats.project.project_domain import ProjectDomains from varats.project.project_util import get_local_project_git_path @@ -37,34 +36,41 @@ from varats.utils.git_util import calc_repo_loc, ChurnConfig -def cmap_map(function, cmap: colors.Colormap) -> colors.LinearSegmentedColormap: +def cmap_map( + function, cmap: colors.LinearSegmentedColormap +) -> colors.LinearSegmentedColormap: """ Applies function (which should operate on vectors of shape 3: [r, g, b]), on colormap cmap. This routine will break any discontinuous points in a colormap. 
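The plot_pareto_frontier helper typed above follows the classic sort-and-sweep construction. A self-contained sketch of the idea, using (x, y) tuples instead of the parallel lists from the plot code and assuming a non-empty input:

import typing as tp


def pareto_frontier(
    points: tp.List[tp.Tuple[float, float]],
    max_x: bool = True,
    max_y: bool = True
) -> tp.List[tp.Tuple[float, float]]:
    # Sort along x so a single sweep suffices; descending if larger is better.
    sorted_points = sorted(points, reverse=max_x)
    frontier = [sorted_points[0]]
    for x_val, y_val in sorted_points[1:]:
        improves = y_val >= frontier[-1][1] if max_y else y_val <= frontier[-1][1]
        if improves:
            frontier.append((x_val, y_val))
    return frontier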
""" - c_dict = cmap._segmentdata - step_dict = {} - # Firt get the list of points where the segments start or end + c_dict = cmap._segmentdata # pylint: disable=protected-access + step_dict: tp.Dict[str, tp.List[tp.Any]] = {} + + # First get the list of points where the segments start or end for key in ('red', 'green', 'blue'): step_dict[key] = list(map(lambda x: x[0], c_dict[key])) step_list = sum(step_dict.values(), []) - step_list = np.array(list(set(step_list))) + step_array = np.array(list(set(step_list))) + # Then compute the LUT, and apply the function to the LUT - reduced_cmap = lambda step: np.array(cmap(step)[0:3]) - old_lut = np.array(list(map(reduced_cmap, step_list))) + def reduced_cmap(step) -> np.ndarray: + return np.array(cmap(step)[0:3]) + + old_lut = np.array(list(map(reduced_cmap, step_array))) new_lut = np.array(list(map(function, old_lut))) + # Now try to make a minimal segment definition of the new LUT c_dict = {} for i, key in enumerate(['red', 'green', 'blue']): - this_cdict = {} - for j, step in enumerate(step_list): + this_c_dict = {} + for j, step in enumerate(step_array): if step in step_dict[key]: - this_cdict[step] = new_lut[j, i] + this_c_dict[step] = new_lut[j, i] elif new_lut[j, i] != old_lut[j, i]: - this_cdict[step] = new_lut[j, i] - colorvector = list(map(lambda x: x + (x[1],), this_cdict.items())) + this_c_dict[step] = new_lut[j, i] + colorvector = list(map(lambda x: x + (x[1],), this_c_dict.items())) colorvector.sort() c_dict[key] = colorvector @@ -185,12 +191,14 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: column_format += "|rrr" kwargs["column_format"] = column_format kwargs["multicol_align"] = "|c" + # pylint: disable=line-too-long kwargs[ "caption" ] = f"""Localization precision of different performance profiling approaches to detect configuration-specific performance regression detection. On the left, we show the amount of different configurations ({symb_configs}) analyzed and the amount of regressed configurations ({symb_regressed_configs}), determined through our baseline measurements. Furthermore, the table depicts for each profiler, precision ({symb_precision}), recall ({symb_recall}), and balanced accuracy ({symb_b_accuracy}). 
""" + # pylint: enable=line-too-long style.format(precision=2) style.hide() @@ -314,9 +322,11 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: pivot_df.loc["Total"] = pivot_df.mean() # Rename columns + # pylint: disable=anomalous-backslash-in-string overhead_time_c_name = "$\Delta$ Time $(\%)$" overhead_memory_c_name = "$\Delta$ Mem $(\%)$" overhead_memory_val_c_name = "$\Delta$ Mem $(Kbyte)$" + # pylint: enable=anomalous-backslash-in-string pivot_df = pivot_df.rename( columns={ "precision": "Precision", From 1d3f2b1a111c062245a62e8876ec1377103073a7 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 13:56:36 +0100 Subject: [PATCH 213/224] More fixes --- .../feature_perf_precision_database.py | 17 ++++----- .../vara/feature_perf_precision.py | 36 ++++++++++++------- varats/varats/jupyterhelper/file.py | 15 ++++++++ varats/varats/plots/feature_perf_precision.py | 8 ++--- .../varats/tables/feature_perf_precision.py | 29 ++++++++------- 5 files changed, 66 insertions(+), 39 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 68366cf3f..1c8c5ddaa 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -17,6 +17,7 @@ PerfInfluenceTraceReportAggregate, ) from varats.experiments.vara.feature_experiment import FeatureExperiment +from varats.jupyterhelper.file import load_mpr_time_report_aggregate from varats.paper.case_study import CaseStudy from varats.paper_mgmt.case_study import get_case_study_file_name_filter from varats.report.gnu_time_report import TimeReportAggregate @@ -476,12 +477,7 @@ def get_patch_names(case_study: CaseStudy) -> tp.List[str]: ) return [] - # TODO: fix to prevent double loading - try: - time_reports = fpp.MPRTimeReportAggregate(report_files[0].full_path()) - except: - print(f"Could not load report from: {report_files[0]}") - return [] + time_reports = load_mpr_time_report_aggregate(report_files[0].full_path()) return time_reports.get_patch_names() @@ -512,8 +508,9 @@ def get_regressing_config_ids_gt( ) return None - # TODO: fix to prevent double loading - time_reports = fpp.MPRTimeReportAggregate(report_files[0].full_path()) + time_reports = load_mpr_time_report_aggregate( + report_files[0].full_path() + ) old_time = time_reports.get_baseline_report() new_time = time_reports.get_report_for_patch(patch_name) @@ -651,6 +648,8 @@ def mean_fs_inputs(self) -> float: def mean_fs_outputs(self) -> float: return float(np.mean(list(self._mean_fs_outputs.values()))) + # TODO: remove after 'Type' notation is removed + # pylint: disable=protected-access def config_wise_time_diff(self, other: 'OverheadData') -> tp.Dict[int, float]: return self.__config_wise(self._mean_time, other._mean_time) @@ -683,6 +682,8 @@ def config_wise_fs_outputs_diff( ) -> tp.Dict[int, float]: return self.__config_wise(self._mean_fs_outputs, other._mean_fs_outputs) + # pylint: enable=protected-access + @staticmethod def __config_wise( self_map: tp.Dict[int, float], other_map: tp.Dict[int, float] diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index ba170d0cf..bc7c7b0a2 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -123,7 +123,7 @@ def __init__( binary: ProjectBinaryWrapper, file_name: str, 
report_file_ending: str = "json", - reps=REPS + reps: int = REPS ) -> None: super().__init__(project=project) self._binary = binary @@ -487,7 +487,10 @@ def actions_for_project( project: to analyze """ return setup_actions_for_vara_experiment( - self, project, FeatureInstrType.TEF, RunGenTracedWorkloads + self, + project, + FeatureInstrType.TEF, + RunGenTracedWorkloads # type: ignore[type-abstract] ) @@ -509,8 +512,10 @@ def actions_for_project( project: to analyze """ return setup_actions_for_vara_experiment( - self, project, FeatureInstrType.PERF_INFLUENCE_TRACE, - RunGenTracedWorkloads + self, + project, + FeatureInstrType.PERF_INFLUENCE_TRACE, + RunGenTracedWorkloads # type: ignore[type-abstract] ) @@ -534,7 +539,10 @@ def actions_for_project( project: to analyze """ return setup_actions_for_vara_experiment( - self, project, FeatureInstrType.USDT_RAW, RunBPFTracedWorkloads + self, + project, + FeatureInstrType.USDT_RAW, + RunBPFTracedWorkloads # type: ignore[type-abstract] ) @@ -556,7 +564,10 @@ def actions_for_project( project: to analyze """ return setup_actions_for_vara_experiment( - self, project, FeatureInstrType.USDT, RunBCCTracedWorkloads + self, + project, + FeatureInstrType.USDT, + RunBCCTracedWorkloads # type: ignore[type-abstract] ) @@ -574,7 +585,7 @@ def __init__( binary: ProjectBinaryWrapper, file_name: str, report_file_ending: str = "txt", - reps=REPS + reps: int = REPS ) -> None: super().__init__(project=project) self.__binary = binary @@ -727,7 +738,7 @@ def __init__( binary: ProjectBinaryWrapper, file_name: str, report_file_ending: str = "txt", - reps=REPS + reps: int = REPS ) -> None: super().__init__(project, binary, file_name, report_file_ending, reps) @@ -785,7 +796,7 @@ def __init__( binary: ProjectBinaryWrapper, file_name: str, report_file_ending: str = "txt", - reps=REPS + reps: int = REPS ) -> None: super().__init__(project, binary, file_name, report_file_ending, reps) @@ -830,7 +841,8 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult: bpf_runner = \ RunBPFTracedWorkloads.attach_usdt_raw_tracing( - fake_tracefile_path, adapted_binary_location, + fake_tracefile_path, \ + adapted_binary_location, Path(non_nfs_tmp_dir) ) @@ -862,7 +874,7 @@ def __init__( binary: ProjectBinaryWrapper, file_name: str, report_file_ending: str = "txt", - reps=REPS + reps: int = REPS ) -> None: super().__init__(project, binary, file_name, report_file_ending, reps) @@ -1082,7 +1094,7 @@ def __init__( project: VProject, binary: ProjectBinaryWrapper, report_file_ending: str = "txt", - reps=REPS + reps: int = REPS ) -> None: super().__init__(project=project) self.__binary = binary diff --git a/varats/varats/jupyterhelper/file.py b/varats/varats/jupyterhelper/file.py index 4c89e6f46..ddc9d54ab 100644 --- a/varats/varats/jupyterhelper/file.py +++ b/varats/varats/jupyterhelper/file.py @@ -18,6 +18,9 @@ SZZReport, PyDrillerSZZReport, ) +from varats.experiments.vara.feature_perf_precision import ( + MPRTimeReportAggregate, +) from varats.report.tef_report import TEFReport @@ -124,3 +127,15 @@ def load_tef_report(file_path: PathLikeTy) -> TEFReport: file_path (Path): Full path to the file """ return VDM.load_data_class_sync(file_path, TEFReport) + + +def load_mpr_time_report_aggregate( + file_path: PathLikeTy +) -> MPRTimeReportAggregate: + """ + Load a MPRTimeReportAggregate from a file. 
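The repeated `# type: ignore[type-abstract]` annotations above silence mypy's "only concrete class can be given where Type[X] is expected" error: the experiments pass abstract analysis-step classes around, and only concrete subclasses are instantiated later. A minimal sketch of that situation with illustrative names:

import abc
import typing as tp


class StepBase(abc.ABC):

    @abc.abstractmethod
    def run(self) -> None:
        ...


class ConcreteStep(StepBase):

    def run(self) -> None:
        print("running")


def setup_actions(step_cls: tp.Type[StepBase]) -> str:
    # Only the class object is inspected here; instantiation happens
    # elsewhere and always with a concrete subclass.
    return step_cls.__name__


setup_actions(ConcreteStep)  # accepted by mypy
setup_actions(StepBase)  # flagged as [type-abstract], hence the ignores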
+ + Attributes: + file_path (Path): Full path to the file + """ + return VDM.load_data_class_sync(file_path, MPRTimeReportAggregate) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 4367fbd30..24e58f89c 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -222,8 +222,6 @@ def plot(self, view_mode: bool) -> None: overhead_df['overhead_major_page_faults_rel'].replace([np.inf, -np.inf], np.nan, inplace=True) - # TODO: fix - overhead_df["overhead_major_page_faults_rel"].fillna(100, inplace=True) overhead_df['overhead_minor_page_faults_rel' ] = overhead_df['minor_page_faults'] / ( @@ -233,8 +231,6 @@ def plot(self, view_mode: bool) -> None: overhead_df['overhead_minor_page_faults_rel'].replace([np.inf, -np.inf], np.nan, inplace=True) - # TODO: fix - overhead_df["overhead_minor_page_faults_rel"].fillna(100, inplace=True) # Filesystem overhead_df['overhead_fs_inputs_rel'] = overhead_df['fs_inputs'] / ( @@ -325,8 +321,8 @@ def do_single_plot( ) ax.set_xlim(x_limit, 100) ax.tick_params(labelsize=20, pad=10) - ax.xaxis.label.set_size(25) - ax.yaxis.label.set_size(25) + ax.xaxis.label.set_fontsize(25) + ax.yaxis.label.set_fontsize(25) ax.yaxis.labelpad = 10 ax.xaxis.labelpad = 20 diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 3f785ed36..1e694cb61 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -20,13 +20,12 @@ VXray, PIMTracer, EbpfTraceTEF, - Baseline, compute_profiler_predictions, - OverheadData, load_precision_data, load_overhead_data, ) from varats.data.metrics import ConfusionMatrix +from varats.paper.case_study import CaseStudy from varats.paper.paper_config import get_loaded_paper_config from varats.project.project_domain import ProjectDomains from varats.project.project_util import get_local_project_git_path @@ -45,7 +44,7 @@ def cmap_map( This routine will break any discontinuous points in a colormap. 
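All of the *_rel columns above follow one pattern: relate the measured value to its overhead-free share, convert to percent, and map division-by-zero artifacts to NaN instead of infinity. A condensed sketch of that pattern with made-up numbers:

import numpy as np
import pandas as pd

df = pd.DataFrame({"time": [110.0, 50.0], "overhead_time": [10.0, 50.0]})

# overhead% = time / (time - overhead_time) * 100 - 100
df["overhead_time_rel"] = df["time"] / (
    df["time"] - df["overhead_time"]
) * 100 - 100
# Row one yields 10%; row two divides by zero and becomes NaN.
df["overhead_time_rel"].replace([np.inf, -np.inf], np.nan, inplace=True)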
""" - c_dict = cmap._segmentdata # pylint: disable=protected-access + c_dict = cmap._segmentdata # pylint: disable=protected-access # type: ignore step_dict: tp.Dict[str, tp.List[tp.Any]] = {} # First get the list of points where the segments start or end @@ -81,12 +80,10 @@ class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): """Table that compares the precision of different feature performance measurement approaches.""" - def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: - """Setup performance precision table.""" - case_studies = get_loaded_paper_config().get_all_case_studies() - profilers: tp.List[Profiler] = [VXray(), PIMTracer()] - - # Data aggregation + @staticmethod + def _prepare_data_table( + case_studies: tp.List[CaseStudy], profilers: tp.List[Profiler] + ): df = pd.DataFrame() table_rows = [] @@ -136,9 +133,16 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: new_row[f"{profiler.name}_baccuracy"] = np.nan table_rows.append(new_row) - # df.append(new_row, ignore_index=True) - df = pd.concat([df, pd.DataFrame(table_rows)]) + return pd.concat([df, pd.DataFrame(table_rows)]) + + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + """Setup performance precision table.""" + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer()] + + # Data aggregation + df = self._prepare_data_table(case_studies, profilers) df.sort_values(["CaseStudy"], inplace=True) print(f"{df=}") @@ -350,8 +354,7 @@ def add_extras(doc: Document) -> None: ] style.format({col: "{:.0f}" for col in mv_columns}, precision=2) - ryg_map = plt.get_cmap('RdYlGn') - ryg_map = cmap_map(lambda x: x / 1.2 + 0.2, ryg_map) + ryg_map = cmap_map(lambda x: x / 1.2 + 0.2, plt.get_cmap('RdYlGn')) style.background_gradient( cmap=ryg_map, From 97283c9d796d3fe3e47309e3a5a142689a997fbb Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 27 Nov 2023 14:00:23 +0100 Subject: [PATCH 214/224] Apply suggestions from code review --- varats-core/varats/report/linux_perf_report.py | 1 - 1 file changed, 1 deletion(-) diff --git a/varats-core/varats/report/linux_perf_report.py b/varats-core/varats/report/linux_perf_report.py index 97f50059d..10e661dff 100644 --- a/varats-core/varats/report/linux_perf_report.py +++ b/varats-core/varats/report/linux_perf_report.py @@ -66,7 +66,6 @@ def __parse_ctx_switches(line: str) -> int: @staticmethod def __parse_branch_misses(line: str) -> int: - # TODO: fix return type if line.startswith(""): return np.NaN return int(line.split(" ")[0].replace(",", "")) From bb90d668bcb34eb76369f28c357b3f51528816c8 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 14 Dec 2023 11:31:27 +0100 Subject: [PATCH 215/224] Refactor baseline regression check into profiler --- .../feature_perf_precision_database.py | 88 ++++++++++--------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 1c8c5ddaa..b3756c198 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -458,6 +458,48 @@ def is_regression( return pim_regression_check(old_acc_pim, new_acc_pim) +class Baseline(Profiler): + """Profiler mapper implementation for the black-box baseline.""" + + def __init__(self) -> None: + super().__init__( + "Base", fpp.BlackBoxBaselineRunner, 
fpp.BlackBoxOverheadBaseline, + TimeReportAggregate + ) + + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + time_reports = load_mpr_time_report_aggregate(report_path) + + old_time = time_reports.get_baseline_report() + new_time = time_reports.get_report_for_patch(patch_name) + if not new_time: + raise LookupError(f"Missing new time report in file {report_path}") + + # Cut off regressions smaller than 100ms + req_diff = 0.1 + if np.mean(old_time.measurements_wall_clock_time + ) == np.mean(new_time.measurements_wall_clock_time): + return False + + if abs( + np.mean(old_time.measurements_wall_clock_time) - + np.mean(new_time.measurements_wall_clock_time) + ) < req_diff: + return False + + ttest_res = ttest_ind( + old_time.measurements_wall_clock_time, + new_time.measurements_wall_clock_time + ) + + if ttest_res.pvalue < 0.05: + return True + + return False + + def get_patch_names(case_study: CaseStudy) -> tp.List[str]: """Looks up all patch names from the given case study.""" report_files = get_processed_revisions_files( @@ -508,36 +550,11 @@ def get_regressing_config_ids_gt( ) return None - time_reports = load_mpr_time_report_aggregate( - report_files[0].full_path() + baseline_prof = Baseline() + ground_truth[config_id] = baseline_prof.is_regression( + report_files[0], patch_name ) - old_time = time_reports.get_baseline_report() - new_time = time_reports.get_report_for_patch(patch_name) - if not new_time: - return None - - # Cut off regressions smaller than 100ms - req_diff = 0.1 - if np.mean(old_time.measurements_wall_clock_time - ) == np.mean(new_time.measurements_wall_clock_time): - ground_truth[config_id] = False - elif abs( - np.mean(old_time.measurements_wall_clock_time) - - np.mean(new_time.measurements_wall_clock_time) - ) < req_diff: - ground_truth[config_id] = False - else: - ttest_res = ttest_ind( - old_time.measurements_wall_clock_time, - new_time.measurements_wall_clock_time - ) - - if ttest_res.pvalue < 0.05: - ground_truth[config_id] = True - else: - ground_truth[config_id] = False - return ground_truth @@ -551,21 +568,6 @@ def map_to_negative_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: ] -class Baseline(Profiler): - """Profiler mapper implementation for the black-box baseline.""" - - def __init__(self) -> None: - super().__init__( - "Base", fpp.BlackBoxBaselineRunner, fpp.BlackBoxOverheadBaseline, - TimeReportAggregate - ) - - def is_regression( - self, report_path: ReportFilepath, patch_name: str - ) -> bool: - raise NotImplementedError() - - def compute_profiler_predictions( profiler: Profiler, project_name: str, case_study: CaseStudy, config_ids: tp.List[int], patch_name: str From 1848abcd3840f125804d99c4e097bf9ed0289b1f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 23 Jan 2024 16:24:28 +0100 Subject: [PATCH 216/224] Refactors regression checking to unify baseline and statistical checking --- .../feature_perf_precision_database.py | 326 +++++++++--------- 1 file changed, 160 insertions(+), 166 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index b3756c198..0d05abc82 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -138,162 +138,6 @@ def get_matching_event( return feature_performances -def is_feature_relevant( - old_measurements: tp.List[int], - new_measurements: tp.List[int], - rel_cut_off: float = 
0.01, - abs_cut_off: int = 20 -) -> bool: - """Check if a feature can be ignored for regression checking as it's time - measurements seem not relevant.""" - old_mean = np.mean(old_measurements) - new_mean = np.mean(new_measurements) - - if old_mean < abs_cut_off and new_mean < abs_cut_off: - return False - - old_rel_cut_off = old_mean * rel_cut_off - abs_mean_diff = abs(old_mean - new_mean) - if abs_mean_diff < old_rel_cut_off: - return False - - return True - - -def precise_pim_regression_check( - baseline_pim: tp.DefaultDict[str, tp.List[int]], - current_pim: tp.DefaultDict[str, tp.List[int]], - rel_cut_off: float = 0.01, - abs_cut_off: int = 20 -) -> bool: - """Compute if there was a regression in one of the feature terms of the - model between the current and the baseline, using a Mann-Whitney U test.""" - is_regression = False - abs_cut_off = 100 - - for feature, old_values in baseline_pim.items(): - if feature in current_pim: - if feature == "Base": - # The regression should be identified in actual feature code - continue - - new_values = current_pim[feature] - - # Skip features that seem not to be relevant for regressions testing - if not is_feature_relevant( - old_values, new_values, rel_cut_off, abs_cut_off - ): - continue - - ttest_res = ttest_ind(old_values, new_values) - - if ttest_res.pvalue < 0.05: - is_regression = True - else: - if np.mean(old_values) > abs_cut_off: - print( - f"Could not find feature {feature} in new trace. " - f"({np.mean(old_values)}us lost)" - ) - # TODO: how to handle this? - # raise NotImplementedError() - # is_regression = True - - return is_regression - - -def cliffs_delta_pim_regression_check( - baseline_pim: tp.DefaultDict[str, tp.List[int]], - current_pim: tp.DefaultDict[str, tp.List[int]], - rel_cut_off: float = 0.01, - abs_cut_off: int = 20 -) -> bool: - """Compute if there was a regression in one of the feature terms of the - model between the current and the baseline, using cliffs delta.""" - is_regression = False - - for feature, old_values in baseline_pim.items(): - if feature in current_pim: - if feature == "Base": - # The regression should be identified in actual feature code - continue - - new_values = current_pim[feature] - - if not is_feature_relevant( - old_values, new_values, rel_cut_off, abs_cut_off - ): - continue - - cdelta_val, _ = cliffs_delta(old_values, new_values) - - # if res == "large": - if abs(cdelta_val) > 0.7: - is_regression = True - else: - if np.mean(old_values) > abs_cut_off: - print( - f"Could not find feature {feature} in new trace. " - f"({np.mean(old_values)}us lost)" - ) - print(f"Could not find feature {feature} in new trace.") - # TODO: how to handle this? - # raise NotImplementedError() - # is_regression = True - - return is_regression - - -def sum_pim_regression_check( - baseline_pim: tp.DefaultDict[str, tp.List[int]], - current_pim: tp.DefaultDict[str, tp.List[int]], - rel_cut_off: float = 0.01, - abs_cut_off: int = 20 -) -> bool: - """ - Compute if there was a regression in the sum of the features in the model - between the current and the baseline. - - The comparision is done through a Mann-Whitney U test. 
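The effect-size check above relies on the cliffs_delta package, with |d| > 0.7 as the conventional threshold for a large effect. For intuition, the statistic is the normalized difference between dominating and dominated sample pairs; a sketch of the definition, not the package implementation:

import typing as tp


def cliffs_delta_value(
    old_values: tp.Sequence[float], new_values: tp.Sequence[float]
) -> float:
    greater = sum(1 for o in old_values for n in new_values if o > n)
    lesser = sum(1 for o in old_values for n in new_values if o < n)
    return (greater - lesser) / (len(old_values) * len(new_values))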
- """ - baseline_pim_totals: tp.List[tp.List[int]] = [ - old_values for feature, old_values in baseline_pim.items() - if feature != "Base" - ] - current_pim_totals: tp.List[tp.List[int]] = [ - current_values for feature, current_values in current_pim.items() - if feature != "Base" - ] - - baseline_pim_total: tp.List[int] = [ - sum(values) for values in zip(*baseline_pim_totals) - ] - current_pim_total: tp.List[int] = [ - sum(values) for values in zip(*current_pim_totals) - ] - - if not baseline_pim_total and not current_pim_total: - # How do we get here? - return False - - mean_baseline = np.mean(baseline_pim_total) - mean_diff = abs(mean_baseline - np.mean(current_pim_total)) - if mean_diff < abs_cut_off or mean_diff < mean_baseline * rel_cut_off: - return False - - u_test = ttest_ind(baseline_pim_total, current_pim_total) - return u_test.pvalue < 0.05 # type: ignore - - -def pim_regression_check( - baseline_pim: tp.DefaultDict[str, tp.List[int]], - current_pim: tp.DefaultDict[str, tp.List[int]] -) -> bool: - """Compares two pims and determines if there was a regression between the - baseline and current.""" - return precise_pim_regression_check(baseline_pim, current_pim) - - class Profiler(): """Profiler interface to add different profilers to the evaluation.""" @@ -327,6 +171,161 @@ def report_type(self) -> tp.Type[BaseReport]: """Report type used to load this profilers information.""" return self.__report_type + @property + def relative_cut_off(self) -> float: + """Returns the relative cut off in percent below which regressions + should not be considered.""" + return 0.01 + + @property + def absolute_cut_off(self) -> int: + """Returns the absolute cut off in milliseconds below which regressions + should not be considered.""" + return 100 + + def _is_significantly_different( + self, old_values: tp.Sequence[tp.Union[float, int]], + new_values: tp.Sequence[tp.Union[float, int]] + ) -> bool: + """Checks if there is a significant difference between old and new + values.""" + return self.__ttest(old_values, new_values) + + def __ttest( # pylint: disable=W0238 + self, old_values: tp.Sequence[tp.Union[float, int]], + new_values: tp.Sequence[tp.Union[float, int]] + ) -> bool: + """Implements t-test.""" + ttest_res = ttest_ind(old_values, new_values) + + if ttest_res.pvalue < 0.05: + return True + + return False + + def __cliffs_delta( # pylint: disable=W0238 + self, old_values: tp.Sequence[tp.Union[float, int]], + new_values: tp.Sequence[tp.Union[float, int]] + ) -> bool: + """Implements cliffs_delta test.""" + cdelta_val, _ = cliffs_delta(old_values, new_values) + + # if res == "large": + if abs(cdelta_val) > 0.7: + return True + + return False + + def _is_feature_relevant( + self, old_measurements: tp.List[int], new_measurements: tp.List[int] + ) -> bool: + """Check if a feature can be ignored for regression checking as it's + time measurements seem not relevant.""" + old_mean = np.mean(old_measurements) + new_mean = np.mean(new_measurements) + + if old_mean < self.absolute_cut_off and new_mean < self.absolute_cut_off: + return False + + old_rel_cut_off = old_mean * self.relative_cut_off + abs_mean_diff = abs(old_mean - new_mean) + if abs_mean_diff < old_rel_cut_off: + return False + + return True + + def _precise_pim_regression_check( + self, baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] + ) -> bool: + """Compute if there was a regression in one of the feature terms of the + model between the current and the baseline, using a Mann-Whitney U 
+ test.""" + is_regression = False + + for feature, old_values in baseline_pim.items(): + if feature in current_pim: + if feature == "Base": + # The regression should be identified in actual feature code + continue + + new_values = current_pim[feature] + + # Skip features that seem not to be relevant for regressions testing + if not self._is_feature_relevant(old_values, new_values): + continue + + is_regression = is_regression or self._is_significantly_different( + old_values, new_values + ) + else: + if np.mean(old_values) > self.absolute_cut_off: + print( + f"Could not find feature {feature} in new trace. " + f"({np.mean(old_values)}us lost)" + ) + # TODO: how to handle this? + # raise NotImplementedError() + # is_regression = True + + return is_regression + + def _sum_pim_regression_check( + self, baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] + ) -> bool: + """ + Compute if there was a regression in the sum of the features in the + model between the current and the baseline. + + The comparision is done through a Mann-Whitney U test. + """ + baseline_pim_totals: tp.List[tp.List[int]] = [ + old_values for feature, old_values in baseline_pim.items() + if feature != "Base" + ] + current_pim_totals: tp.List[tp.List[int]] = [ + current_values for feature, current_values in current_pim.items() + if feature != "Base" + ] + + baseline_pim_total: tp.List[int] = [ + sum(values) for values in zip(*baseline_pim_totals) + ] + current_pim_total: tp.List[int] = [ + sum(values) for values in zip(*current_pim_totals) + ] + + if not baseline_pim_total and not current_pim_total: + # How did we get here? + return False + + mean_baseline = np.mean(baseline_pim_total) + mean_diff = abs(mean_baseline - np.mean(current_pim_total)) + if mean_diff < self.absolute_cut_off \ + or mean_diff < mean_baseline * self.relative_cut_off: + return False + + return self._is_significantly_different( + baseline_pim_total, current_pim_total + ) + + def pim_regression_check( + self, baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] + ) -> bool: + """Compares two pims and determines if there was a regression between + the baseline and current.""" + return self._precise_pim_regression_check(baseline_pim, current_pim) + + def default_regression_check( + self, old_values: tp.Sequence[tp.Union[float, int]], + new_values: tp.Sequence[tp.Union[float, int]] + ) -> bool: + """Checks if there is a significant difference between old and new + values.""" + return self._is_significantly_different(old_values, new_values) + @abc.abstractmethod def is_regression( self, report_path: ReportFilepath, patch_name: str @@ -367,7 +366,7 @@ def is_regression( for feature, value in pim.items(): new_acc_pim[feature].append(value) - return pim_regression_check(old_acc_pim, new_acc_pim) + return self.pim_regression_check(old_acc_pim, new_acc_pim) class PIMTracer(Profiler): @@ -419,7 +418,7 @@ def is_regression( new_acc_pim = self.__aggregate_pim_data(opt_mr.reports()) - return pim_regression_check(old_acc_pim, new_acc_pim) + return self.pim_regression_check(old_acc_pim, new_acc_pim) class EbpfTraceTEF(Profiler): @@ -455,7 +454,7 @@ def is_regression( for feature, value in pim.items(): new_acc_pim[feature].append(value) - return pim_regression_check(old_acc_pim, new_acc_pim) + return self.pim_regression_check(old_acc_pim, new_acc_pim) class Baseline(Profiler): @@ -478,7 +477,7 @@ def is_regression( raise LookupError(f"Missing new time report in file 
{report_path}") # Cut off regressions smaller than 100ms - req_diff = 0.1 + req_diff = self.absolute_cut_off / 1000 if np.mean(old_time.measurements_wall_clock_time ) == np.mean(new_time.measurements_wall_clock_time): return False @@ -489,16 +488,11 @@ def is_regression( ) < req_diff: return False - ttest_res = ttest_ind( + return self.default_regression_check( old_time.measurements_wall_clock_time, new_time.measurements_wall_clock_time ) - if ttest_res.pvalue < 0.05: - return True - - return False - def get_patch_names(case_study: CaseStudy) -> tp.List[str]: """Looks up all patch names from the given case study.""" From 78d7a3d0447b96787334d5d37d6555d005fea8a8 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 23 Jan 2024 17:30:45 +0100 Subject: [PATCH 217/224] More clean up --- .../feature_perf_precision_database.py | 23 +++++++++---------- .../varats/tables/feature_perf_precision.py | 19 +++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 0d05abc82..02df1967d 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -224,7 +224,8 @@ def _is_feature_relevant( old_mean = np.mean(old_measurements) new_mean = np.mean(new_measurements) - if old_mean < self.absolute_cut_off and new_mean < self.absolute_cut_off: + if old_mean < self.absolute_cut_off and \ + new_mean < self.absolute_cut_off: return False old_rel_cut_off = old_mean * self.relative_cut_off @@ -251,22 +252,21 @@ def _precise_pim_regression_check( new_values = current_pim[feature] - # Skip features that seem not to be relevant for regressions testing + # Skip features that seem not to be relevant + # for regressions testing if not self._is_feature_relevant(old_values, new_values): continue - is_regression = is_regression or self._is_significantly_different( - old_values, new_values - ) + is_regression = is_regression or \ + self._is_significantly_different( + old_values, new_values + ) else: if np.mean(old_values) > self.absolute_cut_off: print( f"Could not find feature {feature} in new trace. " f"({np.mean(old_values)}us lost)" ) - # TODO: how to handle this? 
- # raise NotImplementedError() - # is_regression = True return is_regression @@ -302,8 +302,8 @@ def _sum_pim_regression_check( mean_baseline = np.mean(baseline_pim_total) mean_diff = abs(mean_baseline - np.mean(current_pim_total)) - if mean_diff < self.absolute_cut_off \ - or mean_diff < mean_baseline * self.relative_cut_off: + if mean_diff < self.absolute_cut_off or \ + mean_diff < mean_baseline * self.relative_cut_off: return False return self._is_significantly_different( @@ -596,14 +596,13 @@ def compute_profiler_predictions( report_files[0], patch_name ) except Exception as exception: # pylint: disable=W0718 + # Print exception information but continue working on the plot/table print( f"FAILURE: Skipping {config_id=} of {project_name=}, " f"profiler={profiler.name}" ) print(exception) print(traceback.format_exc()) - # TODO: clean up - # raise exception return result_dict diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 1e694cb61..ec0ca3060 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -36,7 +36,8 @@ def cmap_map( - function, cmap: colors.LinearSegmentedColormap + function: tp.Callable[[np.ndarray[np.float64]], np.float64], + cmap: colors.LinearSegmentedColormap ) -> colors.LinearSegmentedColormap: """ Applies function (which should operate on vectors of shape 3: [r, g, b]), on @@ -44,7 +45,9 @@ def cmap_map( This routine will break any discontinuous points in a colormap. """ - c_dict = cmap._segmentdata # pylint: disable=protected-access # type: ignore + # pylint: disable=protected-access,attr-defined + c_dict = cmap._segmentdata # type: ignore + # pylint: enable=protected-access,attr-defined step_dict: tp.Dict[str, tp.List[tp.Any]] = {} # First get the list of points where the segments start or end @@ -83,7 +86,7 @@ class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): @staticmethod def _prepare_data_table( case_studies: tp.List[CaseStudy], profilers: tp.List[Profiler] - ): + ) -> pd.DataFrame: df = pd.DataFrame() table_rows = [] @@ -109,7 +112,6 @@ def _prepare_data_table( } for profiler in profilers: - # TODO: multiple patch cycles predicted = compute_profiler_predictions( profiler, project_name, case_study, case_study.get_config_ids_for_revision(rev), patch_name @@ -285,8 +287,6 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: "CaseStudy", "Profiler", "time", "memory", "overhead_time", "overhead_memory" ]] - # print(f"{overhead_df=}") - # TODO: double check and refactor overhead_df['overhead_time_rel'] = overhead_df['time'] / ( overhead_df['time'] - overhead_df['overhead_time'] ) * 100 - 100 @@ -354,7 +354,10 @@ def add_extras(doc: Document) -> None: ] style.format({col: "{:.0f}" for col in mv_columns}, precision=2) - ryg_map = cmap_map(lambda x: x / 1.2 + 0.2, plt.get_cmap('RdYlGn')) + ryg_map = cmap_map( + lambda x: x / 1.2 + 0.2, + tp.cast(colors.LinearSegmentedColormap, plt.get_cmap('RdYlGn')) + ) style.background_gradient( cmap=ryg_map, @@ -462,9 +465,7 @@ def _calc_folder_locs_dune(repo_path: Path, rev_range: str) -> int: for sub_project in dune_sub_projects: sub_project_path = repo_path / sub_project - # TODO: get sub_rpoject hashes locs = calc_repo_loc(sub_project_path, "HEAD") - # print(f"Calculated {locs} for {sub_project_path}") total_locs += locs return total_locs From d054c401d3da69836ce078bbb8efa16f4ceb608c Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 23 Jan 2024 18:50:25 +0100 
Subject: [PATCH 218/224] Remove wrong disable --- varats/varats/tables/feature_perf_precision.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index ec0ca3060..01b8e5625 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -45,9 +45,9 @@ def cmap_map( This routine will break any discontinuous points in a colormap. """ - # pylint: disable=protected-access,attr-defined + # pylint: disable=protected-access c_dict = cmap._segmentdata # type: ignore - # pylint: enable=protected-access,attr-defined + # pylint: enable=protected-access step_dict: tp.Dict[str, tp.List[tp.Any]] = {} # First get the list of points where the segments start or end From f2659b926f77eb2981e1e9b747b9af8b6e1e61e2 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 25 Jan 2024 10:34:54 +0100 Subject: [PATCH 219/224] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sebastian Böhm --- varats/varats/data/databases/feature_perf_precision_database.py | 2 +- varats/varats/plots/feature_perf_precision.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 02df1967d..5c03f2ae7 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -153,7 +153,7 @@ def __init__( @property def name(self) -> str: - """Hame of the profiler used.""" + """Name of the profiler used.""" return self.__name @property diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py index 24e58f89c..eee2a25cc 100644 --- a/varats/varats/plots/feature_perf_precision.py +++ b/varats/varats/plots/feature_perf_precision.py @@ -158,7 +158,6 @@ class PerfProfDistPlotGenerator( """Generates performance distribution plot.""" def generate(self) -> tp.List[Plot]: - return [PerfPrecisionDistPlot(self.plot_config, **self.plot_kwargs)] From 7350cb0c5dcbc682419514d3c873f02b0cb435c8 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 25 Jan 2024 10:39:17 +0100 Subject: [PATCH 220/224] Clean up from feedback --- .../databases/feature_perf_precision_database.py | 4 ++-- varats/varats/experiments/vara/feature_experiment.py | 1 - .../experiments/vara/feature_perf_precision.py | 12 ++++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py index 5c03f2ae7..8bdb1e1d6 100644 --- a/varats/varats/data/databases/feature_perf_precision_database.py +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -86,7 +86,6 @@ def get_matching_event( for trace_event in tef_report.trace_events: if trace_event.category == "Feature": if trace_event.event_type == TraceEventType.DURATION_EVENT_BEGIN: - # open_events.append(trace_event) # insert event at the top of the list open_events.insert(0, trace_event) elif trace_event.event_type == TraceEventType.DURATION_EVENT_END: @@ -163,7 +162,8 @@ def experiment(self) -> tp.Type[FeatureExperiment]: @property def overhead_experiment(self) -> tp.Type[FeatureExperiment]: - """Experiment used to produce this profilers information.""" + 
"""Experiment used to produce overhead data that this profilers produced + when collecting information.""" return self.__overhead_experiment @property diff --git a/varats/varats/experiments/vara/feature_experiment.py b/varats/varats/experiments/vara/feature_experiment.py index 8f96019e1..3eacff35e 100644 --- a/varats/varats/experiments/vara/feature_experiment.py +++ b/varats/varats/experiments/vara/feature_experiment.py @@ -345,7 +345,6 @@ def run_traced_code(self) -> StepResult: pb_cmd = pb_cmd[get_extra_config_options( self.project )] - print(f"{pb_cmd=}") _, _, err = pb_cmd.run() xray = re.findall( r"XRay: Log file in '(.+?)'", diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index bc7c7b0a2..953d229cb 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -101,7 +101,7 @@ def get_threshold(project: VProject) -> int: "SynthSAFieldSensitivity", "SynthIPRuntime", "SynthIPTemplate", "SynthIPTemplate2", "SynthIPCombined" ]: - print("Don't instrument everything") + # Don't instrument everything for these synthtic projects return 10 return 0 @@ -571,7 +571,7 @@ def actions_for_project( ) -class RunBackBoxBaseline(OutputFolderStep): # type: ignore +class RunBlackBoxBaseline(OutputFolderStep): # type: ignore """Executes the traced project binaries on the specified workloads.""" NAME = "VaRARunTracedBinaries" @@ -689,7 +689,7 @@ def actions_for_project( patch_steps.append(ApplyPatch(project, patch)) patch_steps.append(ReCompile(project)) patch_steps.append( - RunBackBoxBaseline( + RunBlackBoxBaseline( project, binary, file_name=MPRTimeReportAggregate.create_patched_report_name( @@ -705,7 +705,7 @@ def actions_for_project( analysis_actions.append( ZippedExperimentSteps( result_filepath, [ - RunBackBoxBaseline( + RunBlackBoxBaseline( project, binary, file_name=MPRTimeReportAggregate. 
@@ -1081,7 +1081,7 @@ def actions_for_project( ) -class RunBackBoxBaselineOverhead(OutputFolderStep): # type: ignore +class RunBlackBoxBaselineOverhead(OutputFolderStep): # type: ignore """Executes the traced project binaries on the specified workloads.""" NAME = "VaRARunTracedBinaries" @@ -1189,7 +1189,7 @@ def actions_for_project( ZippedExperimentSteps( result_filepath, [ - RunBackBoxBaselineOverhead( # type: ignore + RunBlackBoxBaselineOverhead( # type: ignore project, binary ), From 174490c65dbd2cb7caf8071fe3fe04702183423f Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Thu, 25 Jan 2024 10:47:44 +0100 Subject: [PATCH 221/224] Mypy "rework" --- varats/varats/experiments/vara/feature_perf_precision.py | 5 ++++- varats/varats/tables/feature_perf_precision.py | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py index 953d229cb..b45d2801d 100644 --- a/varats/varats/experiments/vara/feature_perf_precision.py +++ b/varats/varats/experiments/vara/feature_perf_precision.py @@ -162,7 +162,10 @@ class MPRPIMAggregate( def __init__(self, path: Path) -> None: # TODO: clean up report handling, we currently parse it as a TEFReport # as the file looks similar - super().__init__(path, PerfInfluenceTraceReportAggregate) + super().__init__( + path, + PerfInfluenceTraceReportAggregate # type: ignore + ) class RunGenTracedWorkloads(AnalysisProjectStepBase): # type: ignore diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 01b8e5625..b11c6baa6 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt import numpy as np +import numpy.typing as npt import pandas as pd from benchbuild.utils.cmd import git from matplotlib import colors @@ -36,7 +37,7 @@ def cmap_map( - function: tp.Callable[[np.ndarray[np.float64]], np.float64], + function: tp.Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]], cmap: colors.LinearSegmentedColormap ) -> colors.LinearSegmentedColormap: """ @@ -57,7 +58,7 @@ def cmap_map( step_array = np.array(list(set(step_list))) # Then compute the LUT, and apply the function to the LUT - def reduced_cmap(step) -> np.ndarray: + def reduced_cmap(step: np.float64) -> npt.NDArray: return np.array(cmap(step)[0:3]) old_lut = np.array(list(map(reduced_cmap, step_array))) From b64fcdfa01d43b1738cab6706c42b8b094baaae0 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 26 Jan 2024 07:50:42 +0100 Subject: [PATCH 222/224] Group synthetics into categories --- .../varats/tables/feature_perf_precision.py | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index b11c6baa6..cc7a27b6a 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -304,6 +304,41 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: precision_df, overhead_df, on=["CaseStudy", "Profiler"] ) + group_synthetic_categories = True + if group_synthetic_categories: + + def compute_grouping(case_study_name: str) -> str: + if case_study_name.startswith("SynthSA"): + return "Static Analysis" + + if case_study_name.startswith( + "SynthDA" + ) or case_study_name.startswith("SynthOV"): + return "Dynamic Analysis" + + if 
case_study_name.startswith("SynthFeature"): + return "Configurability" + + if case_study_name.startswith( + "SynthCT" + ) or case_study_name.startswith("SynthIP"): + return "Implementation Pattern" + + return case_study_name + + merged_df["CaseStudy"] = merged_df["CaseStudy"].apply( + compute_grouping + ) + merged_df = merged_df.groupby(['CaseStudy', "Profiler"], + as_index=False).agg({ + 'precision': 'mean', + 'recall': 'mean', + 'overhead_time': 'mean', + 'overhead_time_rel': 'mean', + 'overhead_memory_rel': 'mean', + 'overhead_memory': 'mean' + }) + pivot_df = merged_df.pivot( index='CaseStudy', columns='Profiler', @@ -324,7 +359,23 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: ], axis=1) - pivot_df.loc["Total"] = pivot_df.mean() + # All means need to be computed before they are added as rows + overall_mean = pivot_df.mean() + if group_synthetic_categories: + synth_categories = [ + "Static Analysis", "Dynamic Analysis", "Configurability", + "Implementation Pattern" + ] + + synth_mean = pivot_df.loc[pivot_df.index.isin(synth_categories) + ].mean() + real_world_mean = pivot_df.loc[~pivot_df.index. + isin(synth_categories)].mean() + + pivot_df.loc["SynthMean"] = synth_mean + pivot_df.loc["RealWorldMean"] = real_world_mean + + pivot_df.loc["OverallMean"] = overall_mean # Rename columns # pylint: disable=anomalous-backslash-in-string From 7ce8aceff0b3086563452ee8e2d3537ef30d52c4 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 28 Jan 2024 08:26:02 +0100 Subject: [PATCH 223/224] Refactors cat grouping to include stats table --- .../varats/tables/feature_perf_precision.py | 62 +++++++++++-------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index cc7a27b6a..5f9148fab 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -35,6 +35,31 @@ from varats.table.tables import TableFormat, TableGenerator from varats.utils.git_util import calc_repo_loc, ChurnConfig +group_synthetic_categories = True + +synth_categories = [ + "Static Analysis", "Dynamic Analysis", "Configurability", + "Implementation Pattern" +] + + +def compute_cs_category_grouping(case_study_name: str) -> str: + if case_study_name.startswith("SynthSA"): + return "Static Analysis" + + if case_study_name.startswith("SynthDA" + ) or case_study_name.startswith("SynthOV"): + return "Dynamic Analysis" + + if case_study_name.startswith("SynthFeature"): + return "Configurability" + + if case_study_name.startswith("SynthCT" + ) or case_study_name.startswith("SynthIP"): + return "Implementation Pattern" + + return case_study_name + def cmap_map( function: tp.Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]], @@ -304,30 +329,10 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: precision_df, overhead_df, on=["CaseStudy", "Profiler"] ) - group_synthetic_categories = True if group_synthetic_categories: - def compute_grouping(case_study_name: str) -> str: - if case_study_name.startswith("SynthSA"): - return "Static Analysis" - - if case_study_name.startswith( - "SynthDA" - ) or case_study_name.startswith("SynthOV"): - return "Dynamic Analysis" - - if case_study_name.startswith("SynthFeature"): - return "Configurability" - - if case_study_name.startswith( - "SynthCT" - ) or case_study_name.startswith("SynthIP"): - return "Implementation Pattern" - - return case_study_name - merged_df["CaseStudy"] = 
merged_df["CaseStudy"].apply( - compute_grouping + compute_cs_category_grouping ) merged_df = merged_df.groupby(['CaseStudy', "Profiler"], as_index=False).agg({ @@ -362,11 +367,6 @@ def compute_grouping(case_study_name: str) -> str: # All means need to be computed before they are added as rows overall_mean = pivot_df.mean() if group_synthetic_categories: - synth_categories = [ - "Static Analysis", "Dynamic Analysis", "Configurability", - "Implementation Pattern" - ] - synth_mean = pivot_df.loc[pivot_df.index.isin(synth_categories) ].mean() real_world_mean = pivot_df.loc[~pivot_df.index. @@ -567,6 +567,16 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: cs_data.append(pd.DataFrame.from_dict(cs_dict, orient='index')) df = pd.concat(cs_data).sort_index() + df.index.name = 'CaseStudy' + + if group_synthetic_categories: + df.index = df.index.map(compute_cs_category_grouping) + + df = df.groupby(df.index.name, as_index=True).agg({ + 'NumConfig': 'sum', + 'Locs': 'sum', + 'Regressions': 'sum' + }) style = df.style kwargs: tp.Dict[str, tp.Any] = {} From 194b228722cc62119785c304de282e808a94db02 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 30 Jan 2024 15:38:01 +0100 Subject: [PATCH 224/224] Fixes pylint warning --- varats/varats/tables/feature_perf_precision.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py index 5f9148fab..212765368 100644 --- a/varats/varats/tables/feature_perf_precision.py +++ b/varats/varats/tables/feature_perf_precision.py @@ -35,15 +35,17 @@ from varats.table.tables import TableFormat, TableGenerator from varats.utils.git_util import calc_repo_loc, ChurnConfig -group_synthetic_categories = True +GROUP_SYNTHETIC_CATEGORIES = True -synth_categories = [ +SYNTH_CATEGORIES = [ "Static Analysis", "Dynamic Analysis", "Configurability", "Implementation Pattern" ] def compute_cs_category_grouping(case_study_name: str) -> str: + """Mapping function to transform individual project names to their synthtic + categories.""" if case_study_name.startswith("SynthSA"): return "Static Analysis" @@ -329,7 +331,7 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: precision_df, overhead_df, on=["CaseStudy", "Profiler"] ) - if group_synthetic_categories: + if GROUP_SYNTHETIC_CATEGORIES: merged_df["CaseStudy"] = merged_df["CaseStudy"].apply( compute_cs_category_grouping @@ -366,11 +368,11 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: # All means need to be computed before they are added as rows overall_mean = pivot_df.mean() - if group_synthetic_categories: - synth_mean = pivot_df.loc[pivot_df.index.isin(synth_categories) + if GROUP_SYNTHETIC_CATEGORIES: + synth_mean = pivot_df.loc[pivot_df.index.isin(SYNTH_CATEGORIES) ].mean() real_world_mean = pivot_df.loc[~pivot_df.index. - isin(synth_categories)].mean() + isin(SYNTH_CATEGORIES)].mean() pivot_df.loc["SynthMean"] = synth_mean pivot_df.loc["RealWorldMean"] = real_world_mean @@ -569,7 +571,7 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: df = pd.concat(cs_data).sort_index() df.index.name = 'CaseStudy' - if group_synthetic_categories: + if GROUP_SYNTHETIC_CATEGORIES: df.index = df.index.map(compute_cs_category_grouping) df = df.groupby(df.index.name, as_index=True).agg({