From 9a9e3fda5cde061de2d3d17b9e41d17f23af0a8c Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Tue, 12 Sep 2023 11:50:02 +0200 Subject: [PATCH 1/7] * Started implementation to avoid concurrent pulls in cluster mode --- .../varats/provider/patch/patch_provider.py | 47 ++++++++++++++++--- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index b370df208..6a083ccb9 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -6,9 +6,12 @@ """ import os +import random +import time import typing as tp import warnings from pathlib import Path +from time import sleep import benchbuild as bb import yaml @@ -237,12 +240,10 @@ class PatchProvider(Provider): def __init__(self, project: tp.Type[Project]): super().__init__(project) - # BB only performs a fetch so our repo might be out of date - pull_current_branch(self._get_patches_repository_path()) + self._update_local_patches_repo() + repo_path = self._get_patches_repository_path() - patches_project_dir = Path( - self._get_patches_repository_path() / self.project.NAME - ) + patches_project_dir = Path(repo_path / self.project.NAME) if not patches_project_dir.is_dir(): warnings.warn( @@ -316,6 +317,38 @@ def create_default_provider( @classmethod def _get_patches_repository_path(cls) -> Path: - cls.patches_source.fetch() - return Path(target_prefix()) / cls.patches_source.local + + @classmethod + def _update_local_patches_repo(cls): + PULL_THRESHOLD = 300 + + repo_path = cls._get_patches_repository_path() + + if not os.path.exists(repo_path): + cls.patches_source.fetch() + + # We don't want to pull as long as index.lock exists + + index_lock_path = repo_path / ".git" / "index.lock" + + while os.path.exists(index_lock_path): + sleep(1) + + # Add random wait to reduce possibility of concurrent pull afterwards? + sleep(random.uniform(2.0, 5.0)) + + last_pull_path = repo_path / ".last_pull" + + if os.path.exists(last_pull_path): + last_pull_d = time.time() - os.path.getmtime(last_pull_path) + else: + # We want to ensure that we make the pull, so we set the time above the threshhold + last_pull_d = PULL_THRESHOLD + + if last_pull_d < PULL_THRESHOLD: + return + + last_pull_path.touch(exist_ok=True) + + pull_current_branch(repo_path) From 17e2bf95dfd94c7ba7d23ef2719a9ca1c409d6fe Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 13 Sep 2023 10:13:33 +0200 Subject: [PATCH 2/7] * Refactored parallel patch provider to use flock and context manager * Added context manager for file locks --- .../varats/provider/patch/patch_provider.py | 33 +++---------------- varats-core/varats/utils/filesystem_util.py | 19 ++++++++++- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 6a083ccb9..f3b81d401 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -17,6 +17,7 @@ import yaml from benchbuild.project import Project from benchbuild.source.base import target_prefix +from utils.filesystem_util import lock_file from yaml import YAMLError from varats.project.project_util import get_local_project_git_path @@ -321,34 +322,8 @@ def _get_patches_repository_path(cls) -> Path: @classmethod def _update_local_patches_repo(cls): - PULL_THRESHOLD = 300 + lock_path = Path(target_prefix()) / "patch_provider.lock" - repo_path = cls._get_patches_repository_path() - - if not os.path.exists(repo_path): + with lock_file(lock_path): cls.patches_source.fetch() - - # We don't want to pull as long as index.lock exists - - index_lock_path = repo_path / ".git" / "index.lock" - - while os.path.exists(index_lock_path): - sleep(1) - - # Add random wait to reduce possibility of concurrent pull afterwards? - sleep(random.uniform(2.0, 5.0)) - - last_pull_path = repo_path / ".last_pull" - - if os.path.exists(last_pull_path): - last_pull_d = time.time() - os.path.getmtime(last_pull_path) - else: - # We want to ensure that we make the pull, so we set the time above the threshhold - last_pull_d = PULL_THRESHOLD - - if last_pull_d < PULL_THRESHOLD: - return - - last_pull_path.touch(exist_ok=True) - - pull_current_branch(repo_path) + pull_current_branch(cls._get_patches_repository_path()) diff --git a/varats-core/varats/utils/filesystem_util.py b/varats-core/varats/utils/filesystem_util.py index 6f71f01d9..27d0b032e 100644 --- a/varats-core/varats/utils/filesystem_util.py +++ b/varats-core/varats/utils/filesystem_util.py @@ -1,6 +1,8 @@ """Utility functions for handling filesystem related tasks.""" - +import fcntl +import os.path import typing as tp +from contextlib import contextmanager from pathlib import Path @@ -13,3 +15,18 @@ def __init__(self, folder: tp.Union[Path, str]) -> None: f"Folder: '{str(folder)}' should be created " "but was already present." ) + + +@contextmanager +def lock_file(lock_path: Path, lock_mode=fcntl.LOCK_EX): + # Ensure that the lock exists + lock_path.touch(exist_ok=True) + + open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC + lock_fd = os.open(lock_path, open_mode) + try: + fcntl.flock(lock_fd, lock_mode) + yield lock_fd + finally: + fcntl.flock(lock_fd, fcntl.LOCK_UN) + os.close(lock_fd) From 89ea5fd0487b48717c49fb8915b269d5bf6e9393 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 13 Sep 2023 13:47:46 +0200 Subject: [PATCH 3/7] * Fix imports --- varats-core/varats/provider/patch/patch_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index f3b81d401..7f2e49ba5 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -17,11 +17,11 @@ import yaml from benchbuild.project import Project from benchbuild.source.base import target_prefix -from utils.filesystem_util import lock_file from yaml import YAMLError from varats.project.project_util import get_local_project_git_path from varats.provider.provider import Provider, ProviderType +from varats.utils.filesystem_util import lock_file from varats.utils.git_commands import pull_current_branch, fetch_repository from varats.utils.git_util import ( CommitHash, From 92406afd94d3842e1de49e8b46d3ceb65c070c88 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 13 Sep 2023 13:55:50 +0200 Subject: [PATCH 4/7] * Fix mypy --- varats-core/varats/utils/filesystem_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats-core/varats/utils/filesystem_util.py b/varats-core/varats/utils/filesystem_util.py index 27d0b032e..a818bc9e9 100644 --- a/varats-core/varats/utils/filesystem_util.py +++ b/varats-core/varats/utils/filesystem_util.py @@ -18,7 +18,7 @@ def __init__(self, folder: tp.Union[Path, str]) -> None: @contextmanager -def lock_file(lock_path: Path, lock_mode=fcntl.LOCK_EX): +def lock_file(lock_path: Path, lock_mode: int = fcntl.LOCK_EX) -> tp.Generator: # Ensure that the lock exists lock_path.touch(exist_ok=True) From f49906aa2f0028569693e07afc5698c229a9e615 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Wed, 13 Sep 2023 14:06:19 +0200 Subject: [PATCH 5/7] * Remove unused imports * removed fd that is yielded --- varats-core/varats/provider/patch/patch_provider.py | 3 --- varats-core/varats/utils/filesystem_util.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 7f2e49ba5..9aeaeeeed 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -6,12 +6,9 @@ """ import os -import random -import time import typing as tp import warnings from pathlib import Path -from time import sleep import benchbuild as bb import yaml diff --git a/varats-core/varats/utils/filesystem_util.py b/varats-core/varats/utils/filesystem_util.py index a818bc9e9..4cad4e489 100644 --- a/varats-core/varats/utils/filesystem_util.py +++ b/varats-core/varats/utils/filesystem_util.py @@ -26,7 +26,7 @@ def lock_file(lock_path: Path, lock_mode: int = fcntl.LOCK_EX) -> tp.Generator: lock_fd = os.open(lock_path, open_mode) try: fcntl.flock(lock_fd, lock_mode) - yield lock_fd + yield finally: fcntl.flock(lock_fd, fcntl.LOCK_UN) os.close(lock_fd) From 5484fab4c979c7e47b1318445f7300229aa2c59f Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Thu, 14 Sep 2023 09:12:45 +0200 Subject: [PATCH 6/7] * Removed explicit touch for lock file --- varats-core/varats/utils/filesystem_util.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/varats-core/varats/utils/filesystem_util.py b/varats-core/varats/utils/filesystem_util.py index 4cad4e489..bc44c3265 100644 --- a/varats-core/varats/utils/filesystem_util.py +++ b/varats-core/varats/utils/filesystem_util.py @@ -19,9 +19,6 @@ def __init__(self, folder: tp.Union[Path, str]) -> None: @contextmanager def lock_file(lock_path: Path, lock_mode: int = fcntl.LOCK_EX) -> tp.Generator: - # Ensure that the lock exists - lock_path.touch(exist_ok=True) - open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC lock_fd = os.open(lock_path, open_mode) try: From 6f2d3f057dad676f18714fc41bfbe1def66f57e6 Mon Sep 17 00:00:00 2001 From: Lukas Abelt Date: Thu, 14 Sep 2023 14:25:47 +0200 Subject: [PATCH 7/7] * Add feedback from review --- varats-core/varats/provider/patch/patch_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats-core/varats/provider/patch/patch_provider.py b/varats-core/varats/provider/patch/patch_provider.py index 9aeaeeeed..d466a629b 100644 --- a/varats-core/varats/provider/patch/patch_provider.py +++ b/varats-core/varats/provider/patch/patch_provider.py @@ -241,7 +241,7 @@ def __init__(self, project: tp.Type[Project]): self._update_local_patches_repo() repo_path = self._get_patches_repository_path() - patches_project_dir = Path(repo_path / self.project.NAME) + patches_project_dir = repo_path / self.project.NAME if not patches_project_dir.is_dir(): warnings.warn(