From 8b65a90d0b39cc8254c47256584af8cd24bc0893 Mon Sep 17 00:00:00 2001
From: Alex Myers
Date: Sat, 17 Feb 2024 10:15:11 -0600
Subject: [PATCH] reckless: Clone github sources when API access fails

Due to the API ratelimit, this allows cloning a github repo and
searching the result rather than searching via the REST API. If a
source has already been cloned, it is fetched and the default branch
checked out.

Fixes a failure reported by @farscapian

Changelog-Fixed: Reckless no longer fails on github API ratelimit.
---
Review notes (not part of the commit message; free text between the
three-dash line and the diff is the conventional place for notes):

* NOTE(review): this copy arrived with line breaks and indentation
  collapsed. The layout below is reconstructed. Per-hunk line counts,
  the 119/6 totals and the cumulative offsets were re-checked against
  the @@ headers, but the indentation of context lines (for example
  "return success" in the third hunk) is inferred -- verify against
  tools/reckless before applying.
* _git_update(): every "assert git.returncode == 0" is immediately
  followed by "if git.returncode != 0: return False". With asserts
  enabled, a failing git command raises AssertionError and the
  "return False" is never reached; the bool return is only effective
  when asserts are stripped (python -O). Suggest dropping the asserts
  and logging git.stderr before returning False.
* copy_remote_git_source(): "assert _git_update(...)" wraps a call
  with side effects in an assert, so the fetch/checkout is skipped
  entirely under -O. The return value of _git_clone() is ignored, so
  a failed clone still yields a SourceDir for a path that may not
  exist. Suggest "if not ...: return None" for both branches.
* GITHUB_API_FALLBACK is assigned the raw environment string, so any
  non-empty value (including "0" or "false") enables the simulated
  403. The assignment shown here lives under the
  "if __name__ == '__main__':" guard; presumably
  populate_github_repo() would hit a NameError if the file were
  imported as a module -- confirm there is no module-level default.
* _git_update() points origin at github_source.source_loc as given,
  while _git_clone() rewrites github URLs through GITHUB_COM.
  Presumably this bypasses the REDIR_GITHUB redirect on the update
  path -- confirm whether that is intended.
* Source.get_github_user_repo() lowercases the whole URL before
  extracting user and repo, so the local clone path is always
  lower-case. The annotation "-> (str, str)" is a tuple of classes,
  not a typing construct, and the function can also return
  (None, None).
* get_source(): the added "if not result: return False" makes the
  following "if result:" always true; one of the two is redundant.

 tools/reckless | 125 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 119 insertions(+), 6 deletions(-)

diff --git a/tools/reckless b/tools/reckless
index cec2c13b950f..9ac9e118c67b 100755
--- a/tools/reckless
+++ b/tools/reckless
@@ -17,6 +17,7 @@ import types
 from typing import Union
 from urllib.parse import urlparse
 from urllib.request import urlopen
+from urllib.error import HTTPError
 import venv
 
 
@@ -144,7 +145,8 @@ class InstInfo:
         target = SourceDir(self.source_loc, srctype=self.srctype)
         # Set recursion for how many directories deep we should search
         depth = 0
-        if self.srctype in [Source.DIRECTORY, Source.LOCAL_REPO]:
+        if self.srctype in [Source.DIRECTORY, Source.LOCAL_REPO,
+                            Source.GIT_LOCAL_CLONE]:
             depth = 5
         elif self.srctype == Source.GITHUB_REPO:
             depth = 1
@@ -196,7 +198,28 @@ class InstInfo:
                         return success
             return None
 
-        result = search_dir(self, target, False, depth)
+        try:
+            result = search_dir(self, target, False, depth)
+        # Using the rest API of github.com may result in a
+        # "Error 403: rate limit exceeded" or other access issues.
+        # Fall back to cloning and searching the local copy instead.
+        except HTTPError:
+            result = None
+            if self.srctype == Source.GITHUB_REPO:
+                # clone source to reckless dir
+                target = copy_remote_git_source(self)
+                if not target:
+                    logging.warning(f"could not clone github source {self}")
+                    return False
+                logging.debug(f"falling back to cloning remote repo {self}")
+                # Update to reflect use of a local clone
+                self.source_loc = target.location
+                self.srctype = target.srctype
+                result = search_dir(self, target, False, 5)
+
+        if not result:
+            return False
+
         if result:
             if result != target:
                 if result.relative:
@@ -238,6 +261,8 @@ class Source(Enum):
     GITHUB_REPO = 3
     OTHER_URL = 4
     UNKNOWN = 5
+    # Cloned from remote source before searching (rather than github API)
+    GIT_LOCAL_CLONE = 6
 
     @classmethod
     def get_type(cls, source: str):
@@ -256,6 +281,16 @@ class Source(Enum):
             return cls(4)
         return cls(5)
 
+    @classmethod
+    def get_github_user_repo(cls, source: str) -> (str, str):
+        'extract a github username and repository name'
+        if 'github.com/' not in source.lower():
+            return None, None
+        trailing = Path(source.lower().partition('github.com/')[2]).parts
+        if len(trailing) < 2:
+            return None, None
+        return trailing[0], trailing[1]
+
 
 class SourceDir():
     """Structure to search source contents."""
@@ -280,7 +315,7 @@ class SourceDir():
         # logging.debug(f"populating {self.srctype} {self.location}")
         if self.srctype == Source.DIRECTORY:
             self.contents = populate_local_dir(self.location)
-        elif self.srctype == Source.LOCAL_REPO:
+        elif self.srctype in [Source.LOCAL_REPO, Source.GIT_LOCAL_CLONE]:
             self.contents = populate_local_repo(self.location)
         elif self.srctype == Source.GITHUB_REPO:
             self.contents = populate_github_repo(self.location)
@@ -440,6 +475,11 @@ def source_element_from_repo_api(member: dict):
 
 
 def populate_github_repo(url: str) -> list:
+    """populate one level of a github repository via REST API"""
+    # Forces search to clone remote repos (for blackbox testing)
+    if GITHUB_API_FALLBACK:
+        with tempfile.NamedTemporaryFile() as tmp:
+            raise HTTPError(url, 403, 'simulated ratelimit', {}, tmp)
     # FIXME: This probably contains leftover cruft.
     repo = url.split('/')
     while '' in repo:
@@ -483,6 +523,28 @@ def populate_github_repo(url: str) -> list:
     return contents
 
 
+def copy_remote_git_source(github_source: InstInfo):
+    """clone or fetch & checkout a local copy of a remote git repo"""
+    user, repo = Source.get_github_user_repo(github_source.source_loc)
+    if not user or not repo:
+        logging.warning('could not extract github user and repo '
+                        f'name for {github_source.source_loc}')
+        return None
+    local_path = RECKLESS_DIR / '.remote_sources' / user
+    create_dir(RECKLESS_DIR / '.remote_sources')
+    if not create_dir(local_path):
+        logging.warning(f'could not provision dir {local_path} to '
+                        f'clone remote source {github_source.source_loc}')
+        return None
+    local_path = local_path / repo
+    if local_path.exists():
+        # Fetch the latest
+        assert _git_update(github_source, local_path)
+    else:
+        _git_clone(github_source, local_path)
+    return SourceDir(local_path, srctype=Source.GIT_LOCAL_CLONE)
+
+
 class Config():
     """A generic class for procuring, reading and editing config files"""
     def obtain_config(self,
@@ -808,7 +870,8 @@ def _git_clone(src: InstInfo, dest: Union[PosixPath, str]) -> bool:
     if src.srctype == Source.GITHUB_REPO:
         assert 'github.com' in src.source_loc
         source = f"{GITHUB_COM}" + src.source_loc.split("github.com")[-1]
-    elif src.srctype in [Source.LOCAL_REPO, Source.OTHER_URL]:
+    elif src.srctype in [Source.LOCAL_REPO, Source.OTHER_URL,
+                         Source.GIT_LOCAL_CLONE]:
         source = src.source_loc
     else:
         return False
@@ -824,6 +887,52 @@ def _git_clone(src: InstInfo, dest: Union[PosixPath, str]) -> bool:
     return True
 
 
+def _git_update(github_source: InstInfo, local_copy: PosixPath):
+    # Ensure this is the correct source
+    git = run(['git', 'remote', 'set-url', 'origin', github_source.source_loc],
+              cwd=str(local_copy), stdout=PIPE, stderr=PIPE, text=True,
+              check=False, timeout=60)
+    assert git.returncode == 0
+    if git.returncode != 0:
+        return False
+
+    # Fetch the latest from the remote
+    git = run(['git', 'fetch', 'origin', '--recurse-submodules=on-demand'],
+              cwd=str(local_copy), stdout=PIPE, stderr=PIPE, text=True,
+              check=False, timeout=60)
+    assert git.returncode == 0
+    if git.returncode != 0:
+        return False
+
+    # Find default branch
+    git = run(['git', 'symbolic-ref', 'refs/remotes/origin/HEAD', '--short'],
+              cwd=str(local_copy), stdout=PIPE, stderr=PIPE, text=True,
+              check=False, timeout=60)
+    assert git.returncode == 0
+    if git.returncode != 0:
+        return False
+    default_branch = git.stdout.splitlines()[0]
+    if default_branch != 'origin/master':
+        logging.debug(f'UNUSUAL: fetched default branch {default_branch} for '
+                      f'{github_source.source_loc}')
+
+    # Checkout default branch
+    git = run(['git', 'checkout', default_branch],
+              cwd=str(local_copy), stdout=PIPE, stderr=PIPE, text=True,
+              check=False, timeout=60)
+    assert git.returncode == 0
+    if git.returncode != 0:
+        return False
+
+    git = run(['git', 'submodule', 'update', '--init', '--recursive'],
+              cwd=str(local_copy), stdout=PIPE, stderr=PIPE, text=True,
+              check=False, timeout=60)
+    if git.returncode != 0:
+        return False
+
+    return True
+
+
 def get_temp_reckless_dir() -> PosixPath:
     random_dir = 'reckless-{}'.format(str(hash(os.times()))[-9:])
     new_path = Path(tempfile.gettempdir()) / random_dir
@@ -855,7 +964,7 @@ def _checkout_commit(orig_src: InstInfo,
                      cloned_path: PosixPath):
     # Check out and verify commit/tag if source was a repository
     if orig_src.srctype in [Source.LOCAL_REPO, Source.GITHUB_REPO,
-                            Source.OTHER_URL]:
+                            Source.OTHER_URL, Source.GIT_LOCAL_CLONE]:
         if orig_src.commit:
             logging.debug(f"Checking out {orig_src.commit}")
             checkout = Popen(['git', 'checkout', orig_src.commit],
@@ -917,7 +1026,7 @@ def _install_plugin(src: InstInfo) -> Union[InstInfo, None]:
         create_dir(clone_path)
         shutil.copytree(src.source_loc, plugin_path)
     elif src.srctype in [Source.LOCAL_REPO, Source.GITHUB_REPO,
-                         Source.OTHER_URL]:
+                         Source.OTHER_URL, Source.GIT_LOCAL_CLONE]:
         # clone git repository to /tmp/reckless-...
         if not _git_clone(src, plugin_path):
             return None
@@ -1406,6 +1515,10 @@ if __name__ == '__main__':
         GITHUB_COM = os.environ['REDIR_GITHUB']
     logging.root.setLevel(args.loglevel)
 
+    GITHUB_API_FALLBACK = False
+    if 'GITHUB_API_FALLBACK' in os.environ:
+        GITHUB_API_FALLBACK = os.environ['GITHUB_API_FALLBACK']
+
     if 'targets' in args:
         # FIXME: Catch missing argument
         if args.func.__name__ == 'help_alias':