From 2c955da0a3350155d82264fd8d4918fd92517ff9 Mon Sep 17 00:00:00 2001 From: Denys Savchenko <56398430+dsavchenko@users.noreply.github.com> Date: Tue, 27 Feb 2024 14:03:29 +0100 Subject: [PATCH] Galaxy (#40) * fix * make galaxy tools prototype * galaxy tool PR; container 4 galaxy mode * allow clean settings (no components) * no pr if exist * fix str + bytes * PR name * move logger.info components * galaxy requirements already merged * betetr naming * fix * version from master * fix tool id not allowed symbols * write last commit even if not changed * cleanup before generation * .shed.yml generation * fix typo * continue next repo if exception * shed repo name is tool_id * gitlab commit link in comment * fix filenotfound on new tool cleanup * reduce long lines and duplication * main nb2workflow in Dockerfile_galaxy * build galaxy bot action * branch name in tag for pr in action * action triggers * proper if in action --- .github/workflows/docker-image-galaxy.yml | 55 ++++ Dockerfile | 2 +- Dockerfile_galaxy | 14 + odabot/cli.py | 322 ++++++++++++++++++++-- settings.toml | 10 + setup.py | 35 ++- 6 files changed, 400 insertions(+), 38 deletions(-) create mode 100644 .github/workflows/docker-image-galaxy.yml create mode 100644 Dockerfile_galaxy diff --git a/.github/workflows/docker-image-galaxy.yml b/.github/workflows/docker-image-galaxy.yml new file mode 100644 index 0000000..28d4a14 --- /dev/null +++ b/.github/workflows/docker-image-galaxy.yml @@ -0,0 +1,55 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# GitHub recommends pinning actions to a commit SHA. +# To get a newer version, you will need to update the SHA. +# You can also reference a tag or branch, but the action may change without warning. + +name: Publish Docker image for Galaxy bot + +on: + push: + branches: + - master + pull_request: + types: + - labeled + - opened + - synchronize + - reopened + + + +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + if: contains( github.event.pull_request.labels.*.name, 'galaxy') + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v4 + + - name: Log in to Docker Hub + uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + with: + images: odahub/odabot-galaxy + tags: | + type=raw,value={{sha}}-{{date 'YYMMDDHHmmss'}}-${{ github.head_ref || github.ref_name }} + + - name: Build and push Docker image + uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 + with: + context: . + file: ./Dockerfile_galaxy + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/Dockerfile b/Dockerfile index f7b2e75..fa238e8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM python:3.9 COPY . /source -RUN pip install -r /source/requirements.txt && pip install /source && rm -r /source +RUN pip install -r /source/requirements.txt && pip install /source[k8sdeploy] && rm -r /source RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" &&\ install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && rm ./kubectl diff --git a/Dockerfile_galaxy b/Dockerfile_galaxy new file mode 100644 index 0000000..2ba1e72 --- /dev/null +++ b/Dockerfile_galaxy @@ -0,0 +1,14 @@ +FROM python:3.9 + +COPY . /source + +# RUN pip install git+https://github.com/oda-hub/nb2workflow@master#egg=nb2workflow[galaxy] +RUN pip install /source[galaxy] && rm -r /source + +# Add Tini +ENV TINI_VERSION v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini +RUN chmod +x /tini +ENTRYPOINT ["/tini", "--"] + +CMD ["odabot", "--settings", "/settings.toml", "make-galaxy-tools", "--loop", "10"] \ No newline at end of file diff --git a/odabot/cli.py b/odabot/cli.py index d9d48f3..ce146d0 100755 --- a/odabot/cli.py +++ b/odabot/cli.py @@ -4,32 +4,39 @@ import logging import os import re +import shutil import time import yaml import tempfile -import click -import subprocess +import subprocess as sp import requests from datetime import datetime import sys import traceback +import xml.etree.ElementTree as ET -import markdown -import rdflib - -from nb2workflow.deploy import build_container, deploy_k8s, ContainerBuildException -from nb2workflow import version as nb2wver -#from nb2workflow.validate import validate, patch_add_tests, patch_normalized_uris -from mmoda_tab_generator.tab_generator import MMODATabGenerator - -from .markdown_helper import convert_help +import click +from dynaconf import Dynaconf logger = logging.getLogger() -from dynaconf import Dynaconf +try: + import markdown + import rdflib + from nb2workflow.deploy import build_container, deploy_k8s, ContainerBuildException + from nb2workflow import version as nb2wver + #from nb2workflow.validate import validate, patch_add_tests, patch_normalized_uris + from mmoda_tab_generator.tab_generator import MMODATabGenerator + from .markdown_helper import convert_help +except ImportError: + logger.warning('Deployment dependencies not loaded') + +try: + from nb2workflow.galaxy import to_galaxy + import frontmatter +except ImportError: + logger.warning('Galaxy dependencies not loaded') -# `envvar_prefix` = export envvars with `export DYNACONF_FOO=bar`. -# `settings_files` = Load this files in the order. renkuapi = "https://gitlab.renkulab.io/api/v4/" renku_gid = 5606 @@ -111,9 +118,10 @@ def cli(obj, debug, settings): stream = sys.stdout, level=logging.DEBUG if debug else logging.INFO, format='\033[36m%(asctime)s %(levelname)s %(module)s\033[0m %(message)s', + force = True, ) - - logger.info("default logging level INFO") + + logger.info("logging level %s", 'INFO' if logger.level == 20 else 'DEBUG') settings_files=[ 'settings.toml', @@ -129,8 +137,8 @@ def cli(obj, debug, settings): ) obj['debug'] = debug - - logger.info("components: %s", obj['settings'].components) + + @cli.command() @@ -138,7 +146,7 @@ def cli(obj, debug, settings): @click.argument('component') def update_chart(component, branch): with tempfile.TemporaryDirectory() as chart_dir: - subprocess.check_call([ + sp.check_call([ "git", "clone", f"git@gitlab.astro.unige.ch:oda/{component}/{component}-chart.git", chart_dir, @@ -150,7 +158,7 @@ def update_chart(component, branch): # ]) try: - r = subprocess.check_call([ + r = sp.check_call([ "make", "-C", chart_dir, "update" ], env={**os.environ, @@ -159,10 +167,10 @@ def update_chart(component, branch): 'GIT_CONFIG_VALUE_0': 'false'} ) logger.error('\033[32msucceeded update (next to commit): %s\033[0m', r) - r = subprocess.check_call([ + r = sp.check_call([ "git", "push", "origin", branch ]) - except subprocess.CalledProcessError as e: + except sp.CalledProcessError as e: logger.error('\033[31mcan not update (maybe no updates available?): %s\033[0m', e) @@ -176,6 +184,8 @@ def poll_github_events(obj, ctx, source, forget): poll_interval_s = 60 logger.info('staring oda-bot') + + logger.info("components: %s", obj['settings'].components) try: last_event_id = yaml.safe_load(open('oda-bot-runtime.yaml'))[source]['last_event_id'] @@ -563,7 +573,7 @@ def update_workflows(obj, dry_run, force, loop, pattern): citation = acknowl, help_page = help_html) - subprocess.check_output(["kubectl", "exec", #"-it", + sp.check_output(["kubectl", "exec", #"-it", f"deployment/{frontend_deployment}", "-n", k8s_namespace, "--", "bash", "-c", @@ -651,7 +661,273 @@ def verify_workflows(obj): logger.info("%s: %s", r['@id'], json.dumps(r, indent=4)) api.get_instrument_description(r["http://odahub.io/ontology#service_name"][0]['@value']) + +@cli.command() +@click.option("--dry-run", is_flag=True) +@click.option("--loop", default=0) +@click.option("--force", is_flag=True) +@click.option("--pattern", default=".*") +@click.pass_obj +def make_galaxy_tools(obj, dry_run, loop, force, pattern): + tools_repo = obj['settings'].get('nb2galaxy.tools_repo', "https://github.com/esg-epfl-apc/tools-astro/") + target_tools_repo = obj['settings'].get('nb2galaxy.target_tools_repo', "https://github.com/esg-epfl-apc/tools-astro.git") + target_branch = obj['settings'].get('nb2galaxy.target_branch', "main") + repo_cache_dir = obj['settings'].get('nb2galaxy.repo_cache_path', "/nb2galaxy-cache") + state_storage = obj['settings'].get('nb2galaxy.state_storage', '/nb2galaxy-cache/oda-bot-runtime-galaxy.yaml') + git_name = obj['settings'].get('nb2galaxy.git_identity.name', 'ODA bot') + git_email = obj['settings'].get('nb2galaxy.git_identity.email', 'noreply@odahub.io') + git_credentials = obj['settings'].get('nb2galaxy.git_credentials', os.path.join(os.environ.get('HOME', '/'), '.git-credentials')) + + repo_cache_dir = os.path.abspath(repo_cache_dir) + state_storage = os.path.abspath(state_storage) + tools_repo_dir = os.path.join(repo_cache_dir, 'tools-astro') + + os.makedirs(repo_cache_dir, exist_ok=True) + + with open(git_credentials) as fd: + token = fd.read().split(':')[-1].split('@')[0] + + def git_clone_or_update(local_path, remote, branch='master', origin='origin'): + if os.path.isdir(local_path) and os.listdir(): + os.chdir(local_path) + try: + res = sp.run(['git', 'remote', 'get-url', '--push', origin], + check=True, capture_output=True, text=True) + if res.stdout.strip() != remote: + raise ValueError + sp.run(['git', 'checkout', branch], check=True) + sp.run(['git', 'pull', origin, branch], check=True) + sp.run(['git', 'remote', 'update', origin, '--prune']) + except (sp.CalledProcessError, ValueError): + raise RuntimeError('%s is not a valid tools repo', local_path) + else: + sp.run(['git', 'clone', remote, local_path], check=True) + + try: + oda_bot_runtime = yaml.safe_load(open(state_storage)) + except FileNotFoundError: + oda_bot_runtime = {} + + def make_pr(source_repo, source_branch, target_repo, target_branch, title='New PR', body=''): + repo_patt = re.compile(r'https://github\.com/(?P[^/]+)/(?P[^\.]+)\.git') + + m = repo_patt.match(source_repo) + s_user = m.group('user') + s_repo = m.group('repo') + + m = repo_patt.match(target_repo) + t_user = m.group('user') + t_repo = m.group('repo') + + api_url = f"https://api.github.com/repos/{t_user}/{t_repo}/pulls" + data = {'title': title, + 'body': body, + 'head': f'{s_user}:{source_branch}', + 'base': target_branch} + headers = {"Accept": "application/vnd.github+json", + "Authorization": f"Bearer {token}", + "X-GitHub-Api-Version": "2022-11-28"} + + res = requests.get(api_url, params={'head': f'{s_user}:{source_branch}', 'state': 'open'}, headers=headers) + if res.status_code == 200: + if res.json() != []: + logger.info(f"Pull request already exist {res.json()[0]['html_url']}") + return res.json()[0] + else: + raise RuntimeError('Error getting PRs. Status: %s. Response text: %s', + res.status_code, + res.text) + res = requests.post(api_url, json=data, headers=headers) + + if res.status_code != 201: + raise RuntimeError('Error creating PR. Status: %s. Response text: %s', + res.status_code, + res.text) + else: + logger.info(f"New PR {res.json()['html_url']}") + return res.json() + + + + + + if "deployed_tools" not in oda_bot_runtime: + oda_bot_runtime["deployed_tools"] = {} + deployed_tools = oda_bot_runtime["deployed_tools"] + + git_clone_or_update(tools_repo_dir, tools_repo, target_branch) + os.chdir(tools_repo_dir) + sp.run(['git', 'config', 'user.name', git_name], check=True) + sp.run(['git', 'config', 'user.email', git_email], check=True) + sp.run(['git', 'config', 'credential.helper', f'store --file={git_credentials}']) + + while True: + git_clone_or_update(tools_repo_dir, tools_repo, target_branch) + try: + for project in requests.get(f'{renkuapi}groups/{renku_gid}/projects?include_subgroups=yes&order_by=last_activity_at').json(): + try: + if re.match(pattern, project['name']) and 'galaxy-tool' in project['topics']: + logger.info("%20s %s", project['name'], project['http_url_to_repo']) + logger.debug("%s", json.dumps(project)) + + last_commit = requests.get(f'{renkuapi}projects/{project["id"]}/repository/commits?per_page=1&page=1').json()[0] + last_commit_created_at = last_commit['created_at'] + + logger.info('last_commit %s from %s', last_commit, last_commit_created_at) + + saved_last_commit_created_at = deployed_tools.get(project['http_url_to_repo'], {}).get('last_commit_created_at', 0) + #saved_last_tool_version = deployed_tools.get(project['http_url_to_repo'], {}).get('last_tool_version', '0.0.0+galaxy0') + + logger.info('last_commit_created_at %s saved_last_commit_created_at %s', last_commit_created_at, saved_last_commit_created_at ) + + if last_commit_created_at == saved_last_commit_created_at and not force: + logger.info("no need to deploy this tool") + else: + wf_repo_dir = os.path.join(repo_cache_dir, project['path']) + git_clone_or_update(wf_repo_dir, project['http_url_to_repo']) + + def repo_file_path_if_available(filename): + if os.path.isfile(os.path.join(wf_repo_dir, filename)): + return os.path.join(wf_repo_dir, filename) + else: + return None + + req_file = repo_file_path_if_available('requirements.txt') + env_file = repo_file_path_if_available('environment.yml') + bib_file = repo_file_path_if_available('citations.bib') + help_file = repo_file_path_if_available('galaxy_help.md') + + os.chdir(tools_repo_dir) + tool_id = re.sub(r'[^a-z0-9_]', '_', f"{project['path']}_astro_tool") + tool_xml_path = os.path.join(tools_repo_dir, 'tools', project['path'], f"{tool_id}.xml") + if os.path.isfile(tool_xml_path): + tool_xml_root = ET.parse(tool_xml_path).getroot() + master_tool_version = tool_xml_root.attrib['version'] + tool_name = tool_xml_root.attrib['name'] + + version_parser = re.compile(r'(?P\d+)\.(?P\d+)\.(?P\d+)\+galaxy(?P\d+)') + m = version_parser.match(master_tool_version) + new_version = f"{m.group('maj')}.{m.group('min')}.{int(m.group('patch'))+1}+galaxy{m.group('suffix')}" + else: + new_version = "0.0.1+galaxy0" + tool_name = f"{project['name']}" + + upd_branch_name = f"auto-update-galaxy-tool-{project['path']}-v{new_version.replace('+', '-')}" + try: + sp.run(['git', 'checkout', upd_branch_name], check=True) + sp.run(['git', 'pull', 'origin', upd_branch_name]) + except sp.CalledProcessError: + sp.run(['git', 'checkout', '-b', upd_branch_name], check=True) + + # TODO: it could be optional or partial to preserve some manual additions + outd = os.path.join(tools_repo_dir, 'tools', project['path']) + shutil.rmtree(outd, ignore_errors=True) + + to_galaxy(input_path=wf_repo_dir, + toolname=tool_name, + out_dir=outd, + tool_version=new_version, + tool_id=tool_id, + requirements_file=req_file, + conda_environment_file=env_file, + citations_bibfile=bib_file, + help_file=help_file + ) + + # creating shed file + if os.path.isfile(os.path.join(wf_repo_dir, '.shed.yml')): + shutil.copyfile(os.path.join(wf_repo_dir, '.shed.yml'), + os.path.join(outd, '.shed.yml') + ) + else: + shed_content = { + 'name': tool_id, + 'owner': 'astroteam', + 'type': 'unrestricted', + 'categories': ['Astronomy'], + 'description': tool_name, + 'long_description': tool_name, + 'homepage_url': None, + 'remote_repository_url': 'https://github.com/esg-epfl-apc/tools-astro/tree/main/tools', + } + + if help_file is not None: + fm = frontmatter.load(help_file) + if 'description' in fm.keys(): + shed_content['description'] = fm['description'] + shed_content['long_description'] = fm.get('long_description', fm['description']) + + with open(os.path.join(outd, '.shed.yml'), 'wt') as fd: + yaml.dump(shed_content, fd) + + + logger.info("Git status:\n" + sp.check_output(['git', 'status'], text=True)) + + if dry_run: + logger.warning('Dry run. Cleaning up introduced updates.') + sp.run(['git', 'clean', '-fd'], check=True) + else: + try: + r = sp.run(['git', 'add', '.'], capture_output=True, text=True) + if r.returncode != 0: + r.check_returncode() + + r = sp.run(['git', 'commit', '-m', 'automatic update', '-m', f"following {last_commit['web_url']}"], capture_output=True, text=True) + if r.returncode == 1: + changed = False + elif r.returncode != 0: + r.check_returncode() + else: + changed = True + + if changed is True: + r = sp.run(['git', 'push', '--set-upstream', 'origin', upd_branch_name], + capture_output=True, text=True) + if r.returncode != 0: + r.check_returncode() + + make_pr(tools_repo, + upd_branch_name, + target_tools_repo, + target_branch, + f"Update tool {tool_name} to {new_version}") + + except: + logger.error(r.stderr) + raise + finally: + sp.run(['git', 'checkout', target_branch]) + sp.run(['git', 'branch', '-D', upd_branch_name]) + sp.run(['git', 'restore', '--staged', '.']) + sp.run(['git', 'clean', '-fd'], check=True) + + # if not changed: + # continue + + deployed_tools[project['http_url_to_repo']] = {'last_commit_created_at': last_commit_created_at, + 'last_commit': last_commit['id'], + 'last_tool_version': new_version} + + oda_bot_runtime['deployed_tools'] = deployed_tools + with open(state_storage, 'w') as fd: + yaml.dump(oda_bot_runtime, fd) + except: + logger.error("unexpected exception: %s", traceback.format_exc()) + logger.error("continue with the next repo") + continue + + except Exception: + logger.error("unexpected exception: %s", traceback.format_exc()) + + if loop > 0: + logger.info("sleeping %s", loop) + time.sleep(loop) + else: + break + + + #TODO: test service status and dispatcher status # oda-api -u https://dispatcher-staging.obsuks1.unige.ch get -i cta-example diff --git a/settings.toml b/settings.toml index 4bc84ac..ab72699 100644 --- a/settings.toml +++ b/settings.toml @@ -23,3 +23,13 @@ frontend = {instruments_dir = "/frontend-instruments/", deployment = "oda-fronte registry = "odahub" build_engine = "docker" state_storage = {type = "yaml", path = '/oda-bot-runtime-workflows.yaml'} + +[nb2galaxy] + +tools_repo = "https://github.com/dsavchenko/tools-astro.git" +target_branch = 'main' +target_tools_repo = "https://github.com/esg-epfl-apc/tools-astro.git" +repo_cache_path = "/nb2galaxy-cache" +state_storage = "/oda-bot-runtime-galaxy.yaml" +git_identity = {name = "ODA bot", email = "noreply@odahub.io"} +git_credentials = "/.git-credentials" \ No newline at end of file diff --git a/setup.py b/setup.py index 0354726..6554a6e 100644 --- a/setup.py +++ b/setup.py @@ -1,23 +1,32 @@ from setuptools import setup, find_packages install_req = [ - 'oda-knowledge-base', 'pyyaml', 'click', 'requests', 'dynaconf', - 'rdflib', - 'nb2workflow', - 'oda_api', - 'cwltool', - 'mmoda_tab_generator', - 'markdown', - 'markdown-katex' -] - -test_req = [ ] +extras_req = { + 'test': [ + 'pytest' + ], + 'k8sdeploy': [ + 'rdflib', + 'nb2workflow[k8s]', + 'oda_api', + 'cwltool', + 'mmoda_tab_generator', + 'markdown', + 'markdown-katex', + 'oda-knowledge-base', + ], + 'galaxy': [ + 'nb2workflow[galaxy]', + 'python-frontmatter' + ] + +} setup(name='oda-bot', version="0.1.0", @@ -28,7 +37,5 @@ entry_points={'console_scripts': ['odabot=odabot.cli:main']}, include_package_data=True, install_requires=install_req, - extras_require={ - 'test': test_req - } + extras_require=extras_req )