-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'MHubAI:main' into m-gc-spider-baseline
- Loading branch information
Showing
14 changed files
with
788 additions
and
48 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import utils | ||
import os, sys, json | ||
|
||
# Entry point of the MHub compliance check.
#
# Reads the pull request number (PR_NUMBER) and the JSON encoded list of
# modified files (MODIFIED_FILES) from the environment, derives the modified
# model from the file paths and runs the folder, meta.json and Dockerfile
# checks from the utils module. Exits with status 1 if any check fails.

# introduction
print()
print("------------------------------------------------")
print("MHub Compliance Checks started.")
print("We will check for a correct folder setup, Dockerfile and meta.json compliance.")
print()

# print the pull request number provided through the environment
PR = os.environ['PR_NUMBER']
print("Pull request: ", PR)

# the list of modified files is provided as a JSON array through the
# MODIFIED_FILES environment variable (set by an earlier workflow step)
modified_files = json.loads(os.environ['MODIFIED_FILES'])
print("Modified files: ", "\n ".join(modified_files))

# The model name is the second component of each path (models/<model_name>/...).
# A path without a second component (e.g. a file in the repository root) cannot
# be attributed to a model, so we fail with a clear message instead of an IndexError.
unassignable_files = [fp for fp in modified_files if len(fp.split("/")) < 2]
if unassignable_files:
    print("CHECK FAILED: ", "The following modified files do not belong to a model folder: " + ", ".join(unassignable_files))
    sys.exit(1)

# modified models list (sorted so the printout is deterministic)
modified_models = sorted({fp.split("/")[1] for fp in modified_files})
print("Modified models: ", ", ".join(modified_models))

# we allow modifications only to a single model for now
# TODO: iterate model list (we can outsource model checks and then call a check_model script with the model name as argument)
if len(modified_models) != 1:
    print("CHECK FAILED: ", "Exactly one model must be modified in a pull request.")
    sys.exit(1)

# model name
model_name = modified_models[0]

# run compliance checks
try:
    # check folder structure
    utils.validateModelFolder(base='models', model_name=model_name)

    # check meta.json
    utils.validateModelMetaJson(model_meta_json_file=os.path.join('models', model_name, 'meta.json'))

    # validate dockerfile
    utils.validateDockerfile(base='models', model_name=model_name)

except utils.MHubComplianceError as e:
    print()
    print("---------------- CHECK FAILED ----------------")
    print("This PR violates one or more MHub compliance rules:")
    print(str(e))
    print()
    sys.exit(1)

except Exception as e:
    print()
    print("---------------- CHECK FAILED ----------------")
    print("An unexpected error occured during compliance checks.")
    # report what went wrong; without the details the failure cannot be
    # diagnosed from the workflow log
    print(f"{type(e).__name__}: {e}")
    print()
    sys.exit(1)

# all checks passed
print()
print("---------------- CHECK PASSED ----------------")
print("All compliance checks passed.")
print("Note: compliance checks are a beta feature. Passing all automated compliance checks does not guarantee that your model is compliant with the MHub standard. We will now perform a manual review of your model. Testing your model on a public dataset is obligatory.")
print()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
from typing import List, Union | ||
from enum import Enum | ||
import requests, os | ||
import json | ||
import jsonschema | ||
|
||
# NOTE: all file path operations are relative to the repository root. | ||
|
||
# references for documentation
# (all pages live below the same root of the MHub documentation repository)
_DOCUMENTATION_ROOT = "https://github.com/MHubAI/documentation/blob/main/documentation"


class DocuRef(Enum):
    """Links to the documentation pages that compliance errors refer to."""

    MODEL_FOLDER_STRUCTURE = f"{_DOCUMENTATION_ROOT}/mhub_models/model_folder_structure.md"
    DOCKERFILE = f"{_DOCUMENTATION_ROOT}/mhub_models/the_mhub_dockerfile.md"
    CONFIG = f"{_DOCUMENTATION_ROOT}/mhubio/the_mhubio_config_file.md"
    MHUBIO_MODULES = f"{_DOCUMENTATION_ROOT}/mhubio/mhubio_modules.md"
    MODEL_META_JSON = f"{_DOCUMENTATION_ROOT}/mhub_models/model_json.md"
|
||
class MHubComplianceError(Exception):
    """Raised when a model is not compliant with MHub standards.

    The exception message consists of the given message followed by the
    link(s) to the documentation page(s) describing the violated rule.
    """

    def __init__(self, message: str, docu_ref: Union["DocuRef", List["DocuRef"]]):
        """Build the error message from a description and its references.

        Args:
            message: Human readable description of the violation.
            docu_ref: One documentation reference or a list of references;
                the ``value`` of each reference is appended to the message.
        """
        # normalise to a sequence so that one and many references are
        # rendered in exactly the same format
        docu_refs = docu_ref if isinstance(docu_ref, (list, tuple)) else [docu_ref]
        links = ', '.join(d.value for d in docu_refs)

        super().__init__(f"{message} (see {links} for more information)")
|
||
def get_modified_files_from_PR(prid, repo = 'models') -> List[str]:
    """Fetch the paths of all files modified in a pull request.

    Args:
        prid: Number of the pull request.
        repo: Name of the repository inside the MHubAI organisation.

    Returns:
        The ``filename`` entry of every file listed by the GitHub API for
        the pull request.

    Raises:
        Exception: If the GitHub API answers with a status other than 200.
    """

    # GitHub API URL to list files modified in the PR
    api_url = f"https://api.github.com/repos/MHubAI/{repo}/pulls/{prid}/files"

    # The endpoint is paginated, so a single request only returns the first
    # page. Request the largest page size and keep fetching until a page is
    # not completely filled.
    per_page = 100
    page = 1
    modified_files: List[str] = []

    while True:

        # Send a GET request to the GitHub API (with a timeout so that an
        # unresponsive server cannot block the check forever)
        response = requests.get(api_url, params={"per_page": per_page, "page": page}, timeout=30)

        if response.status_code != 200:
            raise Exception(f"Failed to fetch modified files: {response.status_code}")

        # Parse the JSON response and extract the file paths
        batch = response.json()
        modified_files.extend(file["filename"] for file in batch)

        # a partially filled (or empty) page is the last one
        if len(batch) < per_page:
            break
        page += 1

    # return list of modified files
    return modified_files
|
||
def get_modified_models_from_modified_files(modified_files: List[str]) -> List[str]:
    """Derive the names of the modified models from a list of file paths.

    The model name is the second component of a path
    (``models/<model_name>/...``). Paths without a second component, e.g.
    files in the repository root, do not belong to any model and are skipped.

    Args:
        modified_files: Repository-relative file paths separated by ``/``.

    Returns:
        The distinct model names in sorted order.
    """
    modified_models = set()

    for file in modified_files:

        # get the model name (models/<model_name>/...)
        path_components = file.split("/")
        if len(path_components) < 2:
            # no second component -> not inside a model folder
            continue
        modified_models.add(path_components[1])

    # the set already removed duplicates; sort for a deterministic result
    return sorted(modified_models)
|
||
def validateModelFolder(base: str, model_name: str):
    """Verify that a model folder exists and holds all mandatory files.

    The folder ``<base>/<model_name>`` must contain

    - ``dockerfiles/Dockerfile``
    - ``config/default.yml``
    - ``meta.json``

    Args:
        base: Folder that contains all model folders.
        model_name: Name of the model folder to check.

    Raises:
        MHubComplianceError: If the model folder or one of the mandatory
            files is missing.
    """
    model_path = os.path.join(base, model_name)

    def has_file(*relative_parts: str) -> bool:
        # True if the given path below the model folder is an existing file
        return os.path.isfile(os.path.join(model_path, *relative_parts))

    # the model folder itself
    if not os.path.isdir(model_path):
        raise MHubComplianceError(f"Model folder {model_path} does not exist", DocuRef.MODEL_FOLDER_STRUCTURE)

    # the Dockerfile
    if not has_file("dockerfiles", "Dockerfile"):
        raise MHubComplianceError(f"Model folder {model_path} does not contain a Dockerfile", [DocuRef.MODEL_FOLDER_STRUCTURE, DocuRef.DOCKERFILE])

    # the default workflow configuration
    if not has_file("config", "default.yml"):
        raise MHubComplianceError(f"Model folder {model_path} does not contain a default workflow configuration", [DocuRef.MODEL_FOLDER_STRUCTURE, DocuRef.CONFIG])

    # NOTE: a utils folder is not mandatory and therefore not checked here.
    #       However, all MHub-IO modules must be inside the utils folder if
    #       they exist; we could check the modified files for any *.py and
    #       demand they're inside the utils folder.

    # the model meta.json
    if not has_file("meta.json"):
        raise MHubComplianceError(f"Model folder {model_path} does not contain a meta.json", [DocuRef.MODEL_FOLDER_STRUCTURE, DocuRef.MODEL_META_JSON])
|
||
|
||
def validateModelMetaJson(model_meta_json_file: str):
    """Validate a model's meta.json against the MHub meta schema.

    The schema is read from ``.github/schemas/meta.schema.json`` relative to
    the current working directory.

    Args:
        model_meta_json_file: Path to the meta.json file of the model.

    Raises:
        MHubComplianceError: If the file is not valid JSON or does not
            comply with the schema.
    """

    # load schema
    with open(os.path.join('.github', 'schemas', 'meta.schema.json'), "r") as f:
        schema = json.load(f)

    # load model meta json
    # A syntactically broken meta.json is a problem of the submitted model,
    # so it is reported as a compliance error and not as an unexpected error.
    try:
        with open(model_meta_json_file, "r") as f:
            model_meta_json = json.load(f)
    except json.JSONDecodeError as e:
        raise MHubComplianceError(f"Model meta json is not valid JSON: {e}", DocuRef.MODEL_META_JSON) from e

    # validate
    try:
        jsonschema.validate(instance=model_meta_json, schema=schema)
    except jsonschema.ValidationError as e:
        raise MHubComplianceError(f"Model meta json is not compliant with the schema: {e.message}", DocuRef.MODEL_META_JSON) from e
|
||
def validateDockerfile(base: str, model_name: str):
    """Validate the Dockerfile of a model against the MHub rules.

    The file ``<base>/<model_name>/dockerfiles/Dockerfile`` must

    - start with ``FROM mhubai/base:latest`` and contain no further FROM,
    - contain no WORKDIR, ADD or COPY instruction,
    - define ``ARG MHUB_MODELS_REPO``,
    - import the model definition with
      ``RUN buildutils/import_mhub_model.sh <model_name> ${MHUB_MODELS_REPO}``,
    - end with one of the accepted ENTRYPOINT lines followed by one of the
      accepted CMD lines.

    Empty lines and surrounding whitespace are ignored.

    Args:
        base: Folder that contains all model folders.
        model_name: Name of the model whose Dockerfile is checked.

    Raises:
        MHubComplianceError: If any of the rules above is violated.
    """

    # get dockerfile path
    model_dockerfile = os.path.join(base, model_name, "dockerfiles", "Dockerfile")

    # read dockerfile
    with open(model_dockerfile, "r") as f:
        dockerfile = f.read()

    # split dockerfile into lines, strip surrounding whitespace so that
    # indentation or trailing blanks can neither hide a forbidden instruction
    # nor break the exact comparisons below, and remove empty lines
    lines = [line.strip() for line in dockerfile.splitlines()]
    lines = [line for line in lines if line != ""]

    # an empty Dockerfile has no first line to inspect
    if not lines:
        raise MHubComplianceError("Dockerfile is empty", DocuRef.DOCKERFILE)

    # check that the dockerfile contains only a single FROM command which
    # is the first line of the file and is `FROM mhubai/base:latest`
    if lines[0] != "FROM mhubai/base:latest":
        raise MHubComplianceError(f"Dockerfile does not contain the correct FROM command: {lines[0]}", DocuRef.DOCKERFILE)

    # some status variables from parsing the dockerfile
    dockerfile_defines_arg_mhub_models_repo = False
    dockerfile_contains_mhubio_import = False
    required_import = f"RUN buildutils/import_mhub_model.sh {model_name} ${{MHUB_MODELS_REPO}}"

    # True while the current line continues the previous instruction
    # (previous line ended with a backslash)
    inside_continuation = False

    # check that dockerfile contains no ADD or COPY commands
    # We also don't allow changing the WORKDIR which is set to /app in the base and must be consistent across all models
    # so no instruction is allowed to be ADD, COPY, WORKDIR, ..
    for i, line in enumerate(lines):

        # comment lines are no instructions and do not affect continuations
        if line.startswith("#"):
            continue

        # only a line that begins a new instruction is inspected; the
        # remainder of a backslash-continued instruction is an argument
        begins_instruction = not inside_continuation
        inside_continuation = line.endswith("\\")
        if not begins_instruction:
            continue

        # Dockerfile instructions are not case-sensitive, so compare the
        # upper-cased keyword
        instruction = line.split(None, 1)[0].upper()

        # forbidden keywords

        if instruction == "WORKDIR":
            raise MHubComplianceError(f"WORKDIR must not be set to any other than `/app` as defined in our base image. {line}", DocuRef.DOCKERFILE)

        if instruction in ("ADD", "COPY"):
            raise MHubComplianceError(f"Dockerfile contains ADD or COPY command: {line}", DocuRef.DOCKERFILE)

        if instruction == "FROM" and i > 0:
            raise MHubComplianceError(f"Dockerfile contains FROM command not at the beginning of the file: {line}", DocuRef.DOCKERFILE)

        # required keywords & status variables

        if line == "ARG MHUB_MODELS_REPO":
            dockerfile_defines_arg_mhub_models_repo = True

        if line == required_import:
            dockerfile_contains_mhubio_import = True

    # check if the dockerfile contains the required ARG MHUB_MODELS_REPO and model import
    if not dockerfile_defines_arg_mhub_models_repo:
        raise MHubComplianceError("Dockerfile does not define 'ARG MHUB_MODELS_REPO'", DocuRef.DOCKERFILE)

    if not dockerfile_contains_mhubio_import:
        raise MHubComplianceError(f"Dockerfile does not contain the required mhubio import command: '{required_import}'.", DocuRef.DOCKERFILE)

    # check that the entrypoint of the dockerfile matches
    # ENTRYPOINT ["mhub.run"] | ENTRYPOINT ["python3", "-m", "mhubio.run"]
    # (at this point FROM, ARG and RUN were found, so at least three lines exist)
    if lines[-2] not in ['ENTRYPOINT ["mhub.run"]', 'ENTRYPOINT ["python3", "-m", "mhubio.run"]']:
        raise MHubComplianceError(f"Dockerfile does not contain the correct entrypoint: {lines[-2]}", DocuRef.DOCKERFILE)

    # CMD ["--workflow", "default"] | CMD ["--config", "/app/models/$model_name/config/default.yml"]
    if lines[-1] not in ['CMD ["--workflow", "default"]', f'CMD ["--config", "/app/models/{model_name}/config/default.yml"]']:
        raise MHubComplianceError(f"Dockerfile does not contain the correct CMD: {lines[-1]}", DocuRef.DOCKERFILE)
|
||
|
||
def get_model_configuration_files(base: str, model_name: str) -> List[str]:
    """List the workflows defined in a model's config folder.

    Args:
        base: Folder that contains all model folders.
        model_name: Name of the model.

    Returns:
        The name of every ``*.yml`` file inside
        ``<base>/<model_name>/config`` with the ``.yml`` extension removed,
        in the order reported by ``os.listdir``.
    """
    config_extension = ".yml"

    # folder holding the workflow configurations of the model
    model_config_dir = os.path.join(base, model_name, "config")

    # collect the file names of all configuration files without extension
    workflow_names = []
    for entry in os.listdir(model_config_dir):
        if entry.endswith(config_extension):
            workflow_names.append(entry[:-len(config_extension)])

    return workflow_names
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Runs the MHub compliance checks on every pull request against main.
name: MHub Contribution Check

on:
  pull_request:
    branches:
      - "main"

env:
  # number of the pull request, read by the check script
  PR_NUMBER: ${{ github.event.number }}

jobs:
  test:
    name: Setup Compliance
    runs-on: ubuntu-latest

    steps:

      # Checkout the latest code from the repo
      - name: Checkout repo
        uses: actions/checkout@v4

      # Setup which version of Python to use
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          # quoted so that YAML reads a string and not a float
          # (an unquoted 3.10 would be parsed as 3.1)
          python-version: "3.8"

      # Display the Python version being used
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"

      # Install Python dependencies
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests jsonschema

      # Get the list of files modified in the PR
      - name: Get files modified in PR
        id: modified_files
        # NOTE(review): the action reference was garbled in the reviewed copy
        # of this file; presumably Ana06/get-changed-files@v2.2.0 - confirm.
        uses: Ana06/get-changed-files@v2.2.0
        with:
          format: json

      # Run check script
      - name: Run MHub compliance test
        run: python .github/scripts/mhub_check.py
        env:
          # JSON encoded list of all files modified in the PR
          MODIFIED_FILES: ${{ steps.modified_files.outputs.all }}
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/bin/bash

# Script to import the MHub model definition from GitHub.
# Provide the name of the model as a parameter.
# Usage: import_mhub_model.sh <model_name> <(repo_url=https://github.com/MHubAI/models.git::main)> <(branch=main)>
#
# The repository argument may carry the branch after a "::" separator. If it
# does not, the third argument is used as branch, falling back to "main".

# Abort on the first failing command. Without this, a failed fetch or merge
# would be masked by the exit status of the final cleanup and the docker
# build would succeed without the model definition.
set -e

# parameters extraction
MODEL_NAME=$1
REPO_AND_BRANCH=${2:-https://github.com/MHubAI/models.git::main}
REPO_URL=$(echo "$REPO_AND_BRANCH" | awk -F '::' '{print $1}')
REPO_BRANCH=$(echo "$REPO_AND_BRANCH" | awk -F '::' '{print $2}')
REPO_BRANCH=${REPO_BRANCH:-$3}
REPO_BRANCH=${REPO_BRANCH:-main}

# printout parameters (this happens during the docker build...)
echo "Importing model definition from MHub models repository."
echo "├── MODEL NAME ..... ${MODEL_NAME}"
echo "├── REPOSITORY ..... ${REPO_URL}"
echo "└── BRANCH ......... ${REPO_BRANCH}"
echo

# fail if model name is empty
if [ -z "$MODEL_NAME" ]; then
  echo "Error: no model name provided."
  exit 1
fi

# print a warning that the model definition is not from
# the official MHub Models repository and therefore only
# suitable for development
if [ "$REPO_URL@$REPO_BRANCH" != "https://github.com/MHubAI/models.git@main" ]; then
  echo
  echo "Warning: the model definition is not from the official MHub Models repository and therefore only suitable for development."
  echo
fi

# fetch the referenced repository and branch into a fresh git repository,
# restrict the working tree to the model definition folder
# (models/<model_name>) via sparse checkout and remove the git metadata
git init
git fetch "${REPO_URL}" "${REPO_BRANCH}"
git merge FETCH_HEAD
git sparse-checkout set "models/${MODEL_NAME}"
rm -r .git
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.