Skip to content

Commit

Permalink
Merge branch 'MHubAI:main' into m-gc-spider-baseline
Browse files Browse the repository at this point in the history
  • Loading branch information
silvandeleemput authored Nov 23, 2023
2 parents 4a4cf07 + efbd003 commit ea5774c
Show file tree
Hide file tree
Showing 14 changed files with 788 additions and 48 deletions.
416 changes: 416 additions & 0 deletions .github/schemas/meta.schema.json

Large diffs are not rendered by default.

63 changes: 63 additions & 0 deletions .github/scripts/mhub_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import utils
import os, sys, json

# MHub compliance check entry point.
#
# Reads the pull request number (`PR_NUMBER`) and the JSON encoded list of
# modified files (`MODIFIED_FILES`) from the environment, derives the modified
# model from the file paths and runs the folder, meta.json and Dockerfile
# checks on it. Exits with status 1 as soon as a check fails.

# introduction
print()
print("------------------------------------------------")
print("MHub Compliance Checks started.")
print("We will check for a correct folder setup, Dockerfile and meta.json compliance.")
print()

# print the pull request number provided through the environment
PR = os.environ['PR_NUMBER']
print("Pull request: ", PR)

# the list of modified files is provided as a JSON array through the
# environment by an earlier workflow step
modified_files = json.loads(os.environ['MODIFIED_FILES'])
print("Modified files: ", "\n ".join(modified_files))

# the model name is the second path component (<base>/<model_name>/...), so a
# file in the repository root has no model name. Report those files with a
# readable message instead of crashing with an IndexError below.
files_without_model = [fp for fp in modified_files if len(fp.split("/")) < 2]
if files_without_model:
    print("CHECK FAILED: ", "The following modified files are not located inside a model folder: " + ", ".join(files_without_model))
    sys.exit(1)

# modified models list (sorted so the printed output is reproducible)
modified_models = sorted(set(fp.split("/")[1] for fp in modified_files))
print("Modified models: ", ", ".join(modified_models))

# we allow modifications only to a single model for now
# TODO: iterate model list (we can outsource model checks and then call a check_model script with the model name as argument)
if len(modified_models) != 1:
    print("CHECK FAILED: ", "Exactly one model must be modified in a pull request.")
    sys.exit(1)

# model name
model_name = modified_models[0]

# run compliance checks
try:
    # check folder structure
    utils.validateModelFolder(base='models', model_name=model_name)

    # check meta.json
    utils.validateModelMetaJson(model_meta_json_file=os.path.join('models', model_name, 'meta.json'))

    # validate dockerfile
    utils.validateDockerfile(base='models', model_name=model_name)

except utils.MHubComplianceError as e:
    print()
    print("---------------- CHECK FAILED ----------------")
    print("This PR violates one or more MHub compliance rules:")
    print(str(e))
    print()
    sys.exit(1)

except Exception as e:
    print()
    print("---------------- CHECK FAILED ----------------")
    print("An unexpected error occured during compliance checks.")
    # report what went wrong, otherwise the failure cannot be diagnosed from the log
    print(f"{type(e).__name__}: {e}")
    print()
    sys.exit(1)

# all checks passed
print()
print("---------------- CHECK PASSED ----------------")
print("All compliance checks passed.")
print("Note: compliance checks are a beta feature. Passing all automated compliance checks does not guarantee that your model is compliant with the MHub standard. We will now perform a manual review of your model. Testing your model on a public dataset is obligatory.")
print()
187 changes: 187 additions & 0 deletions .github/scripts/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
from typing import List, Union
from enum import Enum
import requests, os
import json
import jsonschema

# NOTE: all file path operations are relative to the repository root.

# references for documentation
class DocuRef(Enum):
    """Links to MHub documentation pages; the values are appended to compliance error messages."""

    # layout of a model folder
    MODEL_FOLDER_STRUCTURE = "https://github.com/MHubAI/documentation/blob/main/documentation/mhub_models/model_folder_structure.md"
    # the MHub Dockerfile
    DOCKERFILE = "https://github.com/MHubAI/documentation/blob/main/documentation/mhub_models/the_mhub_dockerfile.md"
    # the MHub-IO config file
    CONFIG = "https://github.com/MHubAI/documentation/blob/main/documentation/mhubio/the_mhubio_config_file.md"
    # MHub-IO modules
    MHUBIO_MODULES = "https://github.com/MHubAI/documentation/blob/main/documentation/mhubio/mhubio_modules.md"
    # the model json (meta.json)
    MODEL_META_JSON = "https://github.com/MHubAI/documentation/blob/main/documentation/mhub_models/model_json.md"

class MHubComplianceError(Exception):
    """Signals that a model violates the MHub standard.

    The exception text consists of the given message followed by the link(s)
    of the documentation reference(s) passed as `docu_ref`.
    """

    def __init__(self, message: str, docu_ref: Union[DocuRef, List[DocuRef]]):
        # a single reference and a list of references are rendered differently
        if not isinstance(docu_ref, list):
            text = f"{message} ( see {docu_ref.value} for more information)"
        else:
            links = ', '.join([ref.value for ref in docu_ref])
            text = f"{message} (see {links})"

        super().__init__(text)

def get_modified_files_from_PR(prid, repo = 'models') -> List[str]:
    """Fetch the paths of all files modified in a pull request.

    Queries the GitHub REST API (unauthenticated) for the files of pull request
    `prid` in the `MHubAI/<repo>` repository. The endpoint is paginated, so all
    pages are requested until a page is not completely filled.

    Args:
        prid: number of the pull request.
        repo: name of the repository inside the MHubAI organization.

    Returns:
        The file paths as reported in the `filename` field of the API response.

    Raises:
        Exception: if the API answers with a status code other than 200.
    """

    # GitHub API URL to list files modified in the PR
    api_url = f"https://api.github.com/repos/MHubAI/{repo}/pulls/{prid}/files"

    # request the largest page size the API allows to keep the number of requests low
    per_page = 100

    modified_files = []
    page = 1
    while True:

        # send a GET request to the GitHub API; the timeout keeps the
        # check from hanging forever when the API does not respond
        response = requests.get(api_url, params={"per_page": per_page, "page": page}, timeout=30)

        if response.status_code != 200:
            raise Exception(f"Failed to fetch modified files: {response.status_code}")

        # parse the JSON response and extract the file paths
        files = response.json()
        modified_files.extend(file["filename"] for file in files)

        # a page that is not completely filled is the last one
        if len(files) < per_page:
            break
        page += 1

    # return list of modified files
    return modified_files

def get_modified_models_from_modified_files(modified_files: List[str]) -> List[str]:
    """Derive the names of the modified models from a list of file paths.

    The model name is the second path component (`models/<model_name>/...`).
    Paths without a second component (files in the repository root) do not
    belong to any model and are skipped.

    Args:
        modified_files: file paths relative to the repository root.

    Returns:
        The unique model names in alphabetical order.
    """
    modified_models = set()

    for file in modified_files:
        parts = file.split("/")

        # a file in the repository root has no model segment to extract
        if len(parts) < 2:
            continue

        # get the model name (models/<model_name>/...)
        modified_models.add(parts[1])

    # sorted so that the result is reproducible between runs
    return sorted(modified_models)

def validateModelFolder(base: str, model_name: str):
    """Verify that the model folder exists and contains all mandatory files.

    The mandatory files, relative to `<base>/<model_name>`, are
    `dockerfiles/Dockerfile`, `config/default.yml` and `meta.json`.

    Args:
        base: folder containing the model folders.
        model_name: name of the model folder to check.

    Raises:
        MHubComplianceError: if the model folder or one of the mandatory
            files is missing.
    """
    model_path = os.path.join(base, model_name)

    def has_file(*parts: str) -> bool:
        # true if the given path below the model folder is an existing file
        return os.path.isfile(os.path.join(model_path, *parts))

    # the model folder itself must exist
    if not os.path.isdir(model_path):
        raise MHubComplianceError(f"Model folder {model_path} does not exist", DocuRef.MODEL_FOLDER_STRUCTURE)

    # mandatory: the Dockerfile
    if not has_file("dockerfiles", "Dockerfile"):
        raise MHubComplianceError(f"Model folder {model_path} does not contain a Dockerfile", [DocuRef.MODEL_FOLDER_STRUCTURE, DocuRef.DOCKERFILE])

    # mandatory: the default workflow configuration
    if not has_file("config", "default.yml"):
        raise MHubComplianceError(f"Model folder {model_path} does not contain a default workflow configuration", [DocuRef.MODEL_FOLDER_STRUCTURE, DocuRef.CONFIG])

    # NOTE: a utils folder is not mandatory and therefore not checked here.
    #       However, all MHub-IO modules must be inside the utils folder if
    #       they exist; the modified files could be checked for any *.py
    #       located outside of it.

    # mandatory: the meta.json
    if not has_file("meta.json"):
        raise MHubComplianceError(f"Model folder {model_path} does not contain a meta.json", [DocuRef.MODEL_FOLDER_STRUCTURE, DocuRef.MODEL_META_JSON])


def validateModelMetaJson(model_meta_json_file: str):
    """Validate a model's meta.json against the MHub meta schema.

    The schema is read from `.github/schemas/meta.schema.json`, relative to
    the current working directory.

    Args:
        model_meta_json_file: path to the meta.json file of the model.

    Raises:
        MHubComplianceError: if the file is not valid JSON or does not comply
            with the schema.
    """

    # load schema
    with open(os.path.join('.github', 'schemas', 'meta.schema.json'), "r") as f:
        schema = json.load(f)

    # load model meta json; a file that cannot be parsed is a compliance
    # violation of the contribution and not an unexpected error of the check
    try:
        with open(model_meta_json_file, "r") as f:
            model_meta_json = json.load(f)
    except json.JSONDecodeError as e:
        raise MHubComplianceError(f"Model meta json is not a valid JSON file: {e}", DocuRef.MODEL_META_JSON) from e

    # validate
    try:
        jsonschema.validate(instance=model_meta_json, schema=schema)
    except jsonschema.ValidationError as e:
        raise MHubComplianceError(f"Model meta json is not compliant with the schema: {e.message}", DocuRef.MODEL_META_JSON) from e

def validateDockerfile(base: str, model_name: str):
    """Check a model's Dockerfile against the MHub Dockerfile rules.

    The Dockerfile at `<base>/<model_name>/dockerfiles/Dockerfile` must

    - start with `FROM mhubai/base:latest` and contain no further FROM,
    - contain no WORKDIR, ADD or COPY instruction,
    - define `ARG MHUB_MODELS_REPO`,
    - import the model definition with `buildutils/import_mhub_model.sh`,
    - end with one of the accepted ENTRYPOINT lines followed by one of the
      accepted CMD lines.

    Empty lines and leading / trailing whitespace are ignored. Instruction
    names are compared case-insensitively, as Docker itself does.

    Args:
        base: folder containing the model folders.
        model_name: name of the model folder.

    Raises:
        MHubComplianceError: if one of the rules is violated.
    """

    # get dockerfile path
    model_dockerfile = os.path.join(base, model_name, "dockerfiles", "Dockerfile")

    # read dockerfile
    with open(model_dockerfile, "r") as f:
        dockerfile = f.read()

    # split dockerfile into lines, drop empty lines and surrounding whitespace
    # so that indentation or trailing blanks cannot influence any check
    lines = [line.strip() for line in dockerfile.split("\n") if line.strip() != ""]

    # without any line there is nothing to index below
    if not lines:
        raise MHubComplianceError("Dockerfile is empty", DocuRef.DOCKERFILE)

    # check that the dockerfile contains only a single FROM command which
    # is the first line of the file and is `FROM mhubai/base:latest`
    if lines[0] != "FROM mhubai/base:latest":
        raise MHubComplianceError(f"Dockerfile does not contain the correct FROM command: {lines[0]}", DocuRef.DOCKERFILE)

    # some status variables from parsing the dockerfile
    dockerfile_defines_arg_mhub_models_repo = False
    dockerfile_contains_mhubio_import = False

    # the line that imports the model definition
    import_command = f"RUN buildutils/import_mhub_model.sh {model_name} ${{MHUB_MODELS_REPO}}"

    # check that dockerfile contains no ADD or COPY commands
    # We also don't allow changing the WORKDIR which is set to /app in the base and must be consistent across all models
    # so no instruction is allowed to be ADD, COPY, WORKDIR, ..
    is_continuation = False
    for i, line in enumerate(lines):

        # comment lines carry no instruction and do not end a continuation
        if line.startswith("#"):
            continue

        # a line following a trailing backslash continues the previous
        # instruction (e.g. a multi-line RUN) and starts no instruction itself
        instruction = None if is_continuation else line.split()[0].upper()
        is_continuation = line.endswith("\\")

        # forbidden keywords

        if instruction == "WORKDIR":
            raise MHubComplianceError(f"WORKDIR must not be set to any other than `/app` as defined in our base image. {line}", DocuRef.DOCKERFILE)

        if instruction in ("ADD", "COPY"):
            raise MHubComplianceError(f"Dockerfile contains ADD or COPY command: {line}", DocuRef.DOCKERFILE)

        if instruction == "FROM" and i > 0:
            raise MHubComplianceError(f"Dockerfile contains FROM command not at the beginning of the file: {line}", DocuRef.DOCKERFILE)

        # required keywords & status variables

        if line == "ARG MHUB_MODELS_REPO":
            dockerfile_defines_arg_mhub_models_repo = True

        if line == import_command:
            dockerfile_contains_mhubio_import = True

    # check if the dockerfile contains the required ARG MHUB_MODELS_REPO and model import
    if not dockerfile_defines_arg_mhub_models_repo:
        raise MHubComplianceError(f"Dockerfile does not define 'ARG MHUB_MODELS_REPO'", DocuRef.DOCKERFILE)

    if not dockerfile_contains_mhubio_import:
        raise MHubComplianceError(f"Dockerfile does not contain the required mhubio import command: 'RUN buildutils/import_mhub_model.sh {model_name} ${{MHUB_MODELS_REPO}}'.", DocuRef.DOCKERFILE)

    # check that the entrypoint of the dockerfile matches
    # ENTRYPOINT ["mhub.run"] | ENTRYPOINT ["python3", "-m", "mhubio.run"]
    # (FROM, ARG and the import are three distinct lines, so lines[-2] exists here)
    if lines[-2] not in ['ENTRYPOINT ["mhub.run"]', 'ENTRYPOINT ["python3", "-m", "mhubio.run"]']:
        raise MHubComplianceError(f"Dockerfile does not contain the correct entrypoint: {lines[-2]}", DocuRef.DOCKERFILE)

    # CMD ["--workflow", "default"] | CMD ["--config", "/app/models/$model_name/config/default.yml"]
    if lines[-1] not in ['CMD ["--workflow", "default"]', f'CMD ["--config", "/app/models/{model_name}/config/default.yml"]']:
        raise MHubComplianceError(f"Dockerfile does not contain the correct CMD: {lines[-1]}", DocuRef.DOCKERFILE)


def get_model_configuration_files(base: str, model_name: str) -> List[str]:
    """List the workflows of a model.

    Every entry of `<base>/<model_name>/config` whose name ends with `.yml`
    is reported, with that extension removed.

    Args:
        base: folder containing the model folders.
        model_name: name of the model folder.

    Returns:
        The workflow names in the order reported by `os.listdir`.
    """
    extension = ".yml"
    config_dir = os.path.join(base, model_name, "config")

    workflow_names = []
    for entry in os.listdir(config_dir):
        if entry.endswith(extension):
            # strip the extension to get the workflow name
            workflow_names.append(entry[:-len(extension)])

    return workflow_names
49 changes: 49 additions & 0 deletions .github/workflows/model_compliance.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Runs the MHub compliance checks on every pull request against main.
name: MHub Contribution Check

on:
  pull_request:
    branches:
      - "main"

env:
  PR_NUMBER: ${{ github.event.number }}

jobs:
  test:
    name: Setup Compliance
    runs-on: ubuntu-latest

    steps:

      # Checkout the latest code from the repo
      - name: Checkout repo
        uses: actions/checkout@v4

      # Setup which version of Python to use
      # (quoted so that the version is always read as a string, never as a number)
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"

      # Display the Python version being used
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"

      # Install Python dependencies
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests jsonschema

      # Get the list of files modified in the PR
      - name: Get files modified in PR
        id: modified_files
        uses: Ana06/get-changed-files@v2.2.0
        with:
          format: json

      # Run check script; the list of modified files is handed over as JSON
      # through the MODIFIED_FILES environment variable
      - name: Run MHub compliance test
        run: python .github/scripts/mhub_check.py
        env:
          MODIFIED_FILES: ${{ steps.modified_files.outputs.all }}
File renamed without changes.
43 changes: 43 additions & 0 deletions base/buildutils/import_mhub_model.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash

# Script to import the MHub model definition from GitHub.
# Provide the name of the model as a parameter.
# Usage: buildutils/import_mhub_model.sh <model_name> <(repo_url=https://github.com/MHubAI/models.git::main)> <(branch=main)>
#
# The branch can be given either as part of the second argument, separated
# from the repository url by `::`, or as third argument. The branch from the
# second argument takes precedence; without any branch, `main` is used.

# parameters extraction
MODEL_NAME=$1
REPO_AND_BRANCH=${2:-https://github.com/MHubAI/models.git::main}
REPO_URL=$(echo "$REPO_AND_BRANCH" | awk -F :: '{print $1}')
REPO_BRANCH=$(echo "$REPO_AND_BRANCH" | awk -F :: '{print $2}')
REPO_BRANCH=${REPO_BRANCH:-$3}
REPO_BRANCH=${REPO_BRANCH:-main}

# print out parameters (this happens during the docker build...)
echo "Importing model definition from MHub models repository."
echo "├── MODEL NAME ..... ${MODEL_NAME}"
echo "├── REPOSITORY ..... ${REPO_URL}"
echo "└── BRANCH ......... ${REPO_BRANCH}"
echo

# fail if model name is empty
if [ -z "$MODEL_NAME" ]; then
  echo "Error: no model name provided."
  exit 1
fi

# print a warning that the model definition is not from the
# official MHub Models repository and therefore only
# suitable for development
if [ "$REPO_URL@$REPO_BRANCH" != "https://github.com/MHubAI/models.git@main" ]; then
  echo
  echo "Warning: the model definition is not from the official MHub Models repository and therefore only suitable for development."
  echo
fi

# abort on the first failing command from here on: a failed fetch or merge
# must fail the build instead of silently producing an image without the model
set -e

# perform a sparse checkout of the model definition folder
# (models/<model_name>) from the referenced repository and branch
git init
git fetch "${REPO_URL}" "${REPO_BRANCH}"
git merge FETCH_HEAD
git sparse-checkout set "models/${MODEL_NAME}"
rm -r .git
7 changes: 4 additions & 3 deletions base/dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,14 @@ RUN pip3 install --upgrade pip && pip3 install --no-cache-dir \
RUN pip3 install git+https://github.com/MHubAI/mhubio.git \
&& pip3 install git+https://github.com/MHubAI/segdb.git \
&& git init \
&& git sparse-checkout set "base/utils" "base/bin" \
&& git sparse-checkout set "base/buildutils" "base/bin" \
&& git fetch https://github.com/MHubAI/models.git main \
&& git merge FETCH_HEAD \
&& mv base/utils . \
&& mv base/buildutils . \
&& chmod +x base/bin/* \
&& cp base/bin/* /usr/bin/ \
&& rm -r base
&& rm -r base \
&& rm -r .git

# Install DCMQI by pulling the latest release from GitHub (via GitHub API)
# Run everything in a single RUN command to avoid creating intermediate layers (and allowing environment variables to be used)
Expand Down
Loading

0 comments on commit ea5774c

Please sign in to comment.