Skip to content

Commit

Permalink
Merge pull request #8 from mmcdermott/MEDS-DEV_v0
Browse files Browse the repository at this point in the history
Initial set-up for MEDS-DEV
  • Loading branch information
mmcdermott authored Aug 14, 2024
2 parents 437aa4e + 3abfab6 commit 030babe
Show file tree
Hide file tree
Showing 108 changed files with 824 additions and 5,854 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/code-quality-main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Same as `code-quality-pr.yaml` but triggered on commit to main branch
# and runs on all files (instead of only the changed ones)

name: Code Quality Main

on:
  push:
    branches: [main]

jobs:
  code-quality:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted on purpose: a bare 3.10 would be parsed as the float 3.1.
          python-version: "3.10"

      # No `extra_args` are passed, so the action falls back to its default
      # of checking every file in the repository.
      # NOTE(review): the release tag of this reference was unreadable in the
      # reviewed copy (mangled into an e-mail placeholder); v3.0.1 is a
      # published tag -- confirm it is the intended one.
      - name: Run pre-commits
        uses: pre-commit/action@v3.0.1
38 changes: 38 additions & 0 deletions .github/workflows/code-quality-pr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# This workflow finds which files were changed, prints them,
# and runs `pre-commit` on those files.

# Inspired by the sktime library:
# https://github.com/alan-turing-institute/sktime/blob/main/.github/workflows/test.yml

name: Code Quality PR

on:
  pull_request:
    branches: [main, "release/*", "dev"]

jobs:
  code-quality:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted on purpose: a bare 3.10 would be parsed as the float 3.1.
          python-version: "3.10"

      # Collects the files touched by the pull request; `output: " "` makes
      # the resulting list space-separated so it can be handed to `--files`.
      # NOTE(review): the release tag of this reference was unreadable in the
      # reviewed copy (mangled into an e-mail placeholder); v1.2.4 is a
      # published tag -- confirm it is the intended one.
      - name: Find modified files
        id: file_changes
        uses: trilom/file-changes-action@v1.2.4
        with:
          output: " "

      # The file list comes from the pull request and is therefore untrusted.
      # It is passed through an environment variable instead of being
      # interpolated into the script text, so a crafted file name cannot
      # break out of the quoting and run arbitrary shell commands.
      - name: List modified files
        env:
          MODIFIED_FILES: ${{ steps.file_changes.outputs.files }}
        run: echo "$MODIFIED_FILES"

      # Restrict the hooks to the files changed by this pull request.
      # NOTE(review): release tag reconstructed for the same reason as above;
      # v3.0.1 is a published tag -- confirm it is the intended one.
      - name: Run pre-commits
        uses: pre-commit/action@v3.0.1
        with:
          extra_args: --files ${{ steps.file_changes.outputs.files }}
95 changes: 95 additions & 0 deletions .github/workflows/python-build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI

# Every push builds the distribution; publishing and releasing are limited
# to tag pushes by the `if:` guard on the `publish-to-pypi` job below.
on: push

jobs:
  build:
    name: Build distribution 📦
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted on purpose: a bare 3.10 would be parsed as the float 3.1.
          python-version: "3.10"
      - name: Install pypa/build
        run: >-
          python3 -m
          pip install
          build
          --user
      - name: Build a binary wheel and a source tarball
        run: python3 -m build
      # The artifact name must match the `download-artifact` steps in the
      # jobs below, which fetch the built packages back into `dist/`.
      - name: Store the distribution packages
        uses: actions/upload-artifact@v4
        with:
          name: python-package-distributions
          path: dist/

  publish-to-pypi:
    name: >-
      Publish Python 🐍 distribution 📦 to PyPI
    if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
    needs:
      - build
    runs-on: ubuntu-latest
    environment:
      name: pypi
      # NOTE(review): this URL names the `MEDS-transforms` project and looks
      # like it was carried over from another repository -- confirm the PyPI
      # project name this repository actually publishes under.
      url: https://pypi.org/p/MEDS-transforms
    permissions:
      id-token: write # IMPORTANT: mandatory for trusted publishing

    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/

      - name: Publish distribution 📦 to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1

  github-release:
    name: >-
      Sign the Python 🐍 distribution 📦 with Sigstore
      and upload them to GitHub Release
    needs:
      - publish-to-pypi
    runs-on: ubuntu-latest

    permissions:
      contents: write # IMPORTANT: mandatory for making GitHub Releases
      id-token: write # IMPORTANT: mandatory for sigstore

    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/

      # NOTE(review): the release tag of this reference was unreadable in the
      # reviewed copy (mangled into an e-mail placeholder); v3.0.0 is a
      # published tag -- confirm it is the intended one.
      - name: Sign the dists with Sigstore
        uses: sigstore/gh-action-sigstore-python@v3.0.0
        with:
          inputs: >-
            ./dist/*.tar.gz
            ./dist/*.whl
      - name: Create GitHub Release
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: >-
          gh release create
          '${{ github.ref_name }}'
          --repo '${{ github.repository }}'
          --notes ""
      - name: Upload artifact signatures to GitHub Release
        env:
          GITHUB_TOKEN: ${{ github.token }}
        # Upload to GitHub Release using the `gh` CLI.
        # `dist/` contains the built packages, and the
        # sigstore-produced signatures and certificates.
        run: >-
          gh release upload
          '${{ github.ref_name }}' dist/**
          --repo '${{ github.repository }}'
46 changes: 46 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
name: Tests

# Runs the test suite on pushes to main and on pull requests that target
# main, a release branch, or dev.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main, "release/*", "dev"]

jobs:
  run_tests_ubuntu:
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false

    timeout-minutes: 30

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted on purpose: a bare 3.10 would be parsed as the float 3.1.
          python-version: "3.10"

      - name: Install packages
        run: |
          pip install -e .[tests]

      #----------------------------------------------
      #              run test suite
      #----------------------------------------------
      # `--junitxml=junit.xml` writes the JUnit report to junit.xml.
      - name: Run tests
        run: |
          pytest -v --doctest-modules --cov=src --junitxml=junit.xml -s --ignore=docs

      # NOTE(review): the release tag of this reference was unreadable in the
      # reviewed copy (mangled into an e-mail placeholder); v4.0.1 is a
      # published tag -- confirm it is the intended one.
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4.0.1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}

      # `!cancelled()` keeps this step running when the tests fail, so the
      # failing results are still uploaded.
      - name: Upload test results to Codecov
        if: ${{ !cancelled() }}
        uses: codecov/test-results-action@v1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
132 changes: 132 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Interpreter used for every Python-based hook.
default_language_version:
  python: "python3.10"

# Path pattern that no hook should touch.
exclude: "docs/index.md"

repos:
  # General-purpose file hygiene checks.
  # Catalogue of available hooks: https://pre-commit.com/hooks.html
  - repo: "https://github.com/pre-commit/pre-commit-hooks"
    rev: "v4.4.0"
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-docstring-first
      - id: check-yaml
      - id: debug-statements
      - id: detect-private-key
      - id: check-executables-have-shebangs
      - id: check-toml
      - id: check-case-conflict
      - id: check-added-large-files
        args:
          - "--maxkb"
          - "800"

  # Python: code formatter.
  - repo: "https://github.com/psf/black"
    rev: "23.7.0"
    hooks:
      - id: black
        args:
          - "--line-length"
          - "110"

  # Python: import ordering, using the black-compatible profile.
  - repo: "https://github.com/PyCQA/isort"
    rev: "5.12.0"
    hooks:
      - id: isort
        args:
          - "--profile"
          - "black"
          - "--filter-files"
          - "-o"
          - "wandb"

  # Python: strip unused imports, rewriting files in place.
  - repo: "https://github.com/PyCQA/autoflake"
    rev: "v2.2.0"
    hooks:
      - id: autoflake
        args:
          - "--in-place"
          - "--remove-all-unused-imports"

  # Python: rewrite older syntax for Python 3.10 and later.
  - repo: "https://github.com/asottile/pyupgrade"
    rev: "v3.10.1"
    hooks:
      - id: pyupgrade
        args:
          - "--py310-plus"

  # Python: docstring wrapping and formatting.
  - repo: "https://github.com/myint/docformatter"
    rev: "v1.7.5"
    hooks:
      - id: docformatter
        args:
          - "--in-place"
          - "--wrap-summaries=110"
          - "--wrap-descriptions=110"

  # Python: PEP 8 style, programming errors, and complexity limits.
  - repo: "https://github.com/PyCQA/flake8"
    rev: "6.1.0"
    hooks:
      - id: flake8
        args:
          - "--max-complexity=10"
          - "--extend-ignore"
          - "E402,E701,E251,E226,E302,W504,E704,E402,E401,C901,E203"
          - "--max-line-length=110"
          - "--exclude"
          - "logs/*,data/*"
          - "--per-file-ignores"
          - "__init__.py:F401"

  # YAML: formatter (limited to YAML files via `types`).
  - repo: "https://github.com/pre-commit/mirrors-prettier"
    rev: "v3.0.3"
    hooks:
      - id: prettier
        types:
          - yaml

  # Shell: static analysis of scripts.
  - repo: "https://github.com/shellcheck-py/shellcheck-py"
    rev: "v0.9.0.5"
    hooks:
      - id: shellcheck

  # Markdown: formatter plus the plugins it needs installed alongside it.
  - repo: "https://github.com/executablebooks/mdformat"
    rev: "0.7.17"
    hooks:
      - id: mdformat
        args:
          - "--number"
        additional_dependencies:
          - mdformat-gfm
          - mdformat-tables
          - mdformat_frontmatter
          - mdformat-black
          - mdformat-config
          - mdformat-shfmt
          - mdformat-mkdocs
          - mdformat-toc
          - mdformat-admon

  # Spelling: skip generated or data files and allow a few domain words.
  - repo: "https://github.com/codespell-project/codespell"
    rev: "v2.2.5"
    hooks:
      - id: codespell
        args:
          - "--skip=logs/**,data/**,*.ipynb,*.bib,env.yml,env_cpu.yml,*.svg,poetry.lock"
          - "--ignore-words-list=ehr,crate,infarction"

  # Jupyter: remove cell outputs before committing.
  - repo: "https://github.com/kynan/nbstripout"
    rev: "0.6.1"
    hooks:
      - id: nbstripout

  # Jupyter: run the Python formatters and linter on notebook cells.
  - repo: "https://github.com/nbQA-dev/nbQA"
    rev: "1.7.0"
    hooks:
      - id: nbqa-black
        args:
          - "--line-length=110"
      - id: nbqa-isort
        args:
          - "--profile=black"
      - id: nbqa-flake8
        args:
          - "--extend-ignore=E203,E402,E501,F401,F841"
          - "--exclude=logs/*,data/*"
15 changes: 10 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
# The Platform for Inductive Experiments over Medical Data (PIE MD)
This repository contains the dataset, task, model training recipes, and results for the PIE MD benchmarking
effort for EHR machine learning. See [this google doc](https://docs.google.com/document/d/1s-AR0qfoPmwJW4G7cIxdV6Z_cSEDGUir9BhO92Vb3wo/edit?usp=sharing) for more details.
# The MEDS Dynamic Extensible Validation (MEDS-DEV) Benchmark: Re-thinking Reproducibility and Validation in ML for Health

This repository contains the dataset, task, model training recipes, and results for the MEDS-DEV benchmarking
effort for EHR machine learning.

Note that this repository is _not_ a place where functional code is stored. Rather, this repository stores
configuration files, training recipes, results, etc. for the PIE MD benchmarking effort -- runnable code will
configuration files, training recipes, results, etc. for the MEDS-DEV benchmarking effort -- runnable code will
often come from other repositories, with suitable permalinks being present in the various configuration files
or commit messages for associated contributions to this repository.

## Contributing to PIE MD
## Contributing to MEDS-DEV

### To Add a Model

TODO

### To Add a Dataset

TODO

### To Add a Task

TODO

### To Add Results

TODO
Loading

0 comments on commit 030babe

Please sign in to comment.