
Commit

ci: migrate to uv and update build tools🔧 (#478)
* ci: migrate to uv and update build tools🔧

* ci: update gh workflow to use uv🔧

* ci: set python version to 3.11

* tests: update resource setup in tests

* ci: fix coverage report generation🔧
KarelZe authored Dec 2, 2024
1 parent 2b25376 commit 037e69a
Showing 33 changed files with 5,823 additions and 5,306 deletions.
25 changes: 12 additions & 13 deletions .github/workflows/action_python.yaml
@@ -1,9 +1,9 @@
 name: Python package

 on:
-  push:
-    paths:
-      - 'src/**'
+  push:
+    # paths:
+    #   - 'src/**'

 jobs:
   build:
@@ -25,19 +25,18 @@ jobs:
         shell: bash
         env:
           WANDB_API_KEY: ${{secrets.WANDB_API_KEY}}
-      - name: Install poetry
-        run: pipx install poetry
-      - name: Set up Python and install dependencies
-        uses: actions/setup-python@v5
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
         with:
-          python-version: '3.10'
-          cache: 'poetry'
-      - name: install poetry
-        run: poetry install
+          enable-cache: true
+      - name: Set up Python
+        run: uv python install 3.11
+      - name: Install the project
+        run: uv sync --all-extras --dev
       - name: Test with pytest
         run: |
-          poetry run pytest --cov=src tests/ --doctest-modules --junitxml=cover/xunit-result.xml --cov-report xml:cover/coverage.xml
-          poetry run coverage json -o cover/coverage.json
+          uv run pytest --cov=src tests/ --junitxml=cover/xunit-result.xml --cov-report xml:cover/coverage.xml
+          uv run coverage json -o cover/coverage.json
       - name: "Extract numbers from cov report"
         run: |
           export TOTAL=$(python -c "import json;print(json.load(open('cover/coverage.json'))['totals']['percent_covered_display'])")
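Note: the coverage extraction step above shells out to an inline `python -c` one-liner. Unrolled into plain Python, it amounts to roughly the following sketch (the `cover/coverage.json` path comes from the workflow above; `percent_covered_display` is the rounded total in coverage.py's JSON report):

import json

# coverage.py's JSON report keeps rolled-up stats under "totals".
# "percent_covered_display" is the rounded percentage as a string, e.g. "87".
with open("cover/coverage.json") as f:
    report = json.load(f)

total = report["totals"]["percent_covered_display"]
print(total)  # the workflow step exports this value as $TOTAL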
3 changes: 0 additions & 3 deletions .gitignore
@@ -68,9 +68,6 @@ target/
 # Pycharm
 .idea

-# VS Code
-.vscode/
-
 # Spyder
 .spyproject/

1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -25,6 +25,7 @@ repos:
   hooks:
     - id: trailing-whitespace
     - id: check-added-large-files
+      exclude: uv.lock
     - id: check-builtin-literals
     - id: check-byte-order-marker
     - id: check-merge-conflict
21 changes: 21 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,21 @@
+{
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
+    "python.testing.autoTestDiscoverOnSaveEnabled": true,
+    "[python]": {
+        "editor.formatOnSave": true,
+        "editor.defaultFormatter": "charliermarsh.ruff",
+        "editor.codeActionsOnSave": {
+            "source.fixAll": "explicit",
+            "source.organizeImports": "explicit"
+        }
+    },
+    "notebook.formatOnSave.enabled": true,
+    "notebook.codeActionsOnSave": {
+        "notebook.source.fixAll": "explicit",
+        "notebook.source.organizeImports": "explicit"
+    }
+}
22 changes: 10 additions & 12 deletions notebooks/1.0-mb-data-preprocessing-mem-reduce.ipynb
@@ -31,7 +31,7 @@
     "ProgressBar.enable()\n",
     "\n",
     "import wandb\n",
-    "from tqdm.auto import tqdm\n"
+    "from tqdm.auto import tqdm"
    ]
   },
   {
@@ -47,7 +47,7 @@
     "FILE_PATH_INPUT = (\n",
     "    \"gs://thesis-bucket-option-trade-classification/data/raw/matched_cboe_quotes.csv\"\n",
     ")\n",
-    "FILE_PATH_OUTPUT = \"gs://thesis-bucket-option-trade-classification/data/preprocessed/\"\n"
+    "FILE_PATH_OUTPUT = \"gs://thesis-bucket-option-trade-classification/data/preprocessed/\""
    ]
   },
   {
@@ -58,7 +58,7 @@
    "source": [
     "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n",
     "credentials, _ = google.auth.default()\n",
-    "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)\n"
+    "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)"
    ]
   },
   {
@@ -76,7 +76,7 @@
    "source": [
     "# connect to weights and biases\n",
     "run = wandb.init(project=\"thesis\", job_type=\"dataset-creation\", entity=\"fbv\")\n",
-    "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_csv\", type=\"raw_data\")\n"
+    "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_csv\", type=\"raw_data\")"
    ]
   },
   {
@@ -88,8 +88,7 @@
    "outputs": [],
    "source": [
     "def import_data(input_file: str) -> pd.DataFrame:\n",
-    "    \"\"\"\n",
-    "    create a dataframe and optimize its memory usage.\n",
+    "    \"\"\"Create a dataframe and optimize its memory usage.\n",
     "\n",
     "    I.e., apply some optimizations i.e, manual inference of dtypes, pre-selection\n",
     "    of unique columns and chunking to enable import.\n",
@@ -189,7 +188,7 @@
     "\n",
     "    format = \"%d%b%y:%H:%M:%S\"\n",
     "    df[\"QUOTE_DATETIME\"] = pd.to_datetime(df[\"QUOTE_DATETIME\"], format=format)\n",
-    "    return df\n"
+    "    return df"
    ]
   },
   {
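The body of `import_data` is collapsed in this view. As a rough, hypothetical sketch of what its docstring describes (manual dtype inference, pre-selection of unique columns, chunked reading): the column names and dtypes below are illustrative placeholders, not the notebook's actual mapping; only the `QUOTE_DATETIME` parsing is taken from the visible diff.

import pandas as pd

def import_data(input_file: str) -> pd.DataFrame:
    """Create a dataframe and optimize its memory usage."""
    # Placeholder dtypes; the notebook infers these manually for the CBOE columns.
    dtypes = {"TRADE_SIZE": "int32", "TRADE_PRICE": "float32"}
    usecols = [*dtypes, "QUOTE_DATETIME"]

    # Chunked read so the large CSV fits into memory.
    chunks = pd.read_csv(
        input_file, usecols=usecols, dtype=dtypes, chunksize=1_000_000
    )
    df = pd.concat(chunks, ignore_index=True)

    # Timestamp parsing as in the visible part of the diff.
    format = "%d%b%y:%H:%M:%S"
    df["QUOTE_DATETIME"] = pd.to_datetime(df["QUOTE_DATETIME"], format=format)
    return df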
@@ -203,8 +202,7 @@
     "def df_to_parquet(\n",
     "    x: pd.DataFrame, target_dir: str, chunk_size: int = 1000000, **parquet_wargs\n",
     ") -> None:\n",
-    "    \"\"\"\n",
-    "    Write pd.DataFrame to parquet format.\n",
+    "    \"\"\"Write pd.DataFrame to parquet format.\n",
     "\n",
     "    Args:\n",
     "        x (pd.DataFrame): input dataframe.\n",
Expand All @@ -222,7 +220,7 @@
" slc.to_parquet(output_path, **parquet_wargs)\n",
"\n",
" # log in w & b\n",
" dataset.add_reference(output_path, name=f\"raw_parquet_{chunk:04d}\")\n"
" dataset.add_reference(output_path, name=f\"raw_parquet_{chunk:04d}\")"
]
},
{
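From the visible pieces of `df_to_parquet` (its signature, docstring, and the tail of its loop), the chunking pattern is roughly the sketch below. The slicing and the `output_path` scheme are assumptions; the `to_parquet` call, the w&b logging line, and the `dataset` artifact (created in an earlier cell) come from the diff.

import numpy as np
import pandas as pd
import wandb

# Stand-in for the artifact the notebook creates earlier in a separate cell.
dataset = wandb.Artifact(name="example_csv", type="raw_data")

def df_to_parquet(
    x: pd.DataFrame, target_dir: str, chunk_size: int = 1000000, **parquet_wargs
) -> None:
    """Write pd.DataFrame to parquet format."""
    # Assumed slicing: split into pieces of at most `chunk_size` rows.
    n_chunks = max(1, -(-len(x) // chunk_size))  # ceiling division
    for chunk, slc in enumerate(np.array_split(x, n_chunks)):
        # Assumed path scheme: one parquet file per chunk, e.g. .../raw_parquet_0001.
        output_path = f"{target_dir}raw_parquet_{chunk:04d}"
        slc.to_parquet(output_path, **parquet_wargs)

        # log in w & b (as in the diff)
        dataset.add_reference(output_path, name=f"raw_parquet_{chunk:04d}")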
@@ -805,7 +803,7 @@
     "client = Client()\n",
     "\n",
     "df = import_data(FILE_PATH_INPUT)\n",
-    "df_to_parquet(df, FILE_PATH_OUTPUT)\n"
+    "df_to_parquet(df, FILE_PATH_OUTPUT)"
    ]
   },
   {
@@ -833,7 +831,7 @@
    "source": [
     "# Log the artifact to save it as an output of this run\n",
     "run.log_artifact(dataset)\n",
-    "wandb.finish()\n"
+    "wandb.finish()"
    ]
   }
  ],
