
Commit

ci: migrate to uv and update build tools🔧 (#478)
* ci: migrate to uv and update build tools🔧

* ci: update gh workflow to use uv🔧

* ci: set python version to 3.11

* tests: update resource setup in tests

* ci: fix coverage report generation🔧
KarelZe authored Dec 2, 2024
1 parent 2b25376 commit 037e69a
Showing 33 changed files with 5,823 additions and 5,306 deletions.
25 changes: 12 additions & 13 deletions .github/workflows/action_python.yaml
@@ -1,9 +1,9 @@
 name: Python package

 on:
-  push:
-    paths:
-      - 'src/**'
+  push:
+    # paths:
+    #   - 'src/**'

 jobs:
   build:
@@ -25,19 +25,18 @@ jobs:
         shell: bash
         env:
           WANDB_API_KEY: ${{secrets.WANDB_API_KEY}}
-      - name: Install poetry
-        run: pipx install poetry
-      - name: Set up Python and install dependencies
-        uses: actions/setup-python@v5
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
         with:
-          python-version: '3.10'
-          cache: 'poetry'
-      - name: install poetry
-        run: poetry install
+          enable-cache: true
+      - name: Set up Python
+        run: uv python install 3.11
+      - name: Install the project
+        run: uv sync --all-extras --dev
       - name: Test with pytest
         run: |
-          poetry run pytest --cov=src tests/ --doctest-modules --junitxml=cover/xunit-result.xml --cov-report xml:cover/coverage.xml
-          poetry run coverage json -o cover/coverage.json
+          uv run pytest --cov=src tests/ --junitxml=cover/xunit-result.xml --cov-report xml:cover/coverage.xml
+          uv run coverage json -o cover/coverage.json
       - name: "Extract numbers from cov report"
         run: |
           export TOTAL=$(python -c "import json;print(json.load(open('cover/coverage.json'))['totals']['percent_covered_display'])")
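Note: the coverage extraction step above shells out to an inline `python -c` one-liner. Unrolled into plain Python, it amounts to roughly the following sketch (the `cover/coverage.json` path comes from the workflow above; `percent_covered_display` is the rounded total in coverage.py's JSON report):

import json

# coverage.py's JSON report keeps rolled-up stats under "totals".
# "percent_covered_display" is the rounded percentage as a string, e.g. "87".
with open("cover/coverage.json") as f:
    report = json.load(f)

total = report["totals"]["percent_covered_display"]
print(total)  # the workflow step exports this value as $TOTAL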
3 changes: 0 additions & 3 deletions .gitignore
@@ -68,9 +68,6 @@ target/
 # Pycharm
 .idea

-# VS Code
-.vscode/
-
 # Spyder
 .spyproject/

1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -25,6 +25,7 @@ repos:
   hooks:
     - id: trailing-whitespace
     - id: check-added-large-files
+      exclude: uv.lock
     - id: check-builtin-literals
     - id: check-byte-order-marker
     - id: check-merge-conflict
21 changes: 21 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,21 @@
+{
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
+    "python.testing.autoTestDiscoverOnSaveEnabled": true,
+    "[python]": {
+        "editor.formatOnSave": true,
+        "editor.defaultFormatter": "charliermarsh.ruff",
+        "editor.codeActionsOnSave": {
+            "source.fixAll": "explicit",
+            "source.organizeImports": "explicit"
+        }
+    },
+    "notebook.formatOnSave.enabled": true,
+    "notebook.codeActionsOnSave": {
+        "notebook.source.fixAll": "explicit",
+        "notebook.source.organizeImports": "explicit"
+    }
+}
22 changes: 10 additions & 12 deletions notebooks/1.0-mb-data-preprocessing-mem-reduce.ipynb
@@ -31,7 +31,7 @@
     "ProgressBar.enable()\n",
     "\n",
     "import wandb\n",
-    "from tqdm.auto import tqdm\n"
+    "from tqdm.auto import tqdm"
    ]
   },
   {
@@ -47,7 +47,7 @@
     "FILE_PATH_INPUT = (\n",
     "    \"gs://thesis-bucket-option-trade-classification/data/raw/matched_cboe_quotes.csv\"\n",
     ")\n",
-    "FILE_PATH_OUTPUT = \"gs://thesis-bucket-option-trade-classification/data/preprocessed/\"\n"
+    "FILE_PATH_OUTPUT = \"gs://thesis-bucket-option-trade-classification/data/preprocessed/\""
    ]
   },
   {
@@ -58,7 +58,7 @@
    "source": [
     "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n",
     "credentials, _ = google.auth.default()\n",
-    "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)\n"
+    "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)"
    ]
   },
   {
@@ -76,7 +76,7 @@
    "source": [
     "# connect to weights and biases\n",
     "run = wandb.init(project=\"thesis\", job_type=\"dataset-creation\", entity=\"fbv\")\n",
-    "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_csv\", type=\"raw_data\")\n"
+    "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_csv\", type=\"raw_data\")"
    ]
   },
   {
@@ -88,8 +88,7 @@
    "outputs": [],
    "source": [
     "def import_data(input_file: str) -> pd.DataFrame:\n",
-    "    \"\"\"\n",
-    "    create a dataframe and optimize its memory usage.\n",
+    "    \"\"\"Create a dataframe and optimize its memory usage.\n",
     "\n",
     "    I.e., apply some optimizations i.e, manual inference of dtypes, pre-selection\n",
     "    of unique columns and chunking to enable import.\n",
@@ -189,7 +188,7 @@
     "\n",
     "    format = \"%d%b%y:%H:%M:%S\"\n",
     "    df[\"QUOTE_DATETIME\"] = pd.to_datetime(df[\"QUOTE_DATETIME\"], format=format)\n",
-    "    return df\n"
+    "    return df"
    ]
   },
   {
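The body of `import_data` is collapsed in this view. As a rough, hypothetical sketch of what its docstring describes (manual dtype inference, pre-selection of unique columns, chunked reading): the column names and dtypes below are illustrative placeholders, not the notebook's actual mapping; only the `QUOTE_DATETIME` parsing is taken from the visible diff.

import pandas as pd

def import_data(input_file: str) -> pd.DataFrame:
    """Create a dataframe and optimize its memory usage."""
    # Placeholder dtypes; the notebook infers these manually for the CBOE columns.
    dtypes = {"TRADE_SIZE": "int32", "TRADE_PRICE": "float32"}
    usecols = [*dtypes, "QUOTE_DATETIME"]

    # Chunked read so the large CSV fits into memory.
    chunks = pd.read_csv(
        input_file, usecols=usecols, dtype=dtypes, chunksize=1_000_000
    )
    df = pd.concat(chunks, ignore_index=True)

    # Timestamp parsing as in the visible part of the diff.
    format = "%d%b%y:%H:%M:%S"
    df["QUOTE_DATETIME"] = pd.to_datetime(df["QUOTE_DATETIME"], format=format)
    return df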
@@ -203,8 +202,7 @@
     "def df_to_parquet(\n",
     "    x: pd.DataFrame, target_dir: str, chunk_size: int = 1000000, **parquet_wargs\n",
     ") -> None:\n",
-    "    \"\"\"\n",
-    "    Write pd.DataFrame to parquet format.\n",
+    "    \"\"\"Write pd.DataFrame to parquet format.\n",
     "\n",
     "    Args:\n",
     "        x (pd.DataFrame): input dataframe.\n",
Expand All @@ -222,7 +220,7 @@
" slc.to_parquet(output_path, **parquet_wargs)\n",
"\n",
" # log in w & b\n",
" dataset.add_reference(output_path, name=f\"raw_parquet_{chunk:04d}\")\n"
" dataset.add_reference(output_path, name=f\"raw_parquet_{chunk:04d}\")"
]
},
{
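From the visible pieces of `df_to_parquet` (its signature, docstring, and the tail of its loop), the chunking pattern is roughly the sketch below. The slicing and the `output_path` scheme are assumptions; the `to_parquet` call, the w&b logging line, and the `dataset` artifact (created in an earlier cell) come from the diff.

import numpy as np
import pandas as pd
import wandb

# Stand-in for the artifact the notebook creates earlier in a separate cell.
dataset = wandb.Artifact(name="example_csv", type="raw_data")

def df_to_parquet(
    x: pd.DataFrame, target_dir: str, chunk_size: int = 1000000, **parquet_wargs
) -> None:
    """Write pd.DataFrame to parquet format."""
    # Assumed slicing: split into pieces of at most `chunk_size` rows.
    n_chunks = max(1, -(-len(x) // chunk_size))  # ceiling division
    for chunk, slc in enumerate(np.array_split(x, n_chunks)):
        # Assumed path scheme: one parquet file per chunk, e.g. .../raw_parquet_0001.
        output_path = f"{target_dir}raw_parquet_{chunk:04d}"
        slc.to_parquet(output_path, **parquet_wargs)

        # log in w & b (as in the diff)
        dataset.add_reference(output_path, name=f"raw_parquet_{chunk:04d}")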
@@ -805,7 +803,7 @@
     "client = Client()\n",
     "\n",
     "df = import_data(FILE_PATH_INPUT)\n",
-    "df_to_parquet(df, FILE_PATH_OUTPUT)\n"
+    "df_to_parquet(df, FILE_PATH_OUTPUT)"
    ]
   },
   {
@@ -833,7 +831,7 @@
    "source": [
     "# Log the artifact to save it as an output of this run\n",
     "run.log_artifact(dataset)\n",
-    "wandb.finish()\n"
+    "wandb.finish()"
    ]
   }
  ],
