From 7c8ae1622c187cc77fde9afede8d6787de6f1498 Mon Sep 17 00:00:00 2001 From: Fateme Tardasti Date: Fri, 12 Jul 2024 12:05:41 +0200 Subject: [PATCH 1/7] update column extension function names and desc in readme --- README.md | 20 ++++++++++---------- quinn/functions.py | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 5bc3e910..1fbaa060 100644 --- a/README.md +++ b/README.md @@ -476,41 +476,41 @@ from quinn.extensions import * ### Column Extensions -**isFalsy()** +**is_falsy()** -Returns `True` if `has_stuff` is `None` or `False`. +Returns a Column indicating whether all values in the Column are False or NULL: `True` if `has_stuff` is `None` or `False`. ```python source_df.withColumn("is_stuff_falsy", F.col("has_stuff").isFalsy()) ``` -**isTruthy()** +**is_truthy()** -Returns `True` unless `has_stuff` is `None` or `False`. +Calculates a boolean expression that is the opposite of is_falsy for the given Column: `True` unless `has_stuff` is `None` or `False`. ```python source_df.withColumn("is_stuff_truthy", F.col("has_stuff").isTruthy()) ``` -**isNullOrBlank()** +**is_null_or_blank()** -Returns `True` if `blah` is `null` or blank (the empty string or a string that only contains whitespace). +Returns a Boolean value which expresses whether a given column is NULL or contains only blank characters: `True` if `blah` is `null` or blank (the empty string or a string that only contains whitespace). ```python source_df.withColumn("is_blah_null_or_blank", F.col("blah").isNullOrBlank()) ``` -**isNotIn()** +**is_not_in()** -Returns `True` if `fun_thing` is not included in the `bobs_hobbies` list. +To see if a value is not in a list of values: `True` if `fun_thing` is not included in the `bobs_hobbies` list. ```python source_df.withColumn("is_not_bobs_hobby", F.col("fun_thing").isNotIn(bobs_hobbies)) ``` -**nullBetween()** +**null_between()** -Returns `True` if `age` is between `lower_age` and `upper_age`. 
If `lower_age` is populated and `upper_age` is `null`, it will return `True` if `age` is greater than or equal to `lower_age`. If `lower_age` is `null` and `upper_age` is populate, it will return `True` if `age` is lower than or equal to `upper_age`. +To see if a value is between two values in a null-friendly way: `True` if `age` is between `lower_age` and `upper_age`. If `lower_age` is populated and `upper_age` is `null`, it will return `True` if `age` is greater than or equal to `lower_age`. If `lower_age` is `null` and `upper_age` is populated, it will return `True` if `age` is lower than or equal to `upper_age`. ```python source_df.withColumn("is_between", F.col("age").nullBetween(F.col("lower_age"), F.col("upper_age"))) diff --git a/quinn/functions.py b/quinn/functions.py index f802ae55..d0e38638 100644 --- a/quinn/functions.py +++ b/quinn/functions.py @@ -318,9 +318,9 @@ def is_falsy(col: Column) -> Column: def is_truthy(col: Column) -> Column: - """Calculates a boolean expression that is the opposite of isFalsy for the given ``Column`` col. + """Calculates a boolean expression that is the opposite of is_falsy for the given ``Column`` col. - :param Column col: The ``Column`` to calculate the opposite of isFalsy for. + :param Column col: The ``Column`` to calculate the opposite of is_falsy for. :returns: A ``Column`` with the results of the calculation. :rtype: Column """ From d11707a9916f65ea9acb0fb82dba228e713a8759 Mon Sep 17 00:00:00 2001 From: Nijanthan <6072170+nijanthanvijayakumar@users.noreply.github.com> Date: Mon, 15 Jul 2024 23:03:39 +1000 Subject: [PATCH 2/7] DOC: CONTRIBUTING.md - add details on precommit & local GitHub Actions setup Add details to `CONTRIBUTING.md` on auto-assigning issues, pre-commit installation, and GitHub Actions local setup using 'act'. * **Auto-assigning issues**: Add a section explaining auto-assigning issues on the comment 'take', referencing the configuration in `.github/workflows/assign-on-comment.yml`. 
* **Pre-commit installation and execution**: Add a section detailing pre-commit installation and execution, referencing the configuration in `.pre-commit-config.yaml`. * **GitHub Actions local setup using 'act'**: Add a section providing instructions for GitHub Actions local setup using 'act', referencing the configuration in `.github/workflows/ci.yml`. Include instructions for running specific jobs and handling MacBooks with M1 processors. --- CONTRIBUTING.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2fbcb3d4..eeacdd37 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,6 +14,10 @@ Scan through our [existing issues](https://github.com/MrPowers/quinn/issues) to You can find a list of [good first issues](https://github.com/MrPowers/quinn/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) which can help you better understand code base of the project. +### Auto-assigning issues + +We have a workflow that automatically assigns issues to users who comment 'take' on an issue. This is configured in the `.github/workflows/assign-on-comment.yml` file. + ## Contributing ### Fork the repository @@ -49,6 +53,17 @@ make install_deps To run spark tests you need to have properly configured Java. Apache Spark currently supports mainly only Java 8 (1.8). You can find an instruction on how to set up Java [here](https://www.java.com/en/download/help/download_options.html). When you are running spark tests you should have `JAVA_HOME` variable in your environment which points to the installation of Java 8. +### Pre-commit installation and execution + +We use pre-commit hooks to ensure code quality. The configuration for pre-commit hooks is in the `.pre-commit-config.yaml` file. 
To install pre-commit, run: +```shell +pip install pre-commit +pre-commit install +``` +To run pre-commit hooks manually, use: +```shell +pre-commit run --all-files +``` ### Running Tests @@ -57,6 +72,26 @@ You can run test as following: ```shell make test ``` + +### GitHub Actions local setup using 'act' + +You can run GitHub Actions locally using the `act` tool. The configuration for GitHub Actions is in the `.github/workflows/ci.yml` file. To install `act`, follow the instructions [here](https://github.com/nektos/act#installation). To run a specific job, use: +```shell +act -j <job_name> +``` +For example, to run the `test` job, use: +```shell +act -j test +``` +If you need help with `act`, use: +```shell +act --help +``` +For MacBooks with M1 processors, you might have to add the `--container-architecture` flag: +```shell +act -j <job_name> --container-architecture linux/arm64 +``` + ### Code style This project follows the [PySpark style guide](https://github.com/MrPowers/spark-style-guide/blob/main/PYSPARK_STYLE_GUIDE.md). All public functions and methods should be documented in `README.md` and also should have docstrings in `sphinx format`: From d89cd313d812c6aded93976af2cb1776176f14af Mon Sep 17 00:00:00 2001 From: Niju Vijayakumar Date: Tue, 16 Jul 2024 19:09:23 +1000 Subject: [PATCH 3/7] Update the CONTRIBUTING.md regarding auto-assign issues --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eeacdd37..4cd862d0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -16,7 +16,7 @@ You can find a list of [good first issues](https://github.com/MrPowers/quinn/iss ### Auto-assigning issues -We have a workflow that automatically assigns issues to users who comment 'take' on an issue. This is configured in the `.github/workflows/assign-on-comment.yml` file. +We have a workflow that automatically assigns issues to users who comment 'take' on an issue. 
This is configured in the `.github/workflows/assign-on-comment.yml` file. When a user comments `take` on the issue, a GitHub Action will be run to assign the issue to the user if it's not already assigned. ## Contributing From c05af322b62f926b40681b9122db39118a1abeb3 Mon Sep 17 00:00:00 2001 From: Niju Vijayakumar Date: Tue, 16 Jul 2024 20:22:42 +1000 Subject: [PATCH 4/7] Change pip to poetry for pre-commit installation According to the review comment, make the pip installs as poetry installs --- CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4cd862d0..75325696 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -57,8 +57,8 @@ To run spark tests you need to have properly configured Java. Apache Spark curre We use pre-commit hooks to ensure code quality. The configuration for pre-commit hooks is in the `.pre-commit-config.yaml` file. To install pre-commit, run: ```shell -pip install pre-commit -pre-commit install +poetry shell +poetry run pre-commit install ``` To run pre-commit hooks manually, use: ```shell From 548f4454e2a5d8f4f6c6e103f0ee70148e71711d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 29 Aug 2024 18:26:57 +0000 Subject: [PATCH 5/7] Bump jupyterlab from 3.6.7 to 3.6.8 Bumps [jupyterlab](https://github.com/jupyterlab/jupyterlab) from 3.6.7 to 3.6.8. - [Release notes](https://github.com/jupyterlab/jupyterlab/releases) - [Changelog](https://github.com/jupyterlab/jupyterlab/blob/main/CHANGELOG.md) - [Commits](https://github.com/jupyterlab/jupyterlab/compare/@jupyterlab/vdom@3.6.7...@jupyterlab/vdom@3.6.8) --- updated-dependencies: - dependency-name: jupyterlab dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 4e9931c1..4d1de893 100644 --- a/poetry.lock +++ b/poetry.lock @@ -994,13 +994,13 @@ test = ["pre-commit", "pytest", "pytest-asyncio", "websockets (>=10.0)", "ypy-we [[package]] name = "jupyterlab" -version = "3.6.7" +version = "3.6.8" description = "JupyterLab computational environment" optional = false python-versions = ">=3.7" files = [ - {file = "jupyterlab-3.6.7-py3-none-any.whl", hash = "sha256:d92d57d402f53922bca5090654843aa08e511290dff29fdb0809eafbbeb6df98"}, - {file = "jupyterlab-3.6.7.tar.gz", hash = "sha256:2fadeaec161b0d1aec19f17721d8b803aef1d267f89c8b636b703be14f435c8f"}, + {file = "jupyterlab-3.6.8-py3-none-any.whl", hash = "sha256:891284e75158998e23eb7a23ecc4caaf27b365e41adca374109b1305b9f769db"}, + {file = "jupyterlab-3.6.8.tar.gz", hash = "sha256:a2477383e23f20009188bd9dac7e6e38dbc54307bc36d716bea6ced450647c97"}, ] [package.dependencies] From f1e7cc95b2406f96d085d810bc1d148fa90e0566 Mon Sep 17 00:00:00 2001 From: Nijanthan <6072170+nijanthanvijayakumar@users.noreply.github.com> Date: Mon, 16 Sep 2024 10:46:17 +1000 Subject: [PATCH 6/7] Add configuration files for release-please and update contributing guide * **release-please-config.json**: Add configuration file with release type set to `python`, changelog path to `CHANGELOG.md`, and packages to include `quinn`. * **.release-please-manifest.json**: Add manifest file with `quinn` version set to `1.0.0`. 
* **.github/workflows/release-please.yml**: Add workflow file for release-please action * **CONTRIBUTING.md**: Add a section for the "Release Process" --- .github/workflows/release-please.yml | 19 +++++++++++++++++++ .release-please-manifest.json | 3 +++ CONTRIBUTING.md | 23 +++++++++++++++++++++++ release-please-config.json | 14 ++++++++++++++ 4 files changed, 59 insertions(+) create mode 100644 .github/workflows/release-please.yml create mode 100644 .release-please-manifest.json create mode 100644 release-please-config.json diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml new file mode 100644 index 00000000..8ca09760 --- /dev/null +++ b/.github/workflows/release-please.yml @@ -0,0 +1,19 @@ +on: + push: + branches: + - main + +permissions: + contents: write + pull-requests: write + +name: release-please + +jobs: + release-please: + runs-on: ubuntu-latest + steps: + - uses: googleapis/release-please-action@v4 + with: + config-file: release-please-config.json + manifest-file: .release-please-manifest.json diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file mode 100644 index 00000000..37fcefaa --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "1.0.0" +} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 75325696..036e4680 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -159,3 +159,26 @@ When you're finished with the changes, create a pull request, also known as a PR - Don't forget to link PR to the issue if you are solving one. - As you update your PR and apply changes, mark each conversation as resolved. - If you run into any merge issues, checkout this [git tutorial](https://github.com/skills/resolve-merge-conflicts) to help you resolve merge conflicts and other issues. 
+ +### Release Process + +#### How to create a release + +If the latest release on GitHub is 1.0.0, and the latest pre-release is 1.1.0-rc, and if we want to create a release of 1.1.0 then the `release-please-config.json` should be updated by removing the `"versioning":"prerelease"` and setting the `"prerelease": false`. Otherwise, the GitHub Actions will not create a release and it would rather create a pre-release again. + +For this whole release process to work seamlessly, lets say the latest release on GitHub is 1.0.0, and the latest pre-release is 1.1.0-rc, and if we want to create a release of 1.1.0, then we will have to set `"bump-minor-pre-major": true,` in the `release-please-config.json` and we should change the version in the `.release-please-manifest.json` from `1.1.0-rc` to `1.0.0` before pushing changes to the remote branch. + +Improper configs: +If the config is `"bump-minor-pre-major": false,` and the `.release-please-manifest.json` contains the version as `1.1.0-rc`, then the automated GH Actions will bump the major version creating a release of 2.0.0. + +If the config is `"bump-minor-pre-major": true,` and the `.release-please-manifest.json` contains the version as `1.1.0-rc`, then the automated GH Actions will create a version of `1.2.0`. Either of those above improper configs will lead to a gap in the release. Hence, it's important to downgrade the version in the version file and set the `"bump-minor-pre-major": true,` if we are creating a minor release. If we are creating a major release, then perhaps use the config `"bump-minor-pre-major": false,` and then set the version in the version file to the latest release version. + +#### How to create a pre-release + +In order to create a pre-release, the user should just update the `release-please-config.json` in the root. They should set the `"prerelease"` to `true` and the `"versioning"` to `"prerelease"`. Otherwise, a pre-release will not be created. 
In this case, since the pre-release-type is set to `rc`, if the current version is `1.0.0`, and the following configs are set as `"bump-minor-pre-major": true, "bump-patch-for-minor-pre-major": false,`, then the pre-release version will be bumped to as `1.1.0-rc`. + +If the configs are as follows `"bump-minor-pre-major": false, "bump-patch-for-minor-pre-major": false,`, then the pre-release version will be `2.0.0-rc`, i.e., the major version will be bumped. + +#### Conventional commit messages tips + +- diff --git a/release-please-config.json b/release-please-config.json new file mode 100644 index 00000000..69deeb83 --- /dev/null +++ b/release-please-config.json @@ -0,0 +1,14 @@ +{ + "packages": { + ".": { + "changelog-path": "CHANGELOG.md", + "release-type": "python", + "prerelease-type": "rc", + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": false, + "draft": false, + "prerelease": true, + "versioning": "prerelease" + } + } +} \ No newline at end of file From bf8796e65c5667ddca6e3bba1c308ab5b7fc472e Mon Sep 17 00:00:00 2001 From: Niju Vijayakumar Date: Mon, 16 Sep 2024 11:04:05 +1000 Subject: [PATCH 7/7] feat: Update the documentation to elaborate the release process --- .release-please-manifest.json | 2 +- CONTRIBUTING.md | 57 ++++++++++++++++++++++++++++------- release-please-config.json | 2 +- 3 files changed, 48 insertions(+), 13 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 37fcefaa..8c7bf663 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.0.0" + ".": "0.10.3" } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 036e4680..1bcb1ba7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -162,23 +162,58 @@ When you're finished with the changes, create a pull request, also known as a PR ### Release Process -#### How to create a release +#### How to Create a Release -If the latest release on GitHub is 1.0.0, and the latest pre-release is 
1.1.0-rc, and if we want to create a release of 1.1.0 then the `release-please-config.json` should be updated by removing the `"versioning":"prerelease"` and setting the `"prerelease": false`. Otherwise, the GitHub Actions will not create a release and it would rather create a pre-release again. +- **Update `release-please-config.json`:** + - Remove `"versioning": "prerelease"`. + - Set `"prerelease": false`. -For this whole release process to work seamlessly, lets say the latest release on GitHub is 1.0.0, and the latest pre-release is 1.1.0-rc, and if we want to create a release of 1.1.0, then we will have to set `"bump-minor-pre-major": true,` in the `release-please-config.json` and we should change the version in the `.release-please-manifest.json` from `1.1.0-rc` to `1.0.0` before pushing changes to the remote branch. +- **Ensure correct version bump:** + - If the latest release is `1.0.0` and the latest pre-release is `1.1.0-rc`, and you want to create a release with the version `1.1.0`: + - Set `"bump-minor-pre-major": true` in `release-please-config.json`. + - Change the version in `.release-please-manifest.json` from `1.1.0-rc` to `1.0.0`. -Improper configs: -If the config is `"bump-minor-pre-major": false,` and the `.release-please-manifest.json` contains the version as `1.1.0-rc`, then the automated GH Actions will bump the major version creating a release of 2.0.0. +- **Avoid configurations that cause version/release gaps:** + - If `"bump-minor-pre-major": false` and `.release-please-manifest.json` is `1.1.0-rc`, it will create a major release `2.0.0`. + - If `"bump-minor-pre-major": true` and `.release-please-manifest.json` is `1.1.0-rc`, it will create a minor release `1.2.0`. -If the config is `"bump-minor-pre-major": true,` and the `.release-please-manifest.json` contains the version as `1.1.0-rc`, then the automated GH Actions will create a version of `1.2.0`. Either of those above improper configs will lead to a gap in the release. 
Hence, it's important to downgrade the version in the version file and set the `"bump-minor-pre-major": true,` if we are creating a minor release. If we are creating a major release, then perhaps use the config `"bump-minor-pre-major": false,` and then set the version in the version file to the latest release version. +- **Key points:** + - Downgrade the version in the version file for minor releases. + - Use `"bump-minor-pre-major": true` for minor releases. + - Use `"bump-minor-pre-major": false` for major releases and set the version to the latest release version. -#### How to create a pre-release +#### How to Create a Pre-Release -In order to create a pre-release, the user should just update the `release-please-config.json` in the root. They should set the `"prerelease"` to `true` and the `"versioning"` to `"prerelease"`. Otherwise, a pre-release will not be created. In this case, since the pre-release-type is set to `rc`, if the current version is `1.0.0`, and the following configs are set as `"bump-minor-pre-major": true, "bump-patch-for-minor-pre-major": false,`, then the pre-release version will be bumped to as `1.1.0-rc`. +- Update the `release-please-config.json` in the root. + - Set `"prerelease"` to `true`. + - Add the line `"versioning": "prerelease"`. +- Ensure the pre-release type is set to `rc`. + - For example, if the current version is `1.0.0` and the following configs are set: + - `"bump-minor-pre-major": true` + - `"bump-patch-for-minor-pre-major": false` + - The pre-release version will be bumped to `1.1.0-rc`. + - If the configs are set as: + - `"bump-minor-pre-major": false` + - `"bump-patch-for-minor-pre-major": false` + - The pre-release version will be `2.0.0-rc`, i.e., the major version will be bumped. -If the configs are as follows `"bump-minor-pre-major": false, "bump-patch-for-minor-pre-major": false,`, then the pre-release version will be `2.0.0-rc`, i.e., the major version will be bumped. 
-#### Conventional commit messages tips +### Conventional Commit Messages -- +- **Format**: `<type>[optional scope]: <description>` +- **Type**: Specifies the nature of the change (e.g., `feat`, `fix`, `docs`). +- **Scope**: Optional part that specifies the section of the codebase affected. +- **Description**: A brief summary of the change. +- **Body**: Optional detailed explanation of the change. +- **Footer**: Optional additional information, such as breaking changes or issue references. + +#### Example + +```plaintext +feat(parser): add support for new data format + +Added a new parser to handle the VARIANT data format. This change includes updates to the parser module and corresponding tests. + +Closes #123 +``` +For more information on conventional commit messages, check this site: https://www.conventionalcommits.org/en/v1.0.0/ \ No newline at end of file diff --git a/release-please-config.json b/release-please-config.json index 69deeb83..af27e83f 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -4,7 +4,7 @@ "changelog-path": "CHANGELOG.md", "release-type": "python", "prerelease-type": "rc", - "bump-minor-pre-major": true, + "bump-minor-pre-major": false, "bump-patch-for-minor-pre-major": false, "draft": false, "prerelease": true,