From 298f3f09b3f96a857af60e26021e79b6d025c841 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 08:00:06 +0000 Subject: [PATCH 01/18] update --- .github/workflows/release-pypi.yml | 2 ++ requirements/test.txt | 4 ++++ setup.py | 5 ++--- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index f3d6ebfe..3b33f3e3 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -21,8 +21,10 @@ jobs: - name: Install dependencies run: pip install -U build twine + - name: Build package run: python -m build + - name: Check package run: twine check dist/* diff --git a/requirements/test.txt b/requirements/test.txt index c3fbc921..1b85876c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -2,3 +2,7 @@ coverage>=5.0 pytest>=6.0 pytest-cov mypy==1.8.0 +viztracer +pandas +pyarrow +pillow \ No newline at end of file diff --git a/setup.py b/setup.py index 382eda42..a90ee702 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = include_package_data=True, zip_safe=False, keywords=["deep learning", "pytorch", "AI"], - python_requires=">=3.8", + python_requires=">=3.10", setup_requires=["wheel"], install_requires=_load_requirements(), extras_require=_prepare_extras(), @@ -91,9 +91,8 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], ) \ No newline at end of file From 387dae653d74eb91282db4d0c029a2b2b1d5f090 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 08:01:36 +0000 Subject: [PATCH 02/18] update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a90ee702..123e7630 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = include_package_data=True, zip_safe=False, keywords=["deep learning", "pytorch", "AI"], - python_requires=">=3.10", + python_requires=">=3.8", setup_requires=["wheel"], install_requires=_load_requirements(), extras_require=_prepare_extras(), From 4a93a033e5fd9eeb1363ebdbc9f8f79c02b9ab4a Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 08:03:23 +0000 Subject: [PATCH 03/18] update --- .github/workflows/ci-checks.yml | 2 +- .github/workflows/ci-testing.yml | 2 +- .github/workflows/docs-build.yml | 2 +- .readthedocs.yml | 2 +- MANIFEST.in | 2 +- Makefile | 6 +++--- setup.py | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml index e540ac19..0d7d66f6 100644 --- a/.github/workflows/ci-checks.yml +++ b/.github/workflows/ci-checks.yml @@ -39,4 +39,4 @@ jobs: check-docs: uses: Lightning-AI/utilities/.github/workflows/check-docs.yml@main with: - requirements-file: "_requirements/docs.txt" + requirements-file: "requirements/docs.txt" diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 1300a004..20905c2a 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -41,7 +41,7 @@ jobs: - name: Set min. dependencies if: matrix.requires == 'oldest' run: | - for fpath in ('requirements.txt', '_requirements/test.txt'): + for fpath in ('requirements.txt', 'requirements/test.txt'): req = open(fpath).read().replace('>=', '==') open(fpath, 'w').write(req) shell: python diff --git a/.github/workflows/docs-build.yml b/.github/workflows/docs-build.yml index 29670d7b..341eb41c 100644 --- a/.github/workflows/docs-build.yml +++ b/.github/workflows/docs-build.yml @@ -10,7 +10,7 @@ jobs: build-docs: uses: Lightning-AI/utilities/.github/workflows/check-docs.yml@main with: - requirements-file: "_requirements/docs.txt" + requirements-file: "requirements/docs.txt" # https://github.com/marketplace/actions/deploy-to-github-pages docs-deploy: diff --git a/.readthedocs.yml b/.readthedocs.yml index f6bb36d5..e67b1544 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -17,4 +17,4 @@ formats: all python: version: 3.7 install: - - requirements: _requirements/docs.txt + - requirements: requirements/docs.txt diff --git a/MANIFEST.in b/MANIFEST.in index 4cd9bf95..55724b79 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -28,7 +28,7 @@ exclude docs # Include the Requirements include requirements.txt -recursive-include _requirements *.tx;t +recursive-include requirements *.tx;t # Exclude Makefile exclude Makefile diff --git a/Makefile b/Makefile index 6a8c0abb..e1d83cd4 100644 --- a/Makefile +++ b/Makefile @@ -6,15 +6,15 @@ export SLURM_LOCALID=0 export SPHINX_MOCK_REQUIREMENTS=0 test: clean - pip install -q -r requirements.txt - pip install -q -r _requirements/test.txt + pip install -q -r requirements/requirements.txt + pip install -q -r requirements/test.txt # use this to run tests python -m coverage run --source lit_data -m pytest src tests -v --flake8 python -m coverage report docs: clean - pip install . --quiet -r _requirements/docs.txt + pip install . --quiet -r requirements/docs.txt python -m sphinx -b html -W --keep-going docs/source docs/build clean: diff --git a/setup.py b/setup.py index 123e7630..4b79c44e 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ from setuptools import find_packages, setup _PATH_ROOT = os.path.dirname(__file__) -_PATH_REQUIRES = os.path.join(_PATH_ROOT, "_requirements") +_PATH_REQUIRES = os.path.join(_PATH_ROOT, "requirements") def _load_py_module(fname, pkg="lit_data"): From 50529ddb4f390a92cd5e1b0298d10c0da8ce9b27 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 08:19:24 +0000 Subject: [PATCH 04/18] update --- .github/workflows/ci-testing.yml | 2 +- .pre-commit-config.yaml | 112 ++++++++------ Makefile | 2 +- lit_data/CHANGELOG.md | 47 ++++++ pyproject.toml | 254 +++++++++++++++++++------------ requirements/requirements.txt | 5 +- requirements/test.txt | 10 +- 7 files changed, 286 insertions(+), 146 deletions(-) create mode 100644 lit_data/CHANGELOG.md diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 20905c2a..3d3db3d8 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -65,7 +65,7 @@ jobs: - name: Tests run: | - coverage run --source lit_data -m pytest src tests -v + coverage run --source lit_data -m pytest tests -v - name: Statistics if: success() diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ea7b710c..295c2f1f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,89 +1,113 @@ +# Copyright The Lightning AI team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + default_language_version: python: python3 ci: autofix_prs: true autoupdate_commit_msg: "[pre-commit.ci] pre-commit suggestions" - autoupdate_schedule: "monthly" + autoupdate_schedule: quarterly # submodules: true repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: end-of-file-fixer - id: trailing-whitespace - - id: check-case-conflict + # keep formatting in README flexible + exclude: README.md + - id: check-json - id: check-yaml - id: check-toml - - id: check-json - - id: check-added-large-files - id: check-docstring-first + - id: check-executables-have-shebangs + - id: check-case-conflict + - id: check-added-large-files + args: ["--maxkb=350", "--enforce-all"] + exclude: | + (?x)^( + docs/source-pytorch/_static/images/general/fast_2.gif| + docs/source-pytorch/_static/images/mnist_imgs/pt_to_pl.jpg| + docs/source-pytorch/_static/images/lightning_module/pt_to_pl.png| + docs/source-pytorch/_static/images/general/pl_quick_start_full_compressed.gif| + docs/source-pytorch/_static/images/general/pl_overview_flat.jpg| + docs/source-pytorch/_static/images/general/pl_overview.gif| + src/lightning/fabric/CHANGELOG.md| + src/lightning/pytorch/CHANGELOG.md + )$ - id: detect-private-key - repo: https://github.com/asottile/pyupgrade - rev: v3.9.0 + rev: v3.15.0 hooks: - id: pyupgrade args: ["--py38-plus"] name: Upgrade code - - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.10.0 - hooks: - - id: python-use-type-annotations - - repo: https://github.com/codespell-project/codespell - rev: v2.2.5 + rev: v2.2.6 hooks: - id: codespell additional_dependencies: [tomli] - #args: ["--write-changes"] + #args: ["--write-changes"] # uncomment if you want to get automatic fixing - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.3 - hooks: - - id: prettier - # https://prettier.io/docs/en/options.html#print-width - args: ["--print-width=120"] - - - repo: https://github.com/myint/docformatter + - repo: https://github.com/PyCQA/docformatter rev: v1.7.5 hooks: - id: docformatter additional_dependencies: [tomli] args: ["--in-place"] - - repo: https://github.com/psf/black - rev: 23.7.0 + - repo: https://github.com/asottile/yesqa + rev: v1.5.0 hooks: - - id: black - name: Black code + - id: yesqa + name: Unused noqa + additional_dependencies: + #- pep8-naming + - flake8-pytest-style + - flake8-bandit + - flake8-simplify + - flake8-return + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: "v0.2.0" + hooks: + - id: ruff + args: ["--fix", "--preview"] + - id: ruff-format + args: ["--preview"] - repo: https://github.com/executablebooks/mdformat - rev: 0.7.16 + rev: 0.7.17 hooks: - id: mdformat additional_dependencies: - mdformat-gfm - - mdformat-black + #- mdformat-black - mdformat_frontmatter - exclude: CHANGELOG.md - - - repo: https://github.com/asottile/yesqa - rev: v1.5.0 - hooks: - - id: yesqa - additional_dependencies: - - pep8-naming - - pydocstyle - #- flake8-comprehensions - #- flake8-pytest-style - #- flake8-return - #- flake8-simplify + exclude: | + (?x)^( + src/data/CHANGELOG.md| + README.md + )$ - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.277 + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v3.1.0 hooks: - - id: ruff - args: ["--fix"] + - id: prettier + # https://prettier.io/docs/en/options.html#print-width + args: ["--print-width=120"] diff --git a/Makefile b/Makefile index e1d83cd4..56f3a147 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ test: clean pip install -q -r requirements/test.txt # use this to run tests - python -m coverage run --source lit_data -m pytest src tests -v --flake8 + python -m coverage run --source lit_data -m pytest src -v --flake8 python -m coverage report docs: clean diff --git a/lit_data/CHANGELOG.md b/lit_data/CHANGELOG.md new file mode 100644 index 00000000..255c28a4 --- /dev/null +++ b/lit_data/CHANGELOG.md @@ -0,0 +1,47 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). + +## [unReleased] - 2024-MM-DD + +### Added + +- + +- + +- + +### Changed + +- + +- + +- + +### Deprecated + +- + +- + +- + +### Removed + +- + +- + +- + +### Fixed + +- + +- + +- diff --git a/pyproject.toml b/pyproject.toml index d3a55d64..ee320fb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,21 @@ +# Copyright The Lightning AI team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + [metadata] -license_file = "LICENSE" -description-file = "README.md" +name = "lit_data" +author = "Lightning-AI et al." +url = "https://github.com/Lightning-AI/lit-data" [build-system] requires = [ @@ -9,125 +24,174 @@ requires = [ ] -[tool.check-manifest] -ignore = [ - "*.yml", - ".github", - ".github/*" -] - - -[tool.pytest.ini_options] -norecursedirs = [ - ".git", - ".github", - "dist", - "build", - "docs", -] -addopts = [ - "--strict-markers", - "--doctest-modules", - "--color=yes", - "--disable-pytest-warnings", -] -filterwarnings = [ - "error::FutureWarning", -] -xfail_strict = true -junit_duration_report = "call" - -[tool.coverage.report] -exclude_lines = [ - "pragma: no cover", - "pass", -] - -[tool.codespell] -#skip = '*.py' -quiet-level = 3 -# comma separated list of words; waiting for: -# https://github.com/codespell-project/codespell/issues/2839#issuecomment-1731601603 -# also adding links until they ignored by its: nature -# https://github.com/codespell-project/codespell/issues/2243#issuecomment-1732019960 -#ignore-words-list = "" - - [tool.black] -# https://github.com/psf/black line-length = 120 -exclude = "(.eggs|.git|.hg|.mypy_cache|.venv|_build|buck-out|build|dist)" +exclude = '(_notebooks/.*)' [tool.docformatter] recursive = true -wrap-summaries = 120 +# this need to be shorter as some docstings are r"""... +wrap-summaries = 119 wrap-descriptions = 120 blank = true - -[tool.mypy] -files = [ - "src", -] -install_types = true -non_interactive = true -disallow_untyped_defs = true -ignore_missing_imports = true -show_error_codes = true -warn_redundant_casts = true -warn_unused_configs = true -warn_unused_ignores = true -allow_redefinition = true -# disable this rule as the Trainer attributes are defined in the connectors, not in its __init__ -disable_error_code = "attr-defined" -# style choices -warn_no_return = false +[tool.codespell] +# Todo: enable also python files in a next step +skip = '*.py' +quiet-level = 3 +# comma separated list of words; waiting for: +# https://github.com/codespell-project/codespell/issues/2839#issuecomment-1731601603 +# also adding links until they ignored by its: nature +# https://github.com/codespell-project/codespell/issues/2243#issuecomment-1732019960 +ignore-words-list = "te, compiletime" [tool.ruff] line-length = 120 # Enable Pyflakes `E` and `F` codes by default. -select = [ +lint.select = [ "E", "W", # see: https://pypi.org/project/pycodestyle "F", # see: https://pypi.org/project/pyflakes - "D", # see: https://pypi.org/project/pydocstyle - "N", # see: https://pypi.org/project/pep8-naming + "S", # see: https://pypi.org/project/flake8-bandit + "RUF018", # see: https://docs.astral.sh/ruff/rules/assignment-in-assert ] -extend-select = [ +lint.extend-select = [ + "I", # see: isort "C4", # see: https://pypi.org/project/flake8-comprehensions - "PT", # see: https://pypi.org/project/flake8-pytest-style - "RET", # see: https://pypi.org/project/flake8-return "SIM", # see: https://pypi.org/project/flake8-simplify + "RET", # see: https://pypi.org/project/flake8-return + "PT", # see: https://pypi.org/project/flake8-pytest-style ] -ignore = [ +lint.ignore = [ "E731", # Do not assign a lambda expression, use a def + "S108", + "E203", # conflicts with black ] # Exclude a variety of commonly ignored directories. exclude = [ - ".eggs", ".git", - ".mypy_cache", - ".ruff_cache", - "__pypackages__", - "_build", - "build", - "dist", - "docs" + "docs", + "_notebooks" +] +lint.ignore-init-module-imports = true + +[tool.ruff.lint.per-file-ignores] +".actions/*" = ["S101", "S310"] +"setup.py" = ["S101"] +"examples/**" = [ + "S101", # Use of `assert` detected + "S113", # todo: Probable use of requests call without + "S104", # Possible binding to all interface + "F821", # Undefined name `...` + "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes + "S501", # Probable use of `requests` call with `verify=False` disabling SSL certificate checks + "S108", # Probable insecure usage of temporary file or directory: "/tmp/data/MNIST" ] -ignore-init-module-imports = true +"src/**" = [ + "S101", # todo: Use of `assert` detected + "S105", "S106", "S107", # todo: Possible hardcoded password: ... + "S113", # todo: Probable use of requests call without timeout + "S301", # todo: `pickle` and modules that wrap it can be unsafe when used to deserialize untrusted data, possible security issue + "S324", # todo: Probable use of insecure hash functions in `hashlib` + "S403", # todo: `pickle`, `cPickle`, `dill`, and `shelve` modules are possibly insecure + "S404", # todo: `subprocess` module is possibly insecure + "S602", # todo: `subprocess` call with `shell=True` identified, security issue + "S603", # todo: `subprocess` call: check for execution of untrusted input + "S605", # todo: Starting a process with a shell: seems safe, but may be changed in the future; consider rewriting without `shell` + "S607", # todo: Starting a process with a partial executable path + "RET504", # todo:Unnecessary variable assignment before `return` statement + "RET503", +] +"src/lightning/data/**" = [ + "S310", # todo: Audit URL open for permitted schemes. Allowing use of `file:` or custom schemes is often unexpected. +] +"tests/**" = [ + "S101", # Use of `assert` detected + "S105", "S106", # todo: Possible hardcoded password: ... + "S301", # `pickle` and modules that wrap it can be unsafe when used to deserialize untrusted data, possible security issue + "S113", # todo: Probable use of requests call without timeout + "S311", # todo: Standard pseudo-random generators are not suitable for cryptographic purposes + "S108", # todo: Probable insecure usage of temporary file or directory: "/tmp/sys-customizations-sync" + "S202", # Uses of `tarfile.extractall()` + "S403", # `pickle`, `cPickle`, `dill`, and `shelve` modules are possibly insecure + "S404", # `subprocess` module is possibly insecure + "S602", # todo: `subprocess` call with `shell=True` identified, security issue + "S603", # todo: `subprocess` call: check for execution of untrusted input + "S605", # todo: Starting a process with a shell: seems safe, but may be changed in the future; consider rewriting without `shell` + "S607", # todo: Starting a process with a partial executable path + "RET504", # todo:Unnecessary variable assignment before `return` statement + "PT004", # todo: Fixture `tmpdir_unittest_fixture` does not return anything, add leading underscore + "PT011", # todo: `pytest.raises(ValueError)` is too broad, set the `match` parameter or use a more specific exception + "PT012", # todo: `pytest.raises()` block should contain a single simple statement + "PT019", # todo: Fixture `_` without value is injected as parameter, use `@pytest.mark.usefixtures` instead +] + +[tool.ruff.lint.mccabe] +# Unlike Flake8, default to a complexity level of 10. +max-complexity = 10 -[tool.ruff.per-file-ignores] -"setup.py" = ["D100", "SIM115"] -"__about__.py" = ["D100"] -"__init__.py" = ["D100"] -[tool.ruff.pydocstyle] -# Use Google-style docstrings. -convention = "google" +[tool.mypy] +files = [ + "src/lightning", +] +# This section is for folders with "-" as they are not valid python modules +exclude = [ +] +install_types = "True" +non_interactive = "True" +disallow_untyped_defs = "True" +ignore_missing_imports = "True" +show_error_codes = "True" +warn_redundant_casts = "True" +warn_unused_configs = "True" +warn_unused_ignores = "True" +allow_redefinition = "True" +# disable this rule as the Trainer attributes are defined in the connectors, not in its __init__ +disable_error_code = "attr-defined" +# style choices +warn_no_return = "False" + +# Ignore mypy errors for these files +# TODO: the goal is for this to be empty +[[tool.mypy.overrides]] +# the list can be generated with: +# mypy --no-error-summary 2>&1 | tr ':' ' ' | awk '{print $1}' | sort | uniq | sed 's/\.py//g; s|src/||g; s|\/|\.|g' | xargs -I {} echo '"{}",' +module = [ +] +ignore_errors = "True" -#[tool.ruff.pycodestyle] -#ignore-overlong-task-comments = true -[tool.ruff.mccabe] -# Unlike Flake8, default to a complexity level of 10. -max-complexity = 10 +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "warnings", + "pass", + "rank_zero_warn", + "raise NotImplementedError", +] + + +[tool.pytest.ini_options] +norecursedirs = [ + ".git", + ".github", + "dist", + "build", + "docs", +] +addopts = [ + "--strict-markers", + "--doctest-modules", + "--color=yes", + "--disable-pytest-warnings", + "--ignore=legacy/checkpoints", +] +markers = [ + "cloud: Run the cloud tests for example", +] +filterwarnings = [ + "error::FutureWarning", +] +xfail_strict = true +junit_duration_report = "call" diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 8d740a95..c2eb5e36 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,2 +1,5 @@ lightning-utilities >=0.8.0, <0.10.0 -lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility \ No newline at end of file +lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility +# to be able to include also PL 2.0 and preserve `>` needed for CI min version bypass +torch >0.14.0, <=2.2.0 +filelock \ No newline at end of file diff --git a/requirements/test.txt b/requirements/test.txt index 1b85876c..155d2939 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,7 +1,9 @@ -coverage>=5.0 -pytest>=6.0 -pytest-cov -mypy==1.8.0 +coverage ==7.3.1 +pytest ==7.4.0 +pytest-cov ==4.1.0 +pytest-timeout ==2.1.0 +pytest-rerunfailures ==12.0 +pytest-random-order ==1.1.0 viztracer pandas pyarrow From f1c4c4a3db9c69020d9a7c7bd91f55c32f30522b Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 08:32:25 +0000 Subject: [PATCH 05/18] update --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 4b79c44e..85d57000 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ def _load_requirements(path_dir: str = _PATH_ROOT, file_name: str = "requirement readme = fopen.read() -def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = ("devel.txt", "docs.txt")) -> dict: +def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = ("devel.txt", "docs.txt", "test.txt")) -> dict: # https://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-extras # Define package extras. These are only installed if you specify them. # From remote, use like `pip install pytorch-lightning[dev, docs]` @@ -59,8 +59,8 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = url=about.__homepage__, download_url="https://github.com/Lightning-AI/lit-data", license=about.__license__, - packages=find_packages(where="src"), - package_dir={"": "src"}, + packages=find_packages(where="lit_data"), + package_dir={"": "lit_data"}, long_description=readme, long_description_content_type="text/markdown", include_package_data=True, From d27e5d73b0604738671c5d5858aa870892eeb39d Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 08:37:52 +0000 Subject: [PATCH 06/18] update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 85d57000..162db4a5 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ def _load_requirements(path_dir: str = _PATH_ROOT, file_name: str = "requirement readme = fopen.read() -def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = ("devel.txt", "docs.txt", "test.txt")) -> dict: +def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = ()) -> dict: # https://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-extras # Define package extras. These are only installed if you specify them. # From remote, use like `pip install pytorch-lightning[dev, docs]` From 621ffef4f91e6d78a9029102dfe6c224404c7674 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 08:44:33 +0000 Subject: [PATCH 07/18] update --- docs/source/conf.py | 8 ++++---- lit_data/processing/utilities.py | 4 ++-- requirements/requirements.txt | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index afdccf25..d9c446a3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -317,8 +317,8 @@ def find_source(): fname = inspect.getsourcefile(obj) # https://github.com/rtfd/readthedocs.org/issues/5735 if any(s in fname for s in ("readthedocs", "rtfd", "checkouts")): - # /home/docs/checkouts/readthedocs.org/user_builds/pytorch_lightning/checkouts/ - # devel/pytorch_lightning/utilities/cls_experiment.py#L26-L176 + # /home/docs/checkouts/readthedocs.org/user_builds/lit_data/checkouts/ + # devel/lit_data/utilities/cls_experiment.py#L26-L176 path_top = os.path.abspath(os.path.join("..", "..", "..")) fname = os.path.relpath(fname, start=path_top) else: @@ -381,8 +381,8 @@ def find_source(): import os import torch -import pytorch_lightning as pl -from pytorch_lightning import Trainer, LightningModule +import lit_data +from lit_data import StreamingDataset """ coverage_skip_undoc_in_source = True diff --git a/lit_data/processing/utilities.py b/lit_data/processing/utilities.py index 051e25dd..35455709 100644 --- a/lit_data/processing/utilities.py +++ b/lit_data/processing/utilities.py @@ -29,12 +29,12 @@ def _wrapper(*args: Any, **kwargs: Any) -> Tuple[Any, Optional[Exception]]: def make_request( url: str, timeout: int = 10, - user_agent_token: str = "pytorch-lightning", + user_agent_token: str = "lit-data", ) -> io.BytesIO: """Download an image with urllib.""" user_agent_string = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0" if user_agent_token: - user_agent_string += f" (compatible; {user_agent_token}; +https://github.com/Lightning-AI/pytorch-lightning)" + user_agent_string += f" (compatible; {user_agent_token}; +https://github.com/Lightning-AI/lit-data)" with urllib.request.urlopen( # noqa: S310 urllib.request.Request(url, data=None, headers={"User-Agent": user_agent_string}), timeout=timeout diff --git a/requirements/requirements.txt b/requirements/requirements.txt index c2eb5e36..155f82fd 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ lightning-utilities >=0.8.0, <0.10.0 lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility # to be able to include also PL 2.0 and preserve `>` needed for CI min version bypass -torch >0.14.0, <=2.2.0 +torch >=2.1.0, <=2.2.0 filelock \ No newline at end of file From 5a500f66a600b5f2442434d1853ef7350c30504b Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 08:52:33 +0000 Subject: [PATCH 08/18] update --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/ci-testing.yml | 5 +++++ Makefile | 2 +- README.md | 2 +- requirements.txt | 7 +++++-- requirements/requirements.txt | 5 ----- setup.py | 2 +- 7 files changed, 14 insertions(+), 11 deletions(-) delete mode 100644 requirements/requirements.txt diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 286a59b5..2f260695 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,7 +2,7 @@ Before submitting - [ ] Was this discussed/agreed via a Github issue? (no need for typos and docs improvements) -- [ ] Did you read the [contributor guideline](https://github.com/Lightning-AI/pytorch-lightning/blob/main/.github/CONTRIBUTING.md), Pull Request section? +- [ ] Did you read the [contributor guideline](https://github.com/Lightning-AI/lit-data/blob/main/.github/CONTRIBUTING.md), Pull Request section? - [ ] Did you make sure to update the docs? - [ ] Did you write any new necessary tests? diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 3d3db3d8..b5bc815d 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -46,6 +46,11 @@ jobs: open(fpath, 'w').write(req) shell: python + - name: Display dependencies + run: | + cat requirements.txt + cat requirements/test.txt + - name: Get pip cache dir id: pip-cache run: echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT diff --git a/Makefile b/Makefile index 56f3a147..ee079558 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ export SLURM_LOCALID=0 export SPHINX_MOCK_REQUIREMENTS=0 test: clean - pip install -q -r requirements/requirements.txt + pip install -q -r requirements.txt pip install -q -r requirements/test.txt # use this to run tests diff --git a/README.md b/README.md index 4ee44fa5..e59488be 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Lightning Data can be installed with `pip`: ```bash -pip install --no-cache-dir git+https://github.com/Lightning-AI/pytorch-lightning.git@master +pip install --no-cache-dir git+https://github.com/Lightning-AI/lit-data.git@master ``` ## ๐Ÿ Quick Start diff --git a/requirements.txt b/requirements.txt index 9df9b07b..155f82fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,5 @@ -torch >=2.0.0 -lightning >=2.0.0 +lightning-utilities >=0.8.0, <0.10.0 +lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility +# to be able to include also PL 2.0 and preserve `>` needed for CI min version bypass +torch >=2.1.0, <=2.2.0 +filelock \ No newline at end of file diff --git a/requirements/requirements.txt b/requirements/requirements.txt deleted file mode 100644 index 155f82fd..00000000 --- a/requirements/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -lightning-utilities >=0.8.0, <0.10.0 -lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility -# to be able to include also PL 2.0 and preserve `>` needed for CI min version bypass -torch >=2.1.0, <=2.2.0 -filelock \ No newline at end of file diff --git a/setup.py b/setup.py index 162db4a5..3cf70845 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ def _load_requirements(path_dir: str = _PATH_ROOT, file_name: str = "requirement def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = ()) -> dict: # https://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-extras # Define package extras. These are only installed if you specify them. - # From remote, use like `pip install pytorch-lightning[dev, docs]` + # From remote, use like `pip install lit-data[dev, docs]` # From local copy of repo, use like `pip install ".[dev, docs]"` req_files = [Path(p) for p in glob.glob(os.path.join(requirements_dir, "*.txt"))] extras = { From 5f8c9b41c849d0863923b6a6498fb33e68391179 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 08:59:54 +0000 Subject: [PATCH 09/18] update --- lit_data/__init__.py | 2 +- requirements.txt | 4 +++- requirements/test.txt | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lit_data/__init__.py b/lit_data/__init__.py index b48fe616..838223c3 100644 --- a/lit_data/__init__.py +++ b/lit_data/__init__.py @@ -19,4 +19,4 @@ if RequirementCache("lightning_sdk"): from lightning_sdk import Machine # noqa: F401 - __all__.append("Machine") + __all__.append("Machine") \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 155f82fd..11001ea5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,6 @@ lightning-utilities >=0.8.0, <0.10.0 lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility # to be able to include also PL 2.0 and preserve `>` needed for CI min version bypass torch >=2.1.0, <=2.2.0 -filelock \ No newline at end of file +filelock +tqdm +numpy \ No newline at end of file diff --git a/requirements/test.txt b/requirements/test.txt index 155d2939..854aacf3 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -7,4 +7,5 @@ pytest-random-order ==1.1.0 viztracer pandas pyarrow -pillow \ No newline at end of file +pillow +lightning \ No newline at end of file From 2085e813b2615a899d2e3c719ef897aa3e9a2644 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 09:12:39 +0000 Subject: [PATCH 10/18] update --- requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 11001ea5..8e7d827a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,6 @@ lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility torch >=2.1.0, <=2.2.0 filelock tqdm -numpy \ No newline at end of file +numpy +torchvision +pillow \ No newline at end of file From b7eb72083bb1fe658385efa4683d691d14c013c7 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 09:21:17 +0000 Subject: [PATCH 11/18] update --- .github/workflows/label-conflicts.yml | 2 +- docs/source/conf.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/label-conflicts.yml b/.github/workflows/label-conflicts.yml index f15f5845..cc1c398f 100644 --- a/.github/workflows/label-conflicts.yml +++ b/.github/workflows/label-conflicts.yml @@ -14,7 +14,7 @@ jobs: triage-conflicts: runs-on: ubuntu-latest steps: - - uses: mschilde/auto-label-merge-conflicts@591722e97f3c4142df3eca156ed0dcf2bcd362bd # Oct 25, 2021 + - uses: mschilde/auto-label-merge-conflicts with: CONFLICT_LABEL_NAME: "has conflicts" GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/docs/source/conf.py b/docs/source/conf.py index d9c446a3..a6da23d1 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -16,13 +16,12 @@ _PATH_HERE = os.path.abspath(os.path.dirname(__file__)) _PATH_ROOT = os.path.realpath(os.path.join(_PATH_HERE, "..", "..")) -_PATH_SOURCE = os.path.join(_PATH_ROOT, "src") sys.path.insert(0, os.path.abspath(_PATH_ROOT)) SPHINX_MOCK_REQUIREMENTS = int(os.environ.get("SPHINX_MOCK_REQUIREMENTS", True)) # alternative https://stackoverflow.com/a/67692/4521646 -spec = spec_from_file_location("lit_data/__about__.py", os.path.join(_PATH_SOURCE, "lit_data", "__about__.py")) +spec = spec_from_file_location("lit_data/__about__.py", os.path.join(_PATH_ROOT, "lit_data", "__about__.py")) about = module_from_spec(spec) spec.loader.exec_module(about) From 1b837f9952c61ef15ff795ba326a85b2f9c74940 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 09:31:31 +0000 Subject: [PATCH 12/18] upate --- CHANGELOG.md | 2 +- lit_data/CHANGELOG.md | 12 +----------- lit_data/__about__.py | 2 +- lit_data/__init__.py | 2 +- lit_data/processing/data_processor.py | 10 +++++----- lit_data/processing/functions.py | 4 +--- lit_data/processing/utilities.py | 2 +- pyproject.toml | 4 +--- requirements.txt | 2 +- requirements/test.txt | 2 +- setup.py | 5 +++-- tests/processing/test_data_processor.py | 3 ++- tests/processing/test_functions.py | 1 + tests/processing/test_readers.py | 1 + tests/streaming/test_cache.py | 11 ++++++----- tests/streaming/test_client.py | 1 + tests/streaming/test_combined.py | 5 +++-- tests/streaming/test_dataloader.py | 3 ++- tests/streaming/test_dataset.py | 3 ++- tests/streaming/test_reader.py | 3 ++- tests/streaming/test_resolver.py | 3 ++- tests/streaming/test_sampler.py | 1 + tests/streaming/test_serializer.py | 3 ++- tests/streaming/test_writer.py | 3 ++- tests/utilities/test_packing.py | 1 + 25 files changed, 45 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28581466..0b6fbf91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] - YYYY-MM-DD +## \[Unreleased\] - YYYY-MM-DD ### Added diff --git a/lit_data/CHANGELOG.md b/lit_data/CHANGELOG.md index 255c28a4..e3655c93 100644 --- a/lit_data/CHANGELOG.md +++ b/lit_data/CHANGELOG.md @@ -4,44 +4,34 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). -## [unReleased] - 2024-MM-DD +## \[unReleased\] - 2024-MM-DD ### Added - - - - - ### Changed - - - - - ### Deprecated - - - - - ### Removed - - - - - ### Fixed - - - - - diff --git a/lit_data/__about__.py b/lit_data/__about__.py index 441afc59..fdf610bb 100644 --- a/lit_data/__about__.py +++ b/lit_data/__about__.py @@ -40,4 +40,4 @@ "__version__", ] -__version__ = "0.0.1" \ No newline at end of file +__version__ = "0.0.1" diff --git a/lit_data/__init__.py b/lit_data/__init__.py index 838223c3..b48fe616 100644 --- a/lit_data/__init__.py +++ b/lit_data/__init__.py @@ -19,4 +19,4 @@ if RequirementCache("lightning_sdk"): from lightning_sdk import Machine # noqa: F401 - __all__.append("Machine") \ No newline at end of file + __all__.append("Machine") diff --git a/lit_data/processing/data_processor.py b/lit_data/processing/data_processor.py index 33602799..52afaea5 100644 --- a/lit_data/processing/data_processor.py +++ b/lit_data/processing/data_processor.py @@ -17,9 +17,9 @@ from urllib import parse import numpy as np +from lightning import seed_everything from tqdm.auto import tqdm as _tqdm -from lightning import seed_everything from lit_data.constants import ( _BOTO3_AVAILABLE, _DEFAULT_FAST_DEV_RUN_ITEMS, @@ -427,7 +427,7 @@ def _loop(self) -> None: uploader.join() if self.remove: - assert self.remover + assert self.remover # noqa: S101 self.remove_queue.put(None) self.remover.join() @@ -487,7 +487,7 @@ def _try_upload(self, data: Optional[Union[str, Tuple[str, str]]]) -> None: if isinstance(data, str): assert os.path.exists(data), data else: - assert os.path.exists(data[-1]), data + assert os.path.exists(data[-1]), data # noqa: S101 self.to_upload_queues[self._counter % self.num_uploaders].put(data) @@ -772,7 +772,7 @@ def _upload_index(self, output_dir: Dir, cache_dir: str, num_nodes: int, node_ra # Get the index file locally for node_rank in range(num_nodes - 1): output_dir_path = output_dir.url if output_dir.url else output_dir.path - assert output_dir_path + assert output_dir_path # noqa: S101 remote_filepath = os.path.join(output_dir_path, f"{node_rank}-{_INDEX_FILENAME}") node_index_filepath = os.path.join(cache_dir, os.path.basename(remote_filepath)) if obj.scheme == "s3": @@ -941,7 +941,7 @@ def run(self, data_recipe: DataRecipe) -> None: error = self.error_queue.get(timeout=0.001) self._exit_on_error(error) except Empty: - assert self.progress_queue + assert self.progress_queue # noqa: S101 try: index, counter = self.progress_queue.get(timeout=0.001) except Empty: diff --git a/lit_data/processing/functions.py b/lit_data/processing/functions.py index 83b632cf..afa516e6 100644 --- a/lit_data/processing/functions.py +++ b/lit_data/processing/functions.py @@ -41,14 +41,12 @@ def _get_indexed_paths(data: Any) -> Dict[int, str]: flattened_item, _ = tree_flatten(data) - indexed_paths = { + return { index: element for index, element in enumerate(flattened_item) if isinstance(element, str) and os.path.exists(element) } - return indexed_paths - def _get_input_dir(inputs: Sequence[Any]) -> Optional[str]: indexed_paths = _get_indexed_paths(inputs[0]) diff --git a/lit_data/processing/utilities.py b/lit_data/processing/utilities.py index 35455709..1bdf9c71 100644 --- a/lit_data/processing/utilities.py +++ b/lit_data/processing/utilities.py @@ -2,7 +2,7 @@ import os import urllib from contextlib import contextmanager -from subprocess import Popen +from subprocess import Popen # noqa: S404 from typing import Any, Callable, Optional, Tuple from lit_data.constants import _IS_IN_STUDIO diff --git a/pyproject.toml b/pyproject.toml index ee320fb0..8a00ec11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,7 +87,7 @@ lint.ignore-init-module-imports = true "S501", # Probable use of `requests` call with `verify=False` disabling SSL certificate checks "S108", # Probable insecure usage of temporary file or directory: "/tmp/data/MNIST" ] -"src/**" = [ +"lit_data/**" = [ "S101", # todo: Use of `assert` detected "S105", "S106", "S107", # todo: Possible hardcoded password: ... "S113", # todo: Probable use of requests call without timeout @@ -101,8 +101,6 @@ lint.ignore-init-module-imports = true "S607", # todo: Starting a process with a partial executable path "RET504", # todo:Unnecessary variable assignment before `return` statement "RET503", -] -"src/lightning/data/**" = [ "S310", # todo: Audit URL open for permitted schemes. Allowing use of `file:` or custom schemes is often unexpected. ] "tests/**" = [ diff --git a/requirements.txt b/requirements.txt index 8e7d827a..efc63699 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ filelock tqdm numpy torchvision -pillow \ No newline at end of file +pillow diff --git a/requirements/test.txt b/requirements/test.txt index 854aacf3..560c8706 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -8,4 +8,4 @@ viztracer pandas pyarrow pillow -lightning \ No newline at end of file +lightning diff --git a/setup.py b/setup.py index 3cf70845..8c0bb6be 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,8 @@ def _load_py_module(fname, pkg="lit_data"): def _load_requirements(path_dir: str = _PATH_ROOT, file_name: str = "requirements.txt") -> list: - reqs = parse_requirements(open(os.path.join(path_dir, file_name)).readlines()) + with open(os.path.join(path_dir, file_name)) as f: + reqs = parse_requirements(f).readlines() return list(map(str, reqs)) @@ -95,4 +96,4 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple = "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ], -) \ No newline at end of file +) diff --git a/tests/processing/test_data_processor.py b/tests/processing/test_data_processor.py index c410893a..0dcf3eff 100644 --- a/tests/processing/test_data_processor.py +++ b/tests/processing/test_data_processor.py @@ -10,6 +10,8 @@ import pytest import torch from lightning import seed_everything +from lightning_utilities.core.imports import RequirementCache + from lit_data.processing import data_processor as data_processor_module from lit_data.processing import functions from lit_data.processing.data_processor import ( @@ -28,7 +30,6 @@ from lit_data.processing.functions import LambdaDataTransformRecipe, map, optimize from lit_data.streaming import resolver from lit_data.streaming.cache import Cache, Dir -from lightning_utilities.core.imports import RequirementCache _PIL_AVAILABLE = RequirementCache("PIL") diff --git a/tests/processing/test_functions.py b/tests/processing/test_functions.py index edc041dc..b8d927ca 100644 --- a/tests/processing/test_functions.py +++ b/tests/processing/test_functions.py @@ -3,6 +3,7 @@ from unittest import mock import pytest + from lit_data import walk from lit_data.processing.functions import _get_input_dir diff --git a/tests/processing/test_readers.py b/tests/processing/test_readers.py index cf1d2ae4..f7e5adfd 100644 --- a/tests/processing/test_readers.py +++ b/tests/processing/test_readers.py @@ -2,6 +2,7 @@ import sys import pytest + from lit_data import map from lit_data.processing.readers import _PYARROW_AVAILABLE, BaseReader, ParquetReader diff --git a/tests/streaming/test_cache.py b/tests/streaming/test_cache.py index b8daece1..4ce8472f 100644 --- a/tests/streaming/test_cache.py +++ b/tests/streaming/test_cache.py @@ -19,17 +19,18 @@ import pytest import torch from lightning import seed_everything +from lightning.fabric import Fabric +from lightning.pytorch.demos.boring_classes import RandomDataset +from lightning_utilities.core.imports import RequirementCache +from lightning_utilities.test.warning import no_warning_call +from torch.utils.data import Dataset + from lit_data.streaming import Cache from lit_data.streaming.dataloader import CacheDataLoader from lit_data.streaming.dataset import StreamingDataset from lit_data.streaming.item_loader import TokensLoader from lit_data.streaming.serializers import Serializer from lit_data.utilities.env import _DistributedEnv -from lightning.fabric import Fabric -from lightning.pytorch.demos.boring_classes import RandomDataset -from lightning_utilities.core.imports import RequirementCache -from lightning_utilities.test.warning import no_warning_call -from torch.utils.data import Dataset _PIL_AVAILABLE = RequirementCache("PIL") _TORCH_VISION_AVAILABLE = RequirementCache("torchvision") diff --git a/tests/streaming/test_client.py b/tests/streaming/test_client.py index b9ca68ef..23f06c54 100644 --- a/tests/streaming/test_client.py +++ b/tests/streaming/test_client.py @@ -3,6 +3,7 @@ from unittest import mock import pytest + from lit_data.streaming import client diff --git a/tests/streaming/test_combined.py b/tests/streaming/test_combined.py index 76b5af84..077d3d05 100644 --- a/tests/streaming/test_combined.py +++ b/tests/streaming/test_combined.py @@ -4,12 +4,13 @@ import pytest import torch +from torch.utils.data import IterableDataset +from torch.utils.data.dataloader import DataLoader + from lit_data.streaming.cache import Cache from lit_data.streaming.combined import CombinedStreamingDataset from lit_data.streaming.dataloader import StreamingDataLoader from lit_data.streaming.dataset import Dir, StreamingDataset -from torch.utils.data import IterableDataset -from torch.utils.data.dataloader import DataLoader class TestCombinedStreamingDataset(CombinedStreamingDataset): diff --git a/tests/streaming/test_dataloader.py b/tests/streaming/test_dataloader.py index c74dec82..fc72ea4c 100644 --- a/tests/streaming/test_dataloader.py +++ b/tests/streaming/test_dataloader.py @@ -2,9 +2,10 @@ import pytest import torch +from torch import tensor + from lit_data.streaming import CombinedStreamingDataset, StreamingDataLoader from lit_data.streaming import dataloader as streaming_dataloader_module -from torch import tensor class TestStatefulDataset: diff --git a/tests/streaming/test_dataset.py b/tests/streaming/test_dataset.py index 752cf7e9..52ae8706 100644 --- a/tests/streaming/test_dataset.py +++ b/tests/streaming/test_dataset.py @@ -20,6 +20,8 @@ import pytest import torch from lightning import seed_everything +from torch.utils.data import DataLoader + from lit_data.processing import functions from lit_data.streaming import Cache from lit_data.streaming import dataset as dataset_module @@ -37,7 +39,6 @@ from lit_data.streaming.item_loader import TokensLoader from lit_data.streaming.shuffle import FullShuffle, NoShuffle from lit_data.utilities.env import _DistributedEnv, _WorkerEnv -from torch.utils.data import DataLoader def test_streaming_dataset(tmpdir, monkeypatch): diff --git a/tests/streaming/test_reader.py b/tests/streaming/test_reader.py index 5571a22e..6483b558 100644 --- a/tests/streaming/test_reader.py +++ b/tests/streaming/test_reader.py @@ -3,6 +3,7 @@ from time import sleep import numpy as np + from lit_data.streaming import reader from lit_data.streaming.cache import Cache from lit_data.streaming.config import ChunkedIndex @@ -44,7 +45,7 @@ def test_reader_chunk_removal(tmpdir): index = ChunkedIndex(i, cache._get_chunk_index_from_index(i), is_last_index=i == 24) assert cache[index] == i - assert len(os.listdir(cache_dir)) == 3 + assert len(os.listdir(cache_dir)) in [2, 3] def test_get_folder_size(tmpdir): diff --git a/tests/streaming/test_resolver.py b/tests/streaming/test_resolver.py index 6b1923e7..36a47ed3 100644 --- a/tests/streaming/test_resolver.py +++ b/tests/streaming/test_resolver.py @@ -3,7 +3,6 @@ from unittest import mock import pytest -from lit_data.streaming import resolver from lightning_cloud import login from lightning_cloud.openapi import ( Externalv1Cluster, @@ -17,6 +16,8 @@ V1ListDataConnectionsResponse, ) +from lit_data.streaming import resolver + @pytest.mark.skipif(sys.platform == "win32", reason="windows isn't supported") def test_src_resolver_s3_connections(monkeypatch): diff --git a/tests/streaming/test_sampler.py b/tests/streaming/test_sampler.py index 911ea278..8b1ee8c5 100644 --- a/tests/streaming/test_sampler.py +++ b/tests/streaming/test_sampler.py @@ -2,6 +2,7 @@ import pytest from lightning import seed_everything + from lit_data.streaming.sampler import CacheBatchSampler diff --git a/tests/streaming/test_serializer.py b/tests/streaming/test_serializer.py index 902f8aa8..d07e0629 100644 --- a/tests/streaming/test_serializer.py +++ b/tests/streaming/test_serializer.py @@ -20,6 +20,8 @@ import pytest import torch from lightning import seed_everything +from lightning_utilities.core.imports import RequirementCache + from lit_data.streaming.serializers import ( _AV_AVAILABLE, _NUMPY_DTYPES_MAPPING, @@ -36,7 +38,6 @@ TensorSerializer, VideoSerializer, ) -from lightning_utilities.core.imports import RequirementCache _PIL_AVAILABLE = RequirementCache("PIL") diff --git a/tests/streaming/test_writer.py b/tests/streaming/test_writer.py index dc00afa3..84e4ee91 100644 --- a/tests/streaming/test_writer.py +++ b/tests/streaming/test_writer.py @@ -18,12 +18,13 @@ import numpy as np import pytest from lightning import seed_everything +from lightning_utilities.core.imports import RequirementCache + from lit_data.streaming.compression import _ZSTD_AVAILABLE from lit_data.streaming.reader import BinaryReader from lit_data.streaming.sampler import ChunkedIndex from lit_data.streaming.writer import BinaryWriter from lit_data.utilities.format import _FORMAT_TO_RATIO -from lightning_utilities.core.imports import RequirementCache _PIL_AVAILABLE = RequirementCache("PIL") diff --git a/tests/utilities/test_packing.py b/tests/utilities/test_packing.py index cb7e7928..41af8591 100644 --- a/tests/utilities/test_packing.py +++ b/tests/utilities/test_packing.py @@ -1,4 +1,5 @@ import pytest + from lit_data.utilities.packing import _pack_greedily From 24efee8d881c3e2be9af239e62d57af62499abb5 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 09:39:16 +0000 Subject: [PATCH 13/18] update --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 8c0bb6be..15cbc36b 100644 --- a/setup.py +++ b/setup.py @@ -19,8 +19,7 @@ def _load_py_module(fname, pkg="lit_data"): def _load_requirements(path_dir: str = _PATH_ROOT, file_name: str = "requirements.txt") -> list: - with open(os.path.join(path_dir, file_name)) as f: - reqs = parse_requirements(f).readlines() + reqs = parse_requirements(open(os.path.join(path_dir, file_name))).readlines() return list(map(str, reqs)) From fadc5b26e39936828e316a64239ce9002152867c Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 09:39:58 +0000 Subject: [PATCH 14/18] update --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8a00ec11..fa123ec0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ lint.ignore-init-module-imports = true [tool.ruff.lint.per-file-ignores] ".actions/*" = ["S101", "S310"] -"setup.py" = ["S101"] +"setup.py" = ["S101", "SIM115"] "examples/**" = [ "S101", # Use of `assert` detected "S113", # todo: Probable use of requests call without From 7a29e6706f5f48d967eddba3c1f27d1fa2d2fbf1 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 09:42:08 +0000 Subject: [PATCH 15/18] update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 15cbc36b..e8e19bc7 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ def _load_py_module(fname, pkg="lit_data"): def _load_requirements(path_dir: str = _PATH_ROOT, file_name: str = "requirements.txt") -> list: - reqs = parse_requirements(open(os.path.join(path_dir, file_name))).readlines() + reqs = parse_requirements(open(os.path.join(path_dir, file_name)).readlines()) return list(map(str, reqs)) From e0a5adb7919b0d39351ddb99449d8971f5804374 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 10:58:19 +0000 Subject: [PATCH 16/18] update --- MANIFEST.in | 2 +- requirements/test.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 55724b79..cf416353 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,7 +5,7 @@ recursive-exclude __pycache__ *.py[cod] *.orig # Include the README and CHANGELOG include *.md -recursive-include src *.md +recursive-include lit_data *.md # Include the license file include LICENSE diff --git a/requirements/test.txt b/requirements/test.txt index 560c8706..90b502be 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -9,3 +9,4 @@ pandas pyarrow pillow lightning +mypy From 2d8549456f6c3d547302c7c2986e466c45cb1ed0 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 11:17:22 +0000 Subject: [PATCH 17/18] update --- .github/workflows/check-typing.yml | 82 ++++++++++++++++++++++++++++++ .github/workflows/ci-checks.yml | 2 +- 2 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/check-typing.yml diff --git a/.github/workflows/check-typing.yml b/.github/workflows/check-typing.yml new file mode 100644 index 00000000..a48baace --- /dev/null +++ b/.github/workflows/check-typing.yml @@ -0,0 +1,82 @@ +name: Check formatting flow + +on: + workflow_call: + inputs: + actions-ref: + description: "Version of actions, normally the same as workflow" + required: true + type: string + python-version: + description: "Python version to use" + default: "3.9" + required: false + type: string + extra-typing: + description: "Package extra to be installed for type checks + include mypy" + default: "test" + required: false + type: string + +defaults: + run: + shell: bash + +jobs: + mypy: + runs-on: ubuntu-20.04 + steps: + - name: Checkout ๐Ÿ›Ž๏ธ + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Python ๐Ÿ ${{ inputs.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + + - name: Install dependencies + timeout-minutes: 20 + run: | + # don't use --upgrade to respect the version installed via setup.py + pip install -e '.[${{ inputs.extra-typing }}]' mypy \ + --extra-index-url https://download.pytorch.org/whl/cpu/torch_stable.html + pip list + + - name: Pull reusable ๐Ÿค– actions๏ธ + uses: actions/checkout@v4 + with: + ref: ${{ inputs.actions-ref }} + path: .cicd + repository: Lightning-AI/utilities + - name: Print ๐Ÿ–จ๏ธ dependencies + uses: ./.cicd/.github/actions/pip-list + with: + unfold: true + + # see: https://github.com/python/mypy/issues/10600#issuecomment-857351152 + - run: yes | mypy src --install-types || true + + - name: Check typing + # mypy uses the config file found in the following order: + # 1. mypy.ini + # 2. pyproject.toml + # 3. setup.cfg + # 4. $XDG_CONFIG_HOME/mypy/config + # 5. ~/.config/mypy/config + # 6. ~/.mypy.ini + # https://mypy.readthedocs.io/en/stable/config_file.html + run: mypy + + - name: suggest ignores + if: failure() + run: | + mypy --no-error-summary 2>&1 \ + | tr ':' ' ' \ + | awk '{print $1}' \ + | sort \ + | uniq \ + | sed 's/\.py//g; s|src/||g; s|\/__init__||g; s|\/|\.|g' \ + | xargs -I {} echo '"{}",' \ + || true diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml index 0d7d66f6..6cf189ff 100644 --- a/.github/workflows/ci-checks.yml +++ b/.github/workflows/ci-checks.yml @@ -25,7 +25,7 @@ jobs: azure-dir: "" check-package: - uses: Lightning-AI/utilities/.github/workflows/check-package.yml@main + uses: ./check-typing.yml with: actions-ref: main import-name: "lit_data" From d91e219486be1bf8a84600a5801d6faef4769edd Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 16 Feb 2024 11:26:21 +0000 Subject: [PATCH 18/18] update --- .github/workflows/label-conflicts.yml | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 .github/workflows/label-conflicts.yml diff --git a/.github/workflows/label-conflicts.yml b/.github/workflows/label-conflicts.yml deleted file mode 100644 index cc1c398f..00000000 --- a/.github/workflows/label-conflicts.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Label conflicts - -on: - push: - branches: ["main"] - pull_request_target: - types: ["synchronize", "reopened", "opened"] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }} - cancel-in-progress: true - -jobs: - triage-conflicts: - runs-on: ubuntu-latest - steps: - - uses: mschilde/auto-label-merge-conflicts - with: - CONFLICT_LABEL_NAME: "has conflicts" - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - MAX_RETRIES: 3 - WAIT_MS: 5000