resolve setup

Lightning-AI · Feb 15, 2024 · a3b2bc6 · a3b2bc6
1 parent f117d35
commit a3b2bc6
Show file tree

Hide file tree

Showing 41 changed files with 80 additions and 36 deletions.
diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml
@@ -28,7 +28,7 @@ jobs:
     uses: Lightning-AI/utilities/.github/workflows/check-package.yml@main
     with:
       actions-ref: main
-      import-name: "pl_sandbox"
+      import-name: "lit_data"
       artifact-name: dist-packages-${{ github.sha }}
       testing-matrix: |
         {

diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
@@ -65,7 +65,7 @@ jobs:
 
       - name: Tests
         run: |
-          coverage run --source pl_sandbox -m pytest src tests -v
+          coverage run --source lit_data -m pytest src tests -v
 
       - name: Statistics
         if: success()

diff --git a/Makefile b/Makefile
@@ -10,7 +10,7 @@ test: clean
 	pip install -q -r _requirements/test.txt
 
 	# use this to run tests
-	python -m coverage run --source pl_sandbox -m pytest src tests -v --flake8
+	python -m coverage run --source lit_data -m pytest src tests -v --flake8
 	python -m coverage report
 
 docs: clean

diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 <div align="center">
 
-<img alt="Lightning" src="https://pl-flash-data.s3.amazonaws.com/lightning_data_logo.png" width="800px" style="max-width: 100%;">
+<img alt="Lightning" src="https://pl-flash-data.s3.amazonaws.com/lit_data_logo.png" width="800px" style="max-width: 100%;">
 
 <br/>
 <br/>
@@ -45,7 +45,7 @@ Convert your raw dataset into Lightning Streaming format using the `optimize` op
 
 ```python
 import numpy as np
-from lightning.data import optimize
+from lit_data import optimize
 from PIL import Image
 
 
@@ -84,7 +84,7 @@ Here is an example with [AWS S3](https://aws.amazon.com/s3).
 ### 3. Use StreamingDataset and DataLoader
 
 ```python
-from lightning.data import StreamingDataset
+from lit_data import StreamingDataset
 from torch.utils.data import DataLoader
 
 # Remote path where full dataset is persistently stored
@@ -135,7 +135,7 @@ for i in range(1000):
 
 ```python
 import os
-from lightning.data import map
+from lit_data import map
 from PIL import Image
 
 input_dir = "s3://my-bucket/my_images"
@@ -174,7 +174,7 @@ We have end-to-end free [Studios](https://lightning.ai) showing all the steps to
 To scale data processing, create a free account on the [lightning.ai](https://lightning.ai/) platform. With the platform, the `optimize` and `map` operators can start multiple machines to make data processing drastically faster as follows:
 
 ```python
-from lightning.data import optimize, Machine
+from lit_data import optimize, Machine
 
 optimize(
   ...
@@ -186,7 +186,7 @@ optimize(
 OR
 
 ```python
-from lightning.data import map, Machine
+from lit_data import map, Machine
 
 map(
   ...
@@ -216,8 +216,8 @@ The `StreamingDataset` and `StreamingDataLoader` takes care of everything for yo
 You can easily experiment with dataset mixtures using the CombinedStreamingDataset.
 
 ```python
-from lightning.data import StreamingDataset, CombinedStreamingDataset
-from lightning.data.streaming.item_loader import TokensLoader
+from lit_data import StreamingDataset, CombinedStreamingDataset
+from lit_data.streaming.item_loader import TokensLoader
 from tqdm import tqdm
 import os
 from torch.utils.data import DataLoader
@@ -257,7 +257,7 @@ Note: The `StreamingDataLoader` is used by [Lit-GPT](https://github.com/Lightnin
 ```python
 import os
 import torch
-from lightning.data import StreamingDataset, StreamingDataLoader
+from lit_data import StreamingDataset, StreamingDataLoader
 
 dataset = StreamingDataset("s3://my-bucket/my-data", shuffle=True)
 dataloader = StreamingDataLoader(dataset, num_workers=os.cpu_count(), batch_size=64)
@@ -280,7 +280,7 @@ for batch_idx, batch in enumerate(dataloader):
 The `StreamingDataLoader` supports profiling your data loading. Simply use the `profile_batches` argument as follows:
 
 ```python
-from lightning.data import StreamingDataset, StreamingDataLoader
+from lit_data import StreamingDataset, StreamingDataLoader
 
 StreamingDataLoader(..., profile_batches=5)
 ```
@@ -292,7 +292,7 @@ This generates a Chrome trace called `result.json`. You can visualize this trace
 Access the data you need when you need it.
 
 ```python
-from lightning.data import StreamingDataset
+from lit_data import StreamingDataset
 
 dataset = StreamingDataset(...)
 
@@ -304,7 +304,7 @@ print(dataset[42]) # show the 42nd element of the dataset
 ## ✢ Use data transforms
 
 ```python
-from lightning.data import StreamingDataset, StreamingDataLoader
+from lit_data import StreamingDataset, StreamingDataLoader
 import torchvision.transforms.v2.functional as F
 
 class ImagenetStreamingDataset(StreamingDataset):
@@ -326,7 +326,7 @@ for batch in dataloader:
 Limit the size of the cache holding the chunks.
 
 ```python
-from lightning.data import StreamingDataset
+from lit_data import StreamingDataset
 
 dataset = StreamingDataset(..., max_cache_size="10GB")
 ```
@@ -338,7 +338,7 @@ When processing large files like compressed [parquet files](https://en.wikipedia
 ```python
 from pathlib import Path
 import pyarrow.parquet as pq
-from lightning.data import optimize
+from lit_data import optimize
 from tokenizer import Tokenizer
 from functools import partial
 

diff --git a/docs/source/_templates/theme_variables.jinja b/docs/source/_templates/theme_variables.jinja
@@ -1,18 +1,18 @@
 {%- set external_urls = {
-  'github': 'https://github.com/Lightning-AI/lightning-Sandbox',
-  'github_issues': 'https://github.com/Lightning-AI/lightning-Sandbox/issues',
+  'github': 'https://github.com/Lightning-AI/lit-data',
+  'github_issues': 'https://github.com/Lightning-AI/lit-data/issues',
   'contributing': 'https://github.com/Lightning-AI/lightning/blob/master/CONTRIBUTING.md',
   'governance': 'https://github.com/Lightning-AI/lightning/blob/master/governance.md',
-  'docs': 'https://lightning-ai.github.io/lightning-Sandbox/',
+  'docs': 'https://lightning-ai.github.io/lit-data/',
   'twitter': 'https://twitter.com/LightningAI',
   'discuss': 'https://discord.com/invite/tfXFetEZxv',
   'tutorials': 'https://lightning.ai',
-  'previous_pytorch_versions': 'https://lightning-ai.github.io/lightning-Sandbox/',
-  'home': 'https://lightning-ai.github.io/lightning-Sandbox/',
+  'previous_pytorch_versions': 'https://lightning-ai.github.io/lit-data/',
+  'home': 'https://lightning-ai.github.io/lit-data/',
   'get_started': 'https://lightning.ai',
-  'features': 'https://lightning-ai.github.io/lightning-Sandbox/',
+  'features': 'https://lightning-ai.github.io/lit-data/',
   'blog': 'https://www.Lightning.ai/blog',
   'resources': 'https://lightning.ai',
-  'support': 'https://lightning-ai.github.io/lightning-Sandbox/',
+  'support': 'https://lightning-ai.github.io/lit-data/',
 }
 -%}
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -22,14 +22,14 @@
 SPHINX_MOCK_REQUIREMENTS = int(os.environ.get("SPHINX_MOCK_REQUIREMENTS", True))
 
 # alternative https://stackoverflow.com/a/67692/4521646
-spec = spec_from_file_location("pl_sandbox/__about__.py", os.path.join(_PATH_SOURCE, "pl_sandbox", "__about__.py"))
+spec = spec_from_file_location("lit_data/__about__.py", os.path.join(_PATH_SOURCE, "lit_data", "__about__.py"))
 about = module_from_spec(spec)
 spec.loader.exec_module(about)
 
 # -- Project information -----------------------------------------------------
 
 # this name shall match the project name on GitHub as it is used for linking to code
-project = "Lightning-Sandbox"
+project = "lit-data"
 copyright = about.__copyright__
 author = about.__author__
 

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -3,7 +3,7 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-Lightning-Sandbox
+lit-data
 =================
 
 .. toctree::

diff --git a/lit_data/__about__.py b/lit_data/__about__.py
@@ -0,0 +1,43 @@
+# Copyright The Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+
+__author__ = "Lightning AI et al."
+__author_email__ = "[email protected]"
+__license__ = "Apache-2.0"
+__copyright__ = f"Copyright (c) 2018-{time.strftime('%Y')}, {__author__}."
+__homepage__ = "https://github.com/Lightning-AI/lit-data"
+__docs_url__ = "https://lightning.ai/docs/pytorch/stable/"
+# this has to be a simple string, see: https://github.com/pypa/twine/issues/522
+__docs__ = "The Deep Learning framework to train, deploy, and ship AI products Lightning fast."
+__long_doc__ = """
+What is it?
+-----------
+
+TBD @eden
+"""  # TODO
+
+__all__ = [
+    "__author__",
+    "__author_email__",
+    "__copyright__",
+    "__docs__",
+    "__docs_url__",
+    "__homepage__",
+    "__license__",
+    "__version__",
+]
+
+__version__ = "0.0.1"
diff --git a/src/__init__.py → lit_data/__init__.py b/src/__init__.py → lit_data/__init__.py
diff --git a/src/constants.py → lit_data/constants.py b/src/constants.py → lit_data/constants.py
diff --git a/src/processing/__init__.py → lit_data/processing/__init__.py b/src/processing/__init__.py → lit_data/processing/__init__.py
diff --git a/src/processing/data_processor.py → lit_data/processing/data_processor.py b/src/processing/data_processor.py → lit_data/processing/data_processor.py
diff --git a/src/processing/functions.py → lit_data/processing/functions.py b/src/processing/functions.py → lit_data/processing/functions.py
diff --git a/src/processing/readers.py → lit_data/processing/readers.py b/src/processing/readers.py → lit_data/processing/readers.py
diff --git a/src/processing/utilities.py → lit_data/processing/utilities.py b/src/processing/utilities.py → lit_data/processing/utilities.py
diff --git a/src/streaming/__init__.py → lit_data/streaming/__init__.py b/src/streaming/__init__.py → lit_data/streaming/__init__.py
diff --git a/src/streaming/cache.py → lit_data/streaming/cache.py b/src/streaming/cache.py → lit_data/streaming/cache.py
diff --git a/src/streaming/client.py → lit_data/streaming/client.py b/src/streaming/client.py → lit_data/streaming/client.py
diff --git a/src/streaming/combined.py → lit_data/streaming/combined.py b/src/streaming/combined.py → lit_data/streaming/combined.py
diff --git a/src/streaming/compression.py → lit_data/streaming/compression.py b/src/streaming/compression.py → lit_data/streaming/compression.py
diff --git a/src/streaming/config.py → lit_data/streaming/config.py b/src/streaming/config.py → lit_data/streaming/config.py
diff --git a/src/streaming/dataloader.py → lit_data/streaming/dataloader.py b/src/streaming/dataloader.py → lit_data/streaming/dataloader.py
diff --git a/src/streaming/dataset.py → lit_data/streaming/dataset.py b/src/streaming/dataset.py → lit_data/streaming/dataset.py
diff --git a/src/streaming/downloader.py → lit_data/streaming/downloader.py b/src/streaming/downloader.py → lit_data/streaming/downloader.py
diff --git a/src/streaming/item_loader.py → lit_data/streaming/item_loader.py b/src/streaming/item_loader.py → lit_data/streaming/item_loader.py
diff --git a/src/streaming/reader.py → lit_data/streaming/reader.py b/src/streaming/reader.py → lit_data/streaming/reader.py
diff --git a/src/streaming/resolver.py → lit_data/streaming/resolver.py b/src/streaming/resolver.py → lit_data/streaming/resolver.py
diff --git a/src/streaming/sampler.py → lit_data/streaming/sampler.py b/src/streaming/sampler.py → lit_data/streaming/sampler.py
diff --git a/src/streaming/serializers.py → lit_data/streaming/serializers.py b/src/streaming/serializers.py → lit_data/streaming/serializers.py
diff --git a/src/streaming/shuffle.py → lit_data/streaming/shuffle.py b/src/streaming/shuffle.py → lit_data/streaming/shuffle.py
diff --git a/src/streaming/writer.py → lit_data/streaming/writer.py b/src/streaming/writer.py → lit_data/streaming/writer.py
diff --git a/src/utilities/__init__.py → lit_data/utilities/__init__.py b/src/utilities/__init__.py → lit_data/utilities/__init__.py
diff --git a/src/utilities/broadcast.py → lit_data/utilities/broadcast.py b/src/utilities/broadcast.py → lit_data/utilities/broadcast.py
diff --git a/src/utilities/env.py → lit_data/utilities/env.py b/src/utilities/env.py → lit_data/utilities/env.py
diff --git a/src/utilities/format.py → lit_data/utilities/format.py b/src/utilities/format.py → lit_data/utilities/format.py
diff --git a/src/utilities/packing.py → lit_data/utilities/packing.py b/src/utilities/packing.py → lit_data/utilities/packing.py
diff --git a/src/utilities/shuffle.py → lit_data/utilities/shuffle.py b/src/utilities/shuffle.py → lit_data/utilities/shuffle.py
diff --git a/_requirements/docs.txt → requirements/docs.txt b/_requirements/docs.txt → requirements/docs.txt
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -0,0 +1,2 @@
+lightning-utilities >=0.8.0, <0.10.0
+lightning-cloud == 0.5.64  # Must be pinned to ensure compatibility
diff --git a/_requirements/test.txt → requirements/test.txt b/_requirements/test.txt → requirements/test.txt
diff --git a/setup.py b/setup.py
@@ -8,12 +8,11 @@
 from setuptools import find_packages, setup
 
 _PATH_ROOT = os.path.dirname(__file__)
-_PATH_SOURCE = os.path.join(_PATH_ROOT, "src")
 _PATH_REQUIRES = os.path.join(_PATH_ROOT, "_requirements")
 
 
-def _load_py_module(fname, pkg="pl_sandbox"):
-    spec = spec_from_file_location(os.path.join(pkg, fname), os.path.join(_PATH_SOURCE, pkg, fname))
+def _load_py_module(fname, pkg="lit_data"):
+    spec = spec_from_file_location(os.path.join(pkg, fname), os.path.join(_PATH_ROOT, pkg, fname))
     py = module_from_spec(spec)
     spec.loader.exec_module(py)
     return py
@@ -52,13 +51,13 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple =
 # the goal of the project is simplicity for researchers, don't want to add too much
 # engineer specific practices
 setup(
-    name="lightning-sandbox",
+    name="lit_data",
     version=about.__version__,
     description=about.__docs__,
     author=about.__author__,
     author_email=about.__author_email__,
     url=about.__homepage__,
-    download_url="https://github.com/Lightning-AI/lightning-sandbox",
+    download_url="https://github.com/Lightning-AI/lit-data",
     license=about.__license__,
     packages=find_packages(where="src"),
     package_dir={"": "src"},
@@ -72,9 +71,9 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple =
     install_requires=_load_requirements(),
     extras_require=_prepare_extras(),
     project_urls={
-        "Bug Tracker": "https://github.com/Lightning-AI/lightning-sandbox/issues",
-        "Documentation": "https://lightning-ai.github.io/lightning-sandbox/",
-        "Source Code": "https://github.com/Lightning-AI/lightning-sandbox",
+        "Bug Tracker": "https://github.com/Lightning-AI/lit-data/issues",
+        "Documentation": "https://lightning-ai.github.io/lit-data/",
+        "Source Code": "https://github.com/Lightning-AI/lit-data",
     },
     classifiers=[
         "Environment :: Console",
@@ -97,4 +96,4 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple =
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
     ],
-)
+)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		lightning-utilities >=0.8.0, <0.10.0
		lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility