Skip to content

Commit

Permalink
resolve setup
Browse files Browse the repository at this point in the history
  • Loading branch information
tchaton committed Feb 15, 2024
1 parent f117d35 commit a3b2bc6
Show file tree
Hide file tree
Showing 41 changed files with 80 additions and 36 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
uses: Lightning-AI/utilities/.github/workflows/check-package.yml@main
with:
actions-ref: main
import-name: "pl_sandbox"
import-name: "lit_data"
artifact-name: dist-packages-${{ github.sha }}
testing-matrix: |
{
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
- name: Tests
run: |
coverage run --source pl_sandbox -m pytest src tests -v
coverage run --source lit_data -m pytest src tests -v
- name: Statistics
if: success()
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ test: clean
pip install -q -r _requirements/test.txt

# use this to run tests
python -m coverage run --source pl_sandbox -m pytest src tests -v --flake8
python -m coverage run --source lit_data -m pytest src tests -v --flake8
python -m coverage report

docs: clean
Expand Down
28 changes: 14 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<div align="center">

<img alt="Lightning" src="https://pl-flash-data.s3.amazonaws.com/lightning_data_logo.png" width="800px" style="max-width: 100%;">
<img alt="Lightning" src="https://pl-flash-data.s3.amazonaws.com/lit_data_logo.png" width="800px" style="max-width: 100%;">

<br/>
<br/>
Expand Down Expand Up @@ -45,7 +45,7 @@ Convert your raw dataset into Lightning Streaming format using the `optimize` op

```python
import numpy as np
from lightning.data import optimize
from lit_data import optimize
from PIL import Image


Expand Down Expand Up @@ -84,7 +84,7 @@ Here is an example with [AWS S3](https://aws.amazon.com/s3).
### 3. Use StreamingDataset and DataLoader

```python
from lightning.data import StreamingDataset
from lit_data import StreamingDataset
from torch.utils.data import DataLoader

# Remote path where full dataset is persistently stored
Expand Down Expand Up @@ -135,7 +135,7 @@ for i in range(1000):

```python
import os
from lightning.data import map
from lit_data import map
from PIL import Image

input_dir = "s3://my-bucket/my_images"
Expand Down Expand Up @@ -174,7 +174,7 @@ We have end-to-end free [Studios](https://lightning.ai) showing all the steps to
To scale data processing, create a free account on the [lightning.ai](https://lightning.ai/) platform. With the platform, the `optimize` and `map` operators can start multiple machines to make data processing drastically faster, as follows:

```python
from lightning.data import optimize, Machine
from lit_data import optimize, Machine

optimize(
...
Expand All @@ -186,7 +186,7 @@ optimize(
OR

```python
from lightning.data import map, Machine
from lit_data import map, Machine

map(
...
Expand Down Expand Up @@ -216,8 +216,8 @@ The `StreamingDataset` and `StreamingDataLoader` takes care of everything for yo
You can easily experiment with dataset mixtures using the `CombinedStreamingDataset`.

```python
from lightning.data import StreamingDataset, CombinedStreamingDataset
from lightning.data.streaming.item_loader import TokensLoader
from lit_data import StreamingDataset, CombinedStreamingDataset
from lit_data.streaming.item_loader import TokensLoader
from tqdm import tqdm
import os
from torch.utils.data import DataLoader
Expand Down Expand Up @@ -257,7 +257,7 @@ Note: The `StreamingDataLoader` is used by [Lit-GPT](https://github.com/Lightnin
```python
import os
import torch
from lightning.data import StreamingDataset, StreamingDataLoader
from lit_data import StreamingDataset, StreamingDataLoader

dataset = StreamingDataset("s3://my-bucket/my-data", shuffle=True)
dataloader = StreamingDataLoader(dataset, num_workers=os.cpu_count(), batch_size=64)
Expand All @@ -280,7 +280,7 @@ for batch_idx, batch in enumerate(dataloader):
The `StreamingDataLoader` supports profiling your data loading. Simply use the `profile_batches` argument as follows:

```python
from lightning.data import StreamingDataset, StreamingDataLoader
from lit_data import StreamingDataset, StreamingDataLoader

StreamingDataLoader(..., profile_batches=5)
```
Expand All @@ -292,7 +292,7 @@ This generates a Chrome trace called `result.json`. You can visualize this trace
Access the data you need when you need it.

```python
from lightning.data import StreamingDataset
from lit_data import StreamingDataset

dataset = StreamingDataset(...)

Expand All @@ -304,7 +304,7 @@ print(dataset[42]) # show the 42th element of the dataset
## ✢ Use data transforms

```python
from lightning.data import StreamingDataset, StreamingDataLoader
from lit_data import StreamingDataset, StreamingDataLoader
import torchvision.transforms.v2.functional as F

class ImagenetStreamingDataset(StreamingDataset):
Expand All @@ -326,7 +326,7 @@ for batch in dataloader:
Limit the size of the cache holding the chunks.

```python
from lightning.data import StreamingDataset
from lit_data import StreamingDataset

dataset = StreamingDataset(..., max_cache_size="10GB")
```
Expand All @@ -338,7 +338,7 @@ When processing large files like compressed [parquet files](https://en.wikipedia
```python
from pathlib import Path
import pyarrow.parquet as pq
from lightning.data import optimize
from lit_data import optimize
from tokenizer import Tokenizer
from functools import partial

Expand Down
14 changes: 7 additions & 7 deletions docs/source/_templates/theme_variables.jinja
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
{%- set external_urls = {
'github': 'https://github.com/Lightning-AI/lightning-Sandbox',
'github_issues': 'https://github.com/Lightning-AI/lightning-Sandbox/issues',
'github': 'https://github.com/Lightning-AI/lit-data',
'github_issues': 'https://github.com/Lightning-AI/lit-data/issues',
'contributing': 'https://github.com/Lightning-AI/lightning/blob/master/CONTRIBUTING.md',
'governance': 'https://github.com/Lightning-AI/lightning/blob/master/governance.md',
'docs': 'https://lightning-ai.github.io/lightning-Sandbox/',
'docs': 'https://lightning-ai.github.io/lit-data/',
'twitter': 'https://twitter.com/LightningAI',
'discuss': 'https://discord.com/invite/tfXFetEZxv',
'tutorials': 'https://lightning.ai',
'previous_pytorch_versions': 'https://lightning-ai.github.io/lightning-Sandbox/',
'home': 'https://lightning-ai.github.io/lightning-Sandbox/',
'previous_pytorch_versions': 'https://lightning-ai.github.io/lit-data/',
'home': 'https://lightning-ai.github.io/lit-data/',
'get_started': 'https://lightning.ai',
'features': 'https://lightning-ai.github.io/lightning-Sandbox/',
'features': 'https://lightning-ai.github.io/lit-data/',
'blog': 'https://www.Lightning.ai/blog',
'resources': 'https://lightning.ai',
'support': 'https://lightning-ai.github.io/lightning-Sandbox/',
'support': 'https://lightning-ai.github.io/lit-data/',
}
-%}
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@
SPHINX_MOCK_REQUIREMENTS = int(os.environ.get("SPHINX_MOCK_REQUIREMENTS", True))

# alternative https://stackoverflow.com/a/67692/4521646
spec = spec_from_file_location("pl_sandbox/__about__.py", os.path.join(_PATH_SOURCE, "pl_sandbox", "__about__.py"))
spec = spec_from_file_location("lit_data/__about__.py", os.path.join(_PATH_SOURCE, "lit_data", "__about__.py"))
about = module_from_spec(spec)
spec.loader.exec_module(about)

# -- Project information -----------------------------------------------------

# this name shall match the project name in Github as it is used for linking to code
project = "Lightning-Sandbox"
project = "lit-data"
copyright = about.__copyright__
author = about.__author__

Expand Down
2 changes: 1 addition & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Lightning-Sandbox
lit-data
=================

.. toctree::
Expand Down
43 changes: 43 additions & 0 deletions lit_data/__about__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import time

# Package metadata. Every value is a plain module-level constant so the file
# can be executed on its own to read them.

# Defined first so that every name listed in ``__all__`` below already exists
# by the time ``__all__`` is assigned.
__version__ = "0.0.1"

__author__ = "Lightning AI et al."
# Contact address published in the package metadata. The text that stood here
# before ("[email protected]") was a redaction placeholder, not a usable
# address. NOTE(review): confirm this is the intended mailbox.
__author_email__ = "pytorch@lightning.ai"
__license__ = "Apache-2.0"
# The closing year is taken from the clock at the moment this file is executed.
__copyright__ = f"Copyright (c) 2018-{time.strftime('%Y')}, {__author__}."
__homepage__ = "https://github.com/Lightning-AI/lit-data"
__docs_url__ = "https://lightning.ai/docs/pytorch/stable/"
# this has to be a simple string, see: https://github.com/pypa/twine/issues/522
__docs__ = "The Deep Learning framework to train, deploy, and ship AI products Lightning fast."
__long_doc__ = """
What is it?
-----------
TBD @eden
"""  # TODO

# Names exported by ``from lit_data.__about__ import *``; ``__long_doc__`` is
# deliberately left as in the original (not exported).
__all__ = [
    "__author__",
    "__author_email__",
    "__copyright__",
    "__docs__",
    "__docs_url__",
    "__homepage__",
    "__license__",
    "__version__",
]
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lightning-utilities >=0.8.0, <0.10.0
lightning-cloud == 0.5.64 # Must be pinned to ensure compatibility
File renamed without changes.
17 changes: 8 additions & 9 deletions setup.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
from setuptools import find_packages, setup

_PATH_ROOT = os.path.dirname(__file__)
_PATH_SOURCE = os.path.join(_PATH_ROOT, "src")
_PATH_REQUIRES = os.path.join(_PATH_ROOT, "_requirements")


def _load_py_module(fname, pkg="pl_sandbox"):
spec = spec_from_file_location(os.path.join(pkg, fname), os.path.join(_PATH_SOURCE, pkg, fname))
def _load_py_module(fname, pkg="lit_data"):
    """Execute a single source file of package ``pkg`` and return the module object.

    The file is read from ``<_PATH_ROOT>/<pkg>/<fname>`` directly by path, so the
    package itself does not have to be importable.

    Args:
        fname: File name inside the package directory, e.g. ``"__about__.py"``.
        pkg: Name of the package directory located under ``_PATH_ROOT``.

    Returns:
        The module object produced by executing the file.
    """
    # The spec name mirrors the relative path of the file (``pkg/fname``).
    module_name = os.path.join(pkg, fname)
    file_path = os.path.join(_PATH_ROOT, pkg, fname)
    module_spec = spec_from_file_location(module_name, file_path)
    module = module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module
Expand Down Expand Up @@ -52,13 +51,13 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple =
# the goal of the project is simplicity for researchers, don't want to add too much
# engineer specific practices
setup(
name="lightning-sandbox",
name="lit_data",
version=about.__version__,
description=about.__docs__,
author=about.__author__,
author_email=about.__author_email__,
url=about.__homepage__,
download_url="https://github.com/Lightning-AI/lightning-sandbox",
download_url="https://github.com/Lightning-AI/lit-data",
license=about.__license__,
packages=find_packages(where="src"),
package_dir={"": "src"},
Expand All @@ -72,9 +71,9 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple =
install_requires=_load_requirements(),
extras_require=_prepare_extras(),
project_urls={
"Bug Tracker": "https://github.com/Lightning-AI/lightning-sandbox/issues",
"Documentation": "https://lightning-ai.github.io/lightning-sandbox/",
"Source Code": "https://github.com/Lightning-AI/lightning-sandbox",
"Bug Tracker": "https://github.com/Lightning-AI/lit-data/issues",
"Documentation": "https://lightning-ai.github.io/lit-data/",
"Source Code": "https://github.com/Lightning-AI/lit-data",
},
classifiers=[
"Environment :: Console",
Expand All @@ -97,4 +96,4 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple =
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
],
)
)

0 comments on commit a3b2bc6

Please sign in to comment.