diff --git a/README.md b/README.md index 7d727653..bab69ed9 100644 --- a/README.md +++ b/README.md @@ -745,6 +745,75 @@ print(dataset[:]) +
+ ✅ Encrypt and decrypt data at chunk/sample level +  + +Secure data by applying encryption to individual samples or chunks, ensuring sensitive information is protected during storage. + +This example shows how to use the `FernetEncryption` class for sample-level encryption with a data optimization function. + +```python +from litdata import optimize +from litdata.utilities.encryption import FernetEncryption +import numpy as np +from PIL import Image + +# Initialize FernetEncryption with a password for sample-level encryption +fernet = FernetEncryption(password="your_secure_password", level="sample") +data_dir = "s3://my-bucket/optimized_data" + +def random_image(index): + """Generate a random image for demonstration purposes.""" + fake_img = Image.fromarray(np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8)) + return {"image": fake_img, "class": index} + +# Optimize data while applying encryption +optimize( + fn=random_image, + inputs=list(range(5)), # Example inputs: [0, 1, 2, 3, 4] + num_workers=1, + output_dir=data_dir, + chunk_bytes="64MB", + encryption=fernet, +) + +# Save the encryption key to a file for later use +fernet.save("fernet.pem") +``` + +Load the encrypted data using the `StreamingDataset` class as follows (`data_dir` is the output directory used in the previous example): + +```python +from litdata import StreamingDataset +from litdata.utilities.encryption import FernetEncryption + +# Load the encryption key +fernet = FernetEncryption(password="your_secure_password", level="sample") +fernet.load("fernet.pem") + +# Create a streaming dataset for reading the encrypted samples +ds = StreamingDataset(input_dir=data_dir, encryption=fernet) +``` + +To implement your own encryption method, subclass the `Encryption` class and define the necessary methods: + +```python +from litdata.utilities.encryption import Encryption + +class CustomEncryption(Encryption): + def encrypt(self, data): + # Implement your custom encryption logic here + return data + + def decrypt(self, data): + # Implement your custom decryption logic here 
+ return data +``` + +This allows the data to remain secure while maintaining flexibility in the encryption method. +
+   ## Features for transforming datasets @@ -817,75 +886,6 @@ Explore an example setup of litdata with MinIO in the [LitData with MinIO](https -
- ✅ Supports encryption and decryption of data at chunk/sample level -  - -Secure your data by applying encryption to individual samples or chunks, ensuring sensitive information is protected during storage. - -This example demonstrates how to use the `FernetEncryption` class for sample-level encryption with a data optimization function. - -```python -from litdata import optimize -from litdata.utilities.encryption import FernetEncryption -import numpy as np -from PIL import Image - -# Initialize FernetEncryption with a password for sample-level encryption -fernet = FernetEncryption(password="your_secure_password", level="sample") -data_dir = "s3://my-bucket/optimized_data" - -def random_image(index): - """Generate a random image for demonstration purposes.""" - fake_img = Image.fromarray(np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8)) - return {"image": fake_img, "class": index} - -# Optimize data while applying encryption -optimize( - fn=random_image, - inputs=list(range(5)), # Example inputs: [0, 1, 2, 3, 4] - num_workers=1, - output_dir=data_dir, - chunk_bytes="64MB", - encryption=fernet, -) - -# Save the encryption key to a file for later use -fernet.save("fernet.pem") -``` - -You can load the encrypted data using the `StreamingDataset` class as follows: - -```python -from litdata import StreamingDataset -from litdata.utilities.encryption import FernetEncryption - -# Load the encryption key -fernet = FernetEncryption(password="your_secure_password", level="sample") -fernet.load("fernet.pem") - -# Create a streaming dataset for reading the encrypted samples -ds = StreamingDataset(input_dir=data_dir, encryption=fernet) -``` - -If you want to implement your own encryption method, you can subclass the `Encryption` class and define the necessary methods: - -```python -from litdata.utilities.encryption import Encryption - -class CustomEncryption(Encryption): - def encrypt(self, data): - # Implement your custom encryption logic here - return data - - def 
decrypt(self, data): - # Implement your custom decryption logic here - return data -``` - -With this setup, you can ensure that your data remains secure while maintaining flexibility in how you handle encryption. -
-   ----