feat: implement image to image on flux1 model #86

Merged
merged 1 commit into from Oct 22, 2024
82 changes: 61 additions & 21 deletions docs/flux1.md
@@ -2,19 +2,42 @@

This example demonstrates using the Flux1 model to perform tasks such as image generation, mask inpainting, and image-to-image generation based on text prompts.

### Parameters

The following parameters can be passed to the model:

- task: The task to perform using the model - image generation ("generation"), mask inpainting ("inpainting"), or image-to-image generation (`Flux1Task.IMAGE_TO_IMAGE`).
- prompt: The text prompt describing the desired modifications.
- config: A `Flux1Config` object that configures the generation parameters for the Flux1 model (see below).
- image (Image.Image): The original image to be modified - used for the mask inpainting and image-to-image tasks.
- mask_image (Image.Image): The mask image indicating the areas to be inpainted - used for the mask inpainting task.

#### Flux1Config

Below is an example of how to create and use a `Flux1Config` object:

```python
from vision_agent_tools.models.flux1 import Flux1Config

config = Flux1Config(
    height=512,
    width=512,
    num_inference_steps=28,
    guidance_scale=3.5,
    num_images_per_prompt=1,
    max_sequence_length=512,
    seed=42
)
```

- height: The height in pixels of the generated image. Defaults to 512.
- width: The width in pixels of the generated image. Defaults to 512.
- num_inference_steps: The number of inference steps to perform. Defaults to 28.
- guidance_scale: Guidance scale as defined in Classifier-Free Diffusion Guidance. Defaults to 3.5. See the sketch after this list for intuition.
- num_images_per_prompt: The number of images to generate per prompt. Defaults to 1.
- max_sequence_length: Maximum sequence length to use with the prompt. Defaults to 512.
- seed: Seed for the random number generator. If not provided, a random seed is used.
- strength: Indicates the extent to transform the reference `image`. Must be between 0 and 1; a value of 1 essentially ignores `image`.

## Perform image generation

Expand All @@ -29,20 +52,14 @@ flux1 = Flux1()
generated_image = flux1(
    task=Flux1Task.IMAGE_GENERATION,  # Image Generation Task
    prompt="A purple car in a futuristic cityscape",
    config=config  # the Flux1Config instance created above
)
generated_image.save("generated_car.png")
```

--------------------------------------------------------------------

## Perform mask inpainting

To perform mask inpainting, both the original image and the mask image need to be provided, and the two images must have the same dimensions. The mask should clearly delineate the areas you want to modify in the original image. The inpainting process also uses the `strength` parameter, which controls the intensity of the modifications applied to the masked areas. A sketch of preparing a mask with PIL is shown below, followed by the inpainting call.
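
The mask itself can be created however you like; as a hedged illustration (the file names and coordinates below are placeholders, not files from this repository), a simple rectangular mask can be built with PIL before running the example that follows:

```python
from PIL import Image, ImageDraw

# Minimal sketch: a mask the same size as the original image, white (255) where
# the model should repaint and black (0) everywhere else. Paths and coordinates
# are placeholders.
image_to_edit = Image.open("path/to/your/original_image.jpg").convert("RGB")
mask_image = Image.new("L", image_to_edit.size, 0)   # start fully black
draw = ImageDraw.Draw(mask_image)
draw.rectangle([100, 100, 400, 400], fill=255)       # region to inpaint
mask_image.save("mask_image.png")
```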

Expand All @@ -68,18 +85,41 @@ inpainted_image = flux_model(
    prompt=inpainting_prompt,
    image=image_to_edit,
    mask_image=mask_image,
    config=config
)

inpainted_image.save("inpainted_dog_over_cat.png")

```

--------------------------------------------------------------------

## Perform image-to-image generation

To perform image-to-image generation, you need to provide an original image along with a text prompt describing the desired modifications. The original image serves as the base, and the model will generate a new image based on the prompt.

```python
import torch
from PIL import Image
from vision_agent_tools.models.flux1 import Flux1, Flux1Task

# You have an original image named "original_image.jpg" that you want to use for image-to-image generation
original_image = Image.open("path/to/your/original_image.jpg").convert("RGB") # Original image

# Set a new prompt for image-to-image generation
image_to_image_prompt = "A sunny beach with palm trees"

# To perform image-to-image generation
flux1 = Flux1()

generated_image = flux1(
    task=Flux1Task.IMAGE_TO_IMAGE,  # Image-to-Image Generation Task
    prompt=image_to_image_prompt,
    image=original_image,
    config=config  # the Flux1Config instance created above
)

generated_image.save("generated_beach.png")
```
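
How strongly the output departs from the original image is governed by the `strength` field documented above. Below is a minimal sketch of a config tuned for a moderate transformation (the values are illustrative, not defaults from this repository):

```python
from vision_agent_tools.models.flux1 import Flux1Config

# Sketch only: illustrative values for an image-to-image run.
config = Flux1Config(
    height=512,
    width=512,
    num_inference_steps=28,
    strength=0.7,  # lower values stay closer to the input image; 1.0 essentially ignores it
    seed=42,
)
```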

::: vision_agent_tools.models.flux1
122 changes: 87 additions & 35 deletions tests/models/test_flux1.py
@@ -1,25 +1,28 @@
import pytest
from pydantic import ValidationError
from PIL import Image

from vision_agent_tools.models.flux1 import Flux1, Flux1Task, Flux1Config


def test_image_mask_inpainting(model):
    prompt = "cat wizard, Pixar style"
    image = Image.open("tests/shared_data/images/chihuahua.png")
    mask_image = Image.open("tests/shared_data/images/chihuahua_mask.png")

    config = Flux1Config(
        height=32,
        width=32,
        num_inference_steps=1,
        seed=42,
    )

    result = model(
        task=Flux1Task.MASK_INPAINTING,
        prompt=prompt,
        image=image,
        mask_image=mask_image,
        config=config,
    )

    assert result is not None
Expand All @@ -32,16 +35,19 @@ def test_image_mask_inpainting(model):
def test_image_generation(model):
    prompt = "cat wizard, Pixar style"

    config = Flux1Config(
        height=32,
        width=32,
        guidance_scale=0.5,
        num_inference_steps=1,
        seed=42,
    )

    result = model(
        task=Flux1Task.IMAGE_GENERATION,
        prompt=prompt,
        config=config,
    )

    assert result is not None
    assert len(result) == 1
    image = result[0]
Expand All @@ -52,22 +58,30 @@ def test_image_generation(model):
def test_fail_image_generation_dimensions(model):
prompt = "cat wizard, Pixar style"

height = 31
width = 31
try:
config = Flux1Config(
height=31,
width=31,
num_inference_steps=1,
seed=42,
)

model(
task=Flux1Task.IMAGE_GENERATION,
prompt=prompt,
height=height,
width=width,
num_inference_steps=1,
seed=42,
config=config,
)
except ValueError as e:
except ValidationError as e:
assert (
str(e)
== f"`height` and `width` have to be divisible by 8 but are {height} and {width}."
repr(e.errors()[0]["msg"])
== "'Assertion failed, height and width must be multiples of 8.'"
)
assert repr(e.errors()[0]["type"]) == "'assertion_error'"
assert (
repr(e.errors()[1]["msg"])
== "'Assertion failed, height and width must be multiples of 8.'"
)
assert repr(e.errors()[1]["type"]) == "'assertion_error'"


def test_fail_image_mask_size(model):
Expand All @@ -76,16 +90,20 @@ def test_fail_image_mask_size(model):
    mask_image = Image.open("tests/shared_data/images/chihuahua_mask.png")
    mask_image = mask_image.resize((64, 64))

    config = Flux1Config(
        height=32,
        width=32,
        num_inference_steps=1,
        seed=42,
    )

    try:
        model(
            task=Flux1Task.MASK_INPAINTING,
            prompt=prompt,
            image=image,
            mask_image=mask_image,
            config=config,
        )
    except ValueError as e:
        assert str(e) == "The image and mask image should have the same size."
Expand All @@ -97,18 +115,23 @@ def test_different_images_different_seeds(model):
    result_1 = model(
        task=Flux1Task.IMAGE_GENERATION,
        prompt=prompt,
        config=Flux1Config(
            height=32,
            width=32,
            num_inference_steps=1,
            seed=42,
        ),
    )

    result_2 = model(
        task=Flux1Task.IMAGE_GENERATION,
        prompt=prompt,
        config=Flux1Config(
            height=32,
            width=32,
            num_inference_steps=1,
            seed=0,
        ),
    )

    assert result_1 is not None
Expand All @@ -127,23 +150,52 @@ def test_different_images_different_seeds(model):
def test_multiple_images_per_prompt(model):
    prompt = "cat wizard, Pixar style"

    config = Flux1Config(
        height=32,
        width=32,
        num_inference_steps=1,
        num_images_per_prompt=3,
        seed=42,
    )

    result = model(
        task=Flux1Task.IMAGE_GENERATION,
        prompt=prompt,
        config=config,
    )

    assert result is not None
    assert len(result) == 3
    for image in result:
        assert image.mode == "RGB"
        assert image.size == (32, 32)


def test_image_to_image(model):
    prompt = "pixar style"
    image = Image.open("tests/shared_data/images/chihuahua.png")

    config = Flux1Config(
        height=32,
        width=32,
        num_inference_steps=1,
        seed=42,
    )

    result = model(
        task=Flux1Task.IMAGE_TO_IMAGE,
        prompt=prompt,
        image=image,
        config=config,
    )

    assert result is not None
    assert len(result) == 1
    image = result[0]
    assert image.mode == "RGB"
    assert image.size == (32, 32)


@pytest.fixture(scope="session")
def model():
    return Flux1()