diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9f31b57
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,166 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Do not save these folders (mainly used for AI-generated content)
+/outputs/
+/skybox/outputs/
+/environment/outputs/
+/asr/outputs/
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/VR-Environment-GenAI-Server.iml b/.idea/VR-Environment-GenAI-Server.iml
new file mode 100644
index 0000000..2c80e12
--- /dev/null
+++ b/.idea/VR-Environment-GenAI-Server.iml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..4e02086
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..06dd736
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ComfyUI/README.md b/ComfyUI/README.md
new file mode 100644
index 0000000..a0abf11
--- /dev/null
+++ b/ComfyUI/README.md
@@ -0,0 +1,26 @@
+# ComfyUI workflows
+
+This folder stores workflows for [ComfyUI](https://github.com/comfyanonymous/ComfyUI).
+Please note that the custom nodes involved have to be loaded manually in ComfyUI.
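+In practice, this usually means copying the Python files from ``custom_nodes`` into ComfyUI's own
+``custom_nodes`` directory, where ComfyUI discovers their ``NODE_CLASS_MAPPINGS`` on startup.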
+
+## Philosophy
+
+The nodes defined here are completely independent of the rest of the project and may reimplement existing
+features of the code base. The main purpose of this folder is to provide visual equivalents of the code features.
+
+## Main workflows
+
+The workflows can be found in the ``workflows`` folder. It is recommended to use small workflow steps,
+as the results of the largest workflows can be quite random.
+
+- sdxl.json: Basic image generation using Stable Diffusion, roughly equivalent to ``../skybox/diffusion.py``
+([HuggingFace](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)).
+- sdxl_with_refiner.json: Improved image generation that implements the
+[refiner](https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0).
+- inpainting_demo.json: A simple inpainting demo.
+- sdxl_inpainting_demo.json: A more complete inpainting demo using pure SDXL features.
+- central_inpainting.json: An inpainting implementation using the standard nodes,
+with [sdxl inpainting](https://huggingface.co/diffusers/stable-diffusion-xl-1.0-inpainting-0.1),
+that produces a horizontally tileable image.
+- panorama_creator.json: An extended workflow to create a flat panorama image.
+- text_to_skybox.json: A complete workflow to generate a skybox from a prompt.
diff --git a/ComfyUI/custom_nodes/audiogen_nodes.py b/ComfyUI/custom_nodes/audiogen_nodes.py
new file mode 100644
index 0000000..002f3de
--- /dev/null
+++ b/ComfyUI/custom_nodes/audiogen_nodes.py
@@ -0,0 +1,216 @@
+"""
+A custom implementation of eigenpunk/ComfyUI-audio, with the main difference that it accepts several prompts as input.
+
+Source : https://github.com/eigenpunk/ComfyUI-audio/blob/main/musicgen_nodes.py
+"""
+import ast
+import gc
+from contextlib import contextmanager
+from typing import Optional, Union
+
+import torch
+from torch.nn.functional import pad
+from audiocraft.models import AudioGen, MusicGen
+
+
+MODEL_NAMES = [
+ "musicgen-small",
+ "musicgen-medium",
+ "musicgen-melody",
+ "musicgen-large",
+ "musicgen-melody-large",
+ # TODO: stereo models seem not to be working out of the box
+ # "musicgen-stereo-small",
+ # "musicgen-stereo-medium",
+ # "musicgen-stereo-melody",
+ # "musicgen-stereo-large",
+ # "musicgen-stereo-melody-large",
+ "audiogen-medium",
+]
+
+
+def do_cleanup(cuda_cache=True):
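+    """Run the Python garbage collector and, optionally, release PyTorch's cached CUDA memory."""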
+ gc.collect()
+ if cuda_cache:
+ torch.cuda.empty_cache()
+
+
+def object_to(obj, device=None, exclude=None, empty_cuda_cache=True, verbose=False):
+ """
+    Recurse through an object and move any PyTorch tensors/parameters/modules to the given device.
+    If device is None, the CPU is used by default. If the device is a CUDA device and empty_cuda_cache
+    is enabled, this also frees unused CUDA memory cached by PyTorch.
+ """
+
+ if not hasattr(obj, "__dict__"):
+ return obj
+
+ classname = type(obj).__name__
+ exclude = exclude or set()
+ device = device or "cpu"
+
+ def _move_and_recurse(o, name=""):
+ child_moved = False
+ for k, v in vars(o).items():
+ moved = False
+ cur_name = f"{name}.{k}" if name != "" else k
+ if cur_name in exclude:
+ continue
+ if isinstance(v, (torch.nn.Module, torch.nn.Parameter, torch.Tensor)):
+ setattr(o, k, v.to(device))
+ moved = True
+ elif hasattr(v, "__dict__"):
+ v, moved = _move_and_recurse(v, name=cur_name)
+ if moved: setattr(o, k, v)
+ if verbose and moved:
+ print(f"moved {classname}.{cur_name} to {device}")
+ child_moved |= moved
+ return o, child_moved
+
+ if isinstance(obj, torch.nn.Module):
+ obj = obj.to(device)
+
+ obj, _ = _move_and_recurse(obj)
+ if "cuda" in device and empty_cuda_cache:
+ torch.cuda.empty_cache()
+ return obj
+
+
+def tensors_to(tensors, device):
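+    """Recursively move tensors (and lists/dicts/sets/objects containing tensors) to the given device."""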
+ if isinstance(tensors, torch.Tensor):
+ return tensors.to(device)
+ if hasattr(tensors, "__dict__"):
+ return object_to(tensors, device, empty_cuda_cache=False)
+ if isinstance(tensors, (list, tuple)):
+ return [tensors_to(x, device) for x in tensors]
+ if isinstance(tensors, dict):
+ return {k: tensors_to(v, device) for k, v in tensors.items()}
+ if isinstance(tensors, set):
+ return {tensors_to(x, device) for x in tensors}
+ return tensors
+
+
+def tensors_to_cpu(tensors):
+ return tensors_to(tensors, "cpu")
+
+
+@contextmanager
+def obj_on_device(model, src="cpu", dst="cuda", exclude=None, empty_cuda_cache=True, verbose_move=False):
+ model = object_to(model, dst, exclude=exclude, empty_cuda_cache=empty_cuda_cache, verbose=verbose_move)
+ yield model
+ model = object_to(model, src, exclude=exclude, empty_cuda_cache=empty_cuda_cache, verbose=verbose_move)
+
+
+def stack_audio_tensors(tensors, mode="pad"):
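+    """Stack a list of audio tensors into a single batch, padding or truncating them to a common length."""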
+ sizes = [x.shape[-1] for x in tensors]
+
+ if mode in {"pad_l", "pad_r", "pad"}:
+ # pad input tensors to be equal length
+ dst_size = max(sizes)
+ stack_tensors = (
+ [pad(x, pad=(0, dst_size - x.shape[-1])) for x in tensors]
+ if mode == "pad_r"
+ else [pad(x, pad=(dst_size - x.shape[-1], 0)) for x in tensors]
+ )
+ elif mode in {"trunc_l", "trunc_r", "trunc"}:
+ # truncate input tensors to be equal length
+ dst_size = min(sizes)
+ stack_tensors = (
+ [x[:, x.shape[-1] - dst_size:] for x in tensors]
+ if mode == "trunc_r"
+ else [x[:, :dst_size] for x in tensors]
+ )
+ else:
+        assert False, f'unknown mode "{mode}"'
+
+ return torch.stack(stack_tensors)
+
+
+class MusicgenGenerate:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "model": ("MUSICGEN_MODEL",),
+ "text": ("STRING", {"default": "", "multiline": True}),
+ "batch_size": ("INT", {"default": 1, "min": 1}),
+ "duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 300.0, "step": 0.01}),
+ "cfg": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+ "top_k": ("INT", {"default": 250, "min": 0, "max": 10000, "step": 1}),
+ "top_p": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
+ "temperature": ("FLOAT", {"default": 1.0, "min": 0.001, "step": 0.001}),
+ "seed": ("INT", {"default": 0, "min": 0}),
+ },
+ "optional": {"audio": ("AUDIO_TENSOR",)},
+ }
+
+ RETURN_NAMES = ("RAW_AUDIO",)
+ RETURN_TYPES = ("AUDIO_TENSOR",)
+ FUNCTION = "generate"
+ CATEGORY = "audio"
+
+ def generate(
+ self,
+ model: Union[AudioGen, MusicGen],
+ text: str = "",
+ batch_size: int = 1,
+ duration: float = 10.0,
+ cfg: float = 1.0,
+ top_k: int = 250,
+ top_p: float = 0.0,
+ temperature: float = 1.0,
+ seed: int = 0,
+ audio: Optional[torch.Tensor] = None,
+ ):
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ # empty string = unconditional generation
+ if text == "":
+ text = None
+
+ model.set_generation_params(
+ top_k=top_k,
+ top_p=top_p,
+ temperature=temperature,
+ duration=duration,
+ cfg_coef=cfg,
+ )
+ with torch.random.fork_rng(), obj_on_device(model, dst=device, verbose_move=True) as m:
+ torch.manual_seed(seed)
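+            # The "text" widget is expected to hold a Python literal, typically a list of
+            # prompt strings (e.g. '["birds chirping", "wind in the trees"]'), so that
+            # several prompts can be generated in a single batch.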
+            text_input = ast.literal_eval(text) if text is not None else None
+ print(text_input)
+ if audio is not None:
+ # do continuation with input audio and (optional) text prompting
+ if isinstance(audio, list):
+ # left-padded stacking into batch tensor
+ audio = stack_audio_tensors(audio)
+
+ if audio.shape[0] < batch_size:
+ # (try to) expand batch if smaller than requested
+ audio = audio.expand(batch_size, -1, -1)
+ elif audio.shape[0] > batch_size:
+ # truncate batch if larger than requested
+ audio = audio[:batch_size]
+
+ audio_input = tensors_to(audio, device)
+ audio_out = m.generate_continuation(audio_input, model.sample_rate, text_input, progress=True)
+ elif text is not None:
+ # do text-to-music
+ audio_out = m.generate(text_input, progress=True)
+ else:
+ # do unconditional music generation
+ audio_out = m.generate_unconditional(batch_size, progress=True)
+
+ audio_out = tensors_to_cpu(audio_out)
+
+ audio_out = torch.unbind(audio_out)
+ do_cleanup()
+ return list(audio_out),
+
+
+NODE_CLASS_MAPPINGS = {
+ "MusicgenGenerateCustom": MusicgenGenerate,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+ "MusicgenGenerateCustom": "Musicgen Generator Custom",
+}
\ No newline at end of file
diff --git a/ComfyUI/custom_nodes/mask_middle.py b/ComfyUI/custom_nodes/mask_middle.py
new file mode 100644
index 0000000..eae6104
--- /dev/null
+++ b/ComfyUI/custom_nodes/mask_middle.py
@@ -0,0 +1,96 @@
+"""
+Define a VerticalMiddleMask node that masks only the vertical center band of an image.
+"""
+import torch
+
+
+class VerticalMiddleMask:
+ """
+    A node that creates a mask covering a vertical band in the middle of an image.
+
+ Class methods
+ -------------
+ INPUT_TYPES (dict):
+ Tell the main program input parameters of nodes.
+ IS_CHANGED:
+ optional method to control when the node is re-executed.
+
+ Attributes
+ ----------
+ RETURN_TYPES (`tuple`):
+ The type of each element in the output tuple.
+ RETURN_NAMES (`tuple`):
+ The name of each output in the output tuple (Optional).
+ FUNCTION (`str`):
+ The name of the entry-point method. For example, if `FUNCTION = "execute"` then it will run Example().execute()
+ OUTPUT_NODE ([`bool`]):
+ If this node is an output node that outputs a result/image from the graph. The SaveImage node is an example.
+ The backend iterates on these output nodes and tries to execute all their parents
+ if their parent graph is properly connected.
+ Assumed to be False if not present.
+ CATEGORY (`str`):
+ The category the node should appear in the UI.
+ execute(s) -> tuple || None:
+ The entry point method. The name of this method must be the same as the value of property `FUNCTION`.
+ For example, if `FUNCTION = "execute"` then this method's name must be `execute`, if `FUNCTION = "foo"`
+ then it must be `foo`.
+ """
+
+ @classmethod
+ def INPUT_TYPES(self):
+ """
+ Return a dictionary which contains config for all input fields.
+
+ Some types (string): "MODEL", "VAE", "CLIP", "CONDITIONING", "LATENT", "IMAGE", "INT", "STRING", "FLOAT".
+ Input types "INT", "STRING" or "FLOAT" are special values for fields on the node.
+ The type can be a list for selection.
+
+ Returns: `dict`:
+ - Key input_fields_group (`string`): Can be either required, hidden or optional.
+ A node class must have property `required`
+ - Value input_fields (`dict`): Contains input fields config:
+                * Key field_name (`string`): Name of an entry-point method's argument
+                * Value field_config (`tuple`):
+                    + First value is a string indicating the type of the field, or a list for selection.
+ + Second value is a config for type "INT", "STRING" or "FLOAT".
+ """
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ "mask_width": ("INT", {
+ "default": 16,
+ "min": 0, # Minimum value
+ "max": 1024, # Maximum value
+ "step": 4, # Slider's step
+ "display": "number" # Cosmetic only: display as "number" or "slider"
+ })
+ },
+ }
+
+ RETURN_TYPES = ("MASK", )
+ RETURN_NAMES = ("Mask", )
+
+ FUNCTION = "main"
+
+ # OUTPUT_NODE = False
+
+ CATEGORY = "mask"
+
+ def main(self, image, mask_width):
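+        # ComfyUI images are [batch, height, width, channels]; the mask drops the channel
+        # dimension and marks a band of `mask_width` columns around the horizontal center.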
+ mask = torch.zeros(image.shape[:-1])
+ image_center = image.shape[2] // 2
+ mask[:, :, image_center - mask_width // 2:image_center + mask_width // 2] = 1
+ return mask,
+
+
+# A dictionary that contains all nodes you want to export with their names
+# NOTE: names should be globally unique
+NODE_CLASS_MAPPINGS = {
+ "VerticalMiddleMask": VerticalMiddleMask
+}
+
+# A dictionary that contains the friendly/humanly readable titles for the nodes
+NODE_DISPLAY_NAME_MAPPINGS = {
+ "VerticalMiddleMask": "VerticalMiddleMask"
+}
+
diff --git a/ComfyUI/custom_nodes/middle_split.py b/ComfyUI/custom_nodes/middle_split.py
new file mode 100644
index 0000000..ccf886f
--- /dev/null
+++ b/ComfyUI/custom_nodes/middle_split.py
@@ -0,0 +1,101 @@
+"""
+Definition of the MiddleSplit node.
+"""
+import torch
+
+
+class MiddleSplit:
+ """
+    A node that flips an image horizontally and swaps its two halves, so the original left and right edges meet at the center.
+
+ Class methods
+ -------------
+ INPUT_TYPES (dict):
+ Tell the main program input parameters of nodes.
+ IS_CHANGED:
+ optional method to control when the node is re-executed.
+
+ Attributes
+ ----------
+ RETURN_TYPES (`tuple`):
+ The type of each element in the output tuple.
+ RETURN_NAMES (`tuple`):
+ The name of each output in the output tuple (Optional).
+ FUNCTION (`str`):
+ The name of the entry-point method. For example, if `FUNCTION = "execute"` then it will run Example().execute()
+ OUTPUT_NODE ([`bool`]):
+ If this node is an output node that outputs a result/image from the graph. The SaveImage node is an example.
+ The backend iterates on these output nodes and tries to execute all their parents
+ if their parent graph is properly connected.
+ Assumed to be False if not present.
+ CATEGORY (`str`):
+ The category the node should appear in the UI.
+ execute(s) -> tuple || None:
+ The entry point method. The name of this method must be the same as the value of property `FUNCTION`.
+ For example, if `FUNCTION = "execute"` then this method's name must be `execute`, if `FUNCTION = "foo"`
+ then it must be `foo`.
+ """
+
+ @classmethod
+ def INPUT_TYPES(self):
+ """
+ Return a dictionary which contains config for all input fields.
+
+ Some types (string): "MODEL", "VAE", "CLIP", "CONDITIONING", "LATENT", "IMAGE", "INT", "STRING", "FLOAT".
+ Input types "INT", "STRING" or "FLOAT" are special values for fields on the node.
+ The type can be a list for selection.
+
+ Returns: `dict`:
+ - Key input_fields_group (`string`): Can be either required, hidden or optional.
+ A node class must have property `required`
+ - Value input_fields (`dict`): Contains input fields config:
+                * Key field_name (`string`): Name of an entry-point method's argument
+                * Value field_config (`tuple`):
+                    + First value is a string indicating the type of the field, or a list for selection.
+ + Second value is a config for type "INT", "STRING" or "FLOAT".
+ """
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ },
+ }
+
+ RETURN_TYPES = ("IMAGE", )
+ RETURN_NAMES = ("Image", )
+
+ FUNCTION = "main"
+
+ # OUTPUT_NODE = False
+
+ CATEGORY = "image"
+
+ def main(self, image):
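+        # Images are [batch, height, width, channels]. Flipping the image horizontally and
+        # swapping its two halves moves the original left/right edges to the center (where
+        # they can be inpainted) while the output still tiles seamlessly.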
+ center = image.size()[2] // 2
+ flipped = torch.fliplr(image.permute(1, 2, 3, 0)).permute(3, 0, 1, 2)
+ l_im, r_im = flipped[:, :, :center, :], flipped[:, :, center:, :]
+ return torch.cat((r_im, l_im), 2),
+
+ """
+ The node will always be re executed if any of the inputs change but
+ this method can be used to force the node to execute again even when the inputs don't change.
+ You can make this node return a number or a string.
+ This value will be compared to the one returned the last time the node was executed,
+ if it is different the node will be executed again.
+ This method is used in the core repo for the LoadImage node where they return the image hash as a string,
+ if the image hash changes between executions the LoadImage node is executed again.
+ """
+ # @classmethod
+ # def IS_CHANGED(s, image, string_field, int_field, float_field, print_to_screen):
+ # return ""
+
+
+# A dictionary that contains all nodes you want to export with their names
+# NOTE: names should be globally unique
+NODE_CLASS_MAPPINGS = {
+ "MiddleSplit": MiddleSplit
+}
+
+# A dictionary that contains the friendly/humanly readable titles for the nodes
+NODE_DISPLAY_NAME_MAPPINGS = {
+ "MiddleSplit": "Split and flip Image"
+}
diff --git a/ComfyUI/custom_nodes/reverse_lambert_projection.py b/ComfyUI/custom_nodes/reverse_lambert_projection.py
new file mode 100644
index 0000000..1515d73
--- /dev/null
+++ b/ComfyUI/custom_nodes/reverse_lambert_projection.py
@@ -0,0 +1,115 @@
+"""
+Define a node that reprojects a flat image into a reversed Lambert (cylindrical) projection.
+"""
+import torch
+
+
+def cylindrical_projection(image):
+ """
+ Compute a cylindrical projection from a flat image.
+
+    The x-axis is preserved, but the y-axis is remapped.
+ This is the inverse operation of a Lambert projection.
+
+ :param torch.tensor image: Input image
+ :return torch.tensor: Output image in reversed cylindrical projection
+ """
+ height, width = image.shape[1:3]
+ cylindrical_image = torch.empty(image.shape)
+
+    # Map each output row to a (fractional) source row in [0, height).
+    # Viewing the cylinder as a sphere amounts to a cosine transformation of the vertical axis,
+    # so the inverse mapping applies an arccos.
+    lines = height * (1 - torch.arccos(torch.linspace(-1, 1, height)) / torch.pi)
+    ratios = lines - torch.floor(lines)
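+    # For each output row, linearly interpolate between the two nearest source rows.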
+ for y in range(height):
+ v = int(lines[y].item())
+ ratio = ratios[y]
+ if v + 1 < height:
+ interpolates = image[:, v + 1] * ratio + (1 - ratio) * image[:, v]
+ else:
+ interpolates = image[:, height - 1]
+
+ cylindrical_image[:, y] = interpolates
+
+ return cylindrical_image
+
+
+class ImageReverseLambert:
+ """
+    A node that remaps the rows of a flat image into a reversed Lambert (cylindrical) projection.
+
+ Class methods
+ -------------
+ INPUT_TYPES (dict):
+ Tell the main program input parameters of nodes.
+ IS_CHANGED:
+ optional method to control when the node is re-executed.
+
+ Attributes
+ ----------
+ RETURN_TYPES (`tuple`):
+ The type of each element in the output tuple.
+ RETURN_NAMES (`tuple`):
+ The name of each output in the output tuple (Optional).
+ FUNCTION (`str`):
+ The name of the entry-point method. For example, if `FUNCTION = "execute"` then it will run Example().execute()
+ OUTPUT_NODE ([`bool`]):
+ If this node is an output node that outputs a result/image from the graph. The SaveImage node is an example.
+ The backend iterates on these output nodes and tries to execute all their parents
+ if their parent graph is properly connected.
+ Assumed to be False if not present.
+ CATEGORY (`str`):
+ The category the node should appear in the UI.
+ execute(s) -> tuple || None:
+ The entry point method. The name of this method must be the same as the value of property `FUNCTION`.
+ For example, if `FUNCTION = "execute"` then this method's name must be `execute`, if `FUNCTION = "foo"`
+ then it must be `foo`.
+ """
+
+ @classmethod
+ def INPUT_TYPES(self):
+ """
+ Return a dictionary which contains config for all input fields.
+
+ Some types (string): "MODEL", "VAE", "CLIP", "CONDITIONING", "LATENT", "IMAGE", "INT", "STRING", "FLOAT".
+ Input types "INT", "STRING" or "FLOAT" are special values for fields on the node.
+ The type can be a list for selection.
+
+ Returns: `dict`:
+ - Key input_fields_group (`string`): Can be either required, hidden or optional.
+ A node class must have property `required`
+ - Value input_fields (`dict`): Contains input fields config:
+                * Key field_name (`string`): Name of an entry-point method's argument
+                * Value field_config (`tuple`):
+                    + First value is a string indicating the type of the field, or a list for selection.
+ + Second value is a config for type "INT", "STRING" or "FLOAT".
+ """
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ }
+ }
+
+    RETURN_TYPES = ("IMAGE",)
+
+ FUNCTION = "main"
+
+ # OUTPUT_NODE = False
+
+ CATEGORY = "image"
+
+ def main(self, image):
+ return cylindrical_projection(image),
+
+
+# A dictionary that contains all nodes you want to export with their names
+# NOTE: names should be globally unique
+NODE_CLASS_MAPPINGS = {
+ "ImageReverseLambert": ImageReverseLambert
+}
+
+# A dictionary that contains the friendly/humanly readable titles for the nodes
+NODE_DISPLAY_NAME_MAPPINGS = {
+ "ImageReverseLambert": "Project as Reversed Lambert"
+}
diff --git a/ComfyUI/custom_nodes/stretch_image_outpaiting.py b/ComfyUI/custom_nodes/stretch_image_outpaiting.py
new file mode 100644
index 0000000..439bcdc
--- /dev/null
+++ b/ComfyUI/custom_nodes/stretch_image_outpaiting.py
@@ -0,0 +1,135 @@
+"""
+Define an outpainting node that stretches the borders of the original image.
+"""
+import torch
+
+
+def pad_image(image, top, bottom):
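+    """Pad the image with blank rows above and below; return the padded image and a mask of the new rows."""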
+ initial_height = image.shape[1]
+ shape = list(image.shape)
+ shape[1] = initial_height + top + bottom
+ output = torch.zeros(shape)
+ mask = torch.zeros(output.shape[:-1])
+ output[:, top:top + initial_height] = image
+ mask[:, :top] = 1
+ mask[:, initial_height + top:] = 1
+ return output, mask
+
+
+def stretch_image(padded_image, top, bottom, border):
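+    """Fill the padded bands with the mean of a `border`-row strip taken at each edge of the original image."""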
+ output = padded_image.clone().detach()
+ top_area = padded_image[:, top:top + border]
+ top_area = torch.mean(top_area, 1, keepdim=True).repeat([1, top, 1, 1])
+ output[:, :top] = top_area
+
+ if bottom > 0:
+ bottom_area = padded_image[:, -bottom - border:-bottom]
+ bottom_area = torch.mean(bottom_area, 1, keepdim=True).repeat([1, bottom, 1, 1])
+ output[:, -bottom:] = bottom_area
+ return output
+
+
+class ImageStretchForOutpaint:
+ """
+    A node that pads an image at the top and bottom and fills the padding by stretching the border
+    rows, returning the padded image and the matching outpainting mask.
+
+ Class methods
+ -------------
+ INPUT_TYPES (dict):
+ Tell the main program input parameters of nodes.
+ IS_CHANGED:
+ optional method to control when the node is re-executed.
+
+ Attributes
+ ----------
+ RETURN_TYPES (`tuple`):
+ The type of each element in the output tuple.
+ RETURN_NAMES (`tuple`):
+ The name of each output in the output tuple (Optional).
+ FUNCTION (`str`):
+ The name of the entry-point method. For example, if `FUNCTION = "execute"` then it will run Example().execute()
+ OUTPUT_NODE ([`bool`]):
+ If this node is an output node that outputs a result/image from the graph. The SaveImage node is an example.
+ The backend iterates on these output nodes and tries to execute all their parents
+ if their parent graph is properly connected.
+ Assumed to be False if not present.
+ CATEGORY (`str`):
+ The category the node should appear in the UI.
+ execute(s) -> tuple || None:
+ The entry point method. The name of this method must be the same as the value of property `FUNCTION`.
+ For example, if `FUNCTION = "execute"` then this method's name must be `execute`, if `FUNCTION = "foo"`
+ then it must be `foo`.
+ """
+
+ @classmethod
+ def INPUT_TYPES(self):
+ """
+ Return a dictionary which contains config for all input fields.
+
+ Some types (string): "MODEL", "VAE", "CLIP", "CONDITIONING", "LATENT", "IMAGE", "INT", "STRING", "FLOAT".
+ Input types "INT", "STRING" or "FLOAT" are special values for fields on the node.
+ The type can be a list for selection.
+
+ Returns: `dict`:
+ - Key input_fields_group (`string`): Can be either required, hidden or optional.
+ A node class must have property `required`
+ - Value input_fields (`dict`): Contains input fields config:
+                * Key field_name (`string`): Name of an entry-point method's argument
+                * Value field_config (`tuple`):
+                    + First value is a string indicating the type of the field, or a list for selection.
+ + Second value is a config for type "INT", "STRING" or "FLOAT".
+ """
+ outpainting_settings = {
+ "default": 0,
+ "min": 0, # Minimum value
+ "max": 1024, # Maximum value
+ "step": 4, # Slider's step
+ "display": "number" # Cosmetic only: display as "number" or "slider"
+ }
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ "top": ("INT", outpainting_settings),
+                "bottom": ("INT", outpainting_settings),
+ "border": ("INT", {
+ "default": 5,
+ "min": 0,
+ "max": 1024,
+ "step": 1,
+ "display": "number"
+ })
+ },
+ }
+
+ RETURN_TYPES = ("IMAGE", "MASK", )
+ # RETURN_NAMES = ("Image", "Mask", )
+
+ FUNCTION = "main"
+
+ # OUTPUT_NODE = False
+
+ CATEGORY = "image"
+
+ def main(self, image, top, bottom, border):
+ padded_image, mask = pad_image(image, top, bottom)
+ output = stretch_image(padded_image, top, bottom, border)
+ return output, mask
+
+
+# A dictionary that contains all nodes you want to export with their names
+# NOTE: names should be globally unique
+NODE_CLASS_MAPPINGS = {
+ "ImageStretchForOutpaint": ImageStretchForOutpaint
+}
+
+# A dictionary that contains the friendly/humanly readable titles for the nodes
+NODE_DISPLAY_NAME_MAPPINGS = {
+ "ImageStretchForOutpaint": "Stretch Image for Outpainting"
+}
+
diff --git a/ComfyUI/workflows/central_inpainting.json b/ComfyUI/workflows/central_inpainting.json
new file mode 100644
index 0000000..726e8f9
--- /dev/null
+++ b/ComfyUI/workflows/central_inpainting.json
@@ -0,0 +1,1759 @@
+{
+ "last_node_id": 73,
+ "last_link_id": 118,
+ "nodes": [
+ {
+ "id": 25,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 68.05275875989106,
+ 669.8524541740031
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 0,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": null,
+ "shape": 3
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 95,
+ 99
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 50,
+ 53
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_base_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 27,
+ "type": "VAEEncodeForInpaint",
+ "pos": [
+ 568.0527587598914,
+ 379.85245417400307
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 16,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "pixels",
+ "type": "IMAGE",
+ "link": 49
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 50
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 67
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 55
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEEncodeForInpaint"
+ },
+ "widgets_values": [
+ 30
+ ]
+ },
+ {
+ "id": 46,
+ "type": "PrimitiveNode",
+ "pos": [
+ -1227.9435127507859,
+ 596.3205101333443
+ ],
+ "size": {
+ "0": 210,
+ "1": 76
+ },
+ "flags": {},
+ "order": 1,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [
+ 81,
+ 97
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ "a sunny valley"
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 47,
+ "type": "PrimitiveNode",
+ "pos": [
+ -1227.9435127507859,
+ 753.3205101333443
+ ],
+ "size": {
+ "0": 210,
+ "1": 76
+ },
+ "flags": {},
+ "order": 2,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [
+ 84,
+ 98
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ "a text, a logo, borders"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 56,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 765,
+ 661
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 6,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 95
+ },
+ {
+ "name": "text_g",
+ "type": "STRING",
+ "link": 97,
+ "widget": {
+ "name": "text_g"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 96
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 1024,
+ 1024,
+ 0,
+ 0,
+ 1024,
+ 1024,
+ "a sunny valley",
+ ""
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 57,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 763,
+ 715
+ ],
+ "size": {
+ "0": 400,
+ "1": 270.0000305175781
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 99
+ },
+ {
+ "name": "text_g",
+ "type": "STRING",
+ "link": 98,
+ "widget": {
+ "name": "text_g"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 100
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 1024,
+ 1024,
+ 0,
+ 0,
+ 1024,
+ 1024,
+ "a text, a logo, borders",
+ "a logo, text"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 32,
+ "type": "KSampler",
+ "pos": [
+ 1108.0527587598933,
+ 489.85245417400273
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 18,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 57
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 96
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 100
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 55
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 56,
+ 111
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 1000745780097480,
+ "randomize",
+ 50,
+ 8,
+ "euler",
+ "normal",
+ 0.9
+ ]
+ },
+ {
+ "id": 34,
+ "type": "UNETLoader",
+ "pos": [
+ 78,
+ 471
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {},
+ "order": 3,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 57
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "UNETLoader"
+ },
+ "widgets_values": [
+ "sd_xl_inpainting_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 44,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 1336,
+ 1293
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 4,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 82
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 80,
+ 85
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 77
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_refiner_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 51,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 1940,
+ 1260
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 80
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 81,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 83
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 1024,
+ 1024,
+ "a sunny valley"
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 53,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 1940,
+ 1300
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 9,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 85
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 84,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 86
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 1024,
+ 1024,
+ "a text, a logo, borders"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 52,
+ "type": "KSampler",
+ "pos": [
+ 2257,
+ 1080
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 21,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 82
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 83
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 86
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 111
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 88
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 701226939266553,
+ "randomize",
+ 30,
+ 8,
+ "euler",
+ "normal",
+ 0.2
+ ]
+ },
+ {
+ "id": 28,
+ "type": "VAEDecode",
+ "pos": [
+ 1481,
+ 765
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 20,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 56
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 53
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 112,
+ 114
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 2,
+ "type": "LoadImage",
+ "pos": [
+ -1257.9435127507859,
+ 215.32051013334373
+ ],
+ "size": {
+ "0": 315,
+ "1": 314
+ },
+ "flags": {},
+ "order": 5,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 41,
+ 102,
+ 115
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": null,
+ "shape": 3
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "LoadImage"
+ },
+ "widgets_values": [
+ "sunny valley.PNG",
+ "image"
+ ]
+ },
+ {
+ "id": 49,
+ "type": "VAEDecode",
+ "pos": [
+ 2624,
+ 1371
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 24,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 88
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 77
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 116,
+ 117
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 73,
+ "type": "MiddleSplit",
+ "pos": [
+ 2925,
+ 1372
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 27,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 117
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Image",
+ "type": "IMAGE",
+ "links": [
+ 118
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MiddleSplit"
+ }
+ },
+ {
+ "id": 67,
+ "type": "MiddleSplit",
+ "pos": [
+ 1819,
+ 775
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 22,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 112
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Image",
+ "type": "IMAGE",
+ "links": [
+ 113
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MiddleSplit"
+ }
+ },
+ {
+ "id": 71,
+ "type": "PreviewImage",
+ "pos": [
+ 3481,
+ 900
+ ],
+ "size": [
+ 210,
+ 246
+ ],
+ "flags": {},
+ "order": 12,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 115
+ }
+ ],
+ "title": "Initial Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 68,
+ "type": "SaveImage",
+ "pos": [
+ 3435,
+ 1229
+ ],
+ "size": [
+ 315,
+ 270
+ ],
+ "flags": {},
+ "order": 25,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 113
+ }
+ ],
+ "title": "Inpainted Image",
+ "properties": {},
+ "widgets_values": [
+ "inpainting"
+ ]
+ },
+ {
+ "id": 50,
+ "type": "SaveImage",
+ "pos": [
+ 3435,
+ 1584
+ ],
+ "size": {
+ "0": 315,
+ "1": 270
+ },
+ "flags": {},
+ "order": 28,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 118
+ }
+ ],
+ "title": "Inpainted Refined Image",
+ "properties": {},
+ "widgets_values": [
+ "inpainting_refined"
+ ]
+ },
+ {
+ "id": 42,
+ "type": "workflow/Blurry Mask",
+ "pos": [
+ -51.894901952124236,
+ 190.74081730961677
+ ],
+ "size": {
+ "0": 315,
+ "1": 318
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 14,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 106
+ }
+ ],
+ "outputs": [
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [
+ 67
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "workflow/Blurry Mask"
+ },
+ "widgets_values": [
+ 10,
+ 1,
+ "red"
+ ]
+ },
+ {
+ "id": 24,
+ "type": "VerticalMiddleMask",
+ "pos": [
+ -511.8949019521252,
+ 20.74081730961662
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {},
+ "order": 11,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 102
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Mask",
+ "type": "MASK",
+ "links": [
+ 106,
+ 107
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VerticalMiddleMask"
+ },
+ "widgets_values": [
+ 168
+ ]
+ },
+ {
+ "id": 21,
+ "type": "MiddleSplit",
+ "pos": [
+ -101.89490195212419,
+ 70.74081730961663
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 41
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Image",
+ "type": "IMAGE",
+ "links": [
+ 42,
+ 49,
+ 109
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MiddleSplit"
+ }
+ },
+ {
+ "id": 63,
+ "type": "MaskToImage",
+ "pos": [
+ -111.89490195212416,
+ -59.25918269038315
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 15,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 107
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 108
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MaskToImage"
+ }
+ },
+ {
+ "id": 61,
+ "type": "ImageBlend",
+ "pos": [
+ 218.10509804787588,
+ -79.25918269038308
+ ],
+ "size": {
+ "0": 315,
+ "1": 102
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 17,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image1",
+ "type": "IMAGE",
+ "link": 109
+ },
+ {
+ "name": "image2",
+ "type": "IMAGE",
+ "link": 108
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 110
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageBlend"
+ },
+ "widgets_values": [
+ 0.5,
+ "multiply"
+ ]
+ },
+ {
+ "id": 3,
+ "type": "PreviewImage",
+ "pos": [
+ 238.10509804787588,
+ 70.74081730961663
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 13,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 42
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 72,
+ "type": "PreviewImage",
+ "pos": [
+ 3486,
+ 444
+ ],
+ "size": [
+ 210,
+ 246
+ ],
+ "flags": {},
+ "order": 26,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 116
+ }
+ ],
+ "title": "Refined Inpainted Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 69,
+ "type": "PreviewImage",
+ "pos": [
+ 3482,
+ 102
+ ],
+ "size": [
+ 210,
+ 246
+ ],
+ "flags": {},
+ "order": 23,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 114
+ }
+ ],
+ "title": "Inpainted Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 64,
+ "type": "PreviewImage",
+ "pos": [
+ 3480,
+ -230
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 19,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 110
+ }
+ ],
+ "title": "Masked Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ }
+ ],
+ "links": [
+ [
+ 41,
+ 2,
+ 0,
+ 21,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 42,
+ 21,
+ 0,
+ 3,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 49,
+ 21,
+ 0,
+ 27,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 50,
+ 25,
+ 2,
+ 27,
+ 1,
+ "VAE"
+ ],
+ [
+ 53,
+ 25,
+ 2,
+ 28,
+ 1,
+ "VAE"
+ ],
+ [
+ 55,
+ 27,
+ 0,
+ 32,
+ 3,
+ "LATENT"
+ ],
+ [
+ 56,
+ 32,
+ 0,
+ 28,
+ 0,
+ "LATENT"
+ ],
+ [
+ 57,
+ 34,
+ 0,
+ 32,
+ 0,
+ "MODEL"
+ ],
+ [
+ 67,
+ 42,
+ 0,
+ 27,
+ 2,
+ "MASK"
+ ],
+ [
+ 77,
+ 44,
+ 2,
+ 49,
+ 1,
+ "VAE"
+ ],
+ [
+ 80,
+ 44,
+ 1,
+ 51,
+ 0,
+ "CLIP"
+ ],
+ [
+ 81,
+ 46,
+ 0,
+ 51,
+ 1,
+ "STRING"
+ ],
+ [
+ 82,
+ 44,
+ 0,
+ 52,
+ 0,
+ "MODEL"
+ ],
+ [
+ 83,
+ 51,
+ 0,
+ 52,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 84,
+ 47,
+ 0,
+ 53,
+ 1,
+ "STRING"
+ ],
+ [
+ 85,
+ 44,
+ 1,
+ 53,
+ 0,
+ "CLIP"
+ ],
+ [
+ 86,
+ 53,
+ 0,
+ 52,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 88,
+ 52,
+ 0,
+ 49,
+ 0,
+ "LATENT"
+ ],
+ [
+ 95,
+ 25,
+ 1,
+ 56,
+ 0,
+ "CLIP"
+ ],
+ [
+ 96,
+ 56,
+ 0,
+ 32,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 97,
+ 46,
+ 0,
+ 56,
+ 1,
+ "STRING"
+ ],
+ [
+ 98,
+ 47,
+ 0,
+ 57,
+ 1,
+ "STRING"
+ ],
+ [
+ 99,
+ 25,
+ 1,
+ 57,
+ 0,
+ "CLIP"
+ ],
+ [
+ 100,
+ 57,
+ 0,
+ 32,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 102,
+ 2,
+ 0,
+ 24,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 106,
+ 24,
+ 0,
+ 42,
+ 0,
+ "MASK"
+ ],
+ [
+ 107,
+ 24,
+ 0,
+ 63,
+ 0,
+ "MASK"
+ ],
+ [
+ 108,
+ 63,
+ 0,
+ 61,
+ 1,
+ "IMAGE"
+ ],
+ [
+ 109,
+ 21,
+ 0,
+ 61,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 110,
+ 61,
+ 0,
+ 64,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 111,
+ 32,
+ 0,
+ 52,
+ 3,
+ "LATENT"
+ ],
+ [
+ 112,
+ 28,
+ 0,
+ 67,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 113,
+ 67,
+ 0,
+ 68,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 114,
+ 28,
+ 0,
+ 69,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 115,
+ 2,
+ 0,
+ 71,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 116,
+ 49,
+ 0,
+ 72,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 117,
+ 49,
+ 0,
+ 73,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 118,
+ 73,
+ 0,
+ 50,
+ 0,
+ "IMAGE"
+ ]
+ ],
+ "groups": [
+ {
+ "title": "MaskCreation",
+ "bounding": [
+ -544,
+ -173,
+ 956,
+ 396
+ ],
+ "color": "#b06634",
+ "font_size": 24
+ },
+ {
+ "title": "Inpainting",
+ "bounding": [
+ 39,
+ 296,
+ 2001,
+ 576
+ ],
+ "color": "#b58b2a",
+ "font_size": 24
+ },
+ {
+ "title": "User Inputs",
+ "bounding": [
+ -1411,
+ 125,
+ 536,
+ 791
+ ],
+ "color": "#88A",
+ "font_size": 24
+ },
+ {
+ "title": "Refining",
+ "bounding": [
+ 1331,
+ 981,
+ 1879,
+ 493
+ ],
+ "color": "#8A8",
+ "font_size": 24
+ },
+ {
+      "title": "Output",
+ "bounding": [
+ 3356,
+ 788,
+ 458,
+ 1135
+ ],
+ "color": "#3f789e",
+ "font_size": 24
+ },
+ {
+      "title": "Previews",
+ "bounding": [
+ 3355,
+ -336,
+ 459,
+ 1101
+ ],
+ "color": "#444",
+ "font_size": 24
+ }
+ ],
+ "config": {},
+ "extra": {
+ "groupNodes": {
+ "Blurry Mask": {
+ "nodes": [
+ {
+ "type": "MaskToImage",
+ "pos": [
+ 190,
+ 520
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MaskToImage"
+ },
+ "index": 0
+ },
+ {
+ "type": "ImageBlur",
+ "pos": [
+ 320,
+ 650
+ ],
+ "size": {
+ "0": 315,
+ "1": 82
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageBlur"
+ },
+ "widgets_values": [
+ 20,
+ 1
+ ],
+ "index": 1
+ },
+ {
+ "type": "PreviewImage",
+ "pos": [
+ 500,
+ 640
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 9,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": null
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ },
+ "index": 2
+ },
+ {
+ "type": "ImageToMask",
+ "pos": [
+ 380,
+ 540
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageToMask"
+ },
+ "widgets_values": [
+ "red"
+ ],
+ "index": 3
+ }
+ ],
+ "links": [
+ [
+ null,
+ 0,
+ 0,
+ 0,
+ 24,
+ "MASK"
+ ],
+ [
+ 0,
+ 0,
+ 1,
+ 0,
+ 39,
+ "IMAGE"
+ ],
+ [
+ 1,
+ 0,
+ 2,
+ 0,
+ 38,
+ "IMAGE"
+ ],
+ [
+ 1,
+ 0,
+ 3,
+ 0,
+ 38,
+ "IMAGE"
+ ]
+ ],
+ "external": [
+ [
+ 3,
+ 0,
+ "MASK"
+ ]
+ ]
+ }
+ }
+ },
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/ComfyUI/workflows/inpainting_demo.json b/ComfyUI/workflows/inpainting_demo.json
new file mode 100644
index 0000000..6ab4ac0
--- /dev/null
+++ b/ComfyUI/workflows/inpainting_demo.json
@@ -0,0 +1,492 @@
+{
+ "last_node_id": 20,
+ "last_link_id": 20,
+ "nodes": [
+ {
+ "id": 15,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 401,
+ -372
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 0,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": null,
+ "shape": 3
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 16,
+ 17
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 8,
+ 15
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_base_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 1,
+ "type": "LoadImage",
+ "pos": [
+ 407,
+ -139
+ ],
+ "size": {
+ "0": 315,
+ "1": 314
+ },
+ "flags": {},
+ "order": 1,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 6
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [
+ 20
+ ],
+ "shape": 3,
+ "slot_index": 1
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "LoadImage"
+ },
+ "widgets_values": [
+ "clipspace/clipspace-mask-89995.png [input]",
+ "image"
+ ]
+ },
+ {
+ "id": 17,
+ "type": "KSampler",
+ "pos": [
+ 1249,
+ -338
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 6,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 11
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 18
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 19
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 12
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 13
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 850953539433992,
+ "randomize",
+ 50,
+ 8,
+ "euler",
+ "normal",
+ 1
+ ]
+ },
+ {
+ "id": 14,
+ "type": "VAEEncodeForInpaint",
+ "pos": [
+ 781,
+ -253
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 5,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "pixels",
+ "type": "IMAGE",
+ "link": 6
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 8
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 20
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 12
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEEncodeForInpaint"
+ },
+ "widgets_values": [
+ 6
+ ]
+ },
+ {
+ "id": 13,
+ "type": "UNETLoader",
+ "pos": [
+ 411,
+ -511
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {},
+ "order": 2,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 11
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "UNETLoader"
+ },
+ "widgets_values": [
+ "sd_xl_inpainting_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 20,
+ "type": "CLIPTextEncode",
+ "pos": [
+ 848,
+ -306
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 4,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 17
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 19
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncode"
+ },
+ "widgets_values": [
+ "text, logo"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 19,
+ "type": "CLIPTextEncode",
+ "pos": [
+ 849,
+ -350
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 3,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 16
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 18
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncode"
+ },
+ "widgets_values": [
+ "a bear"
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 18,
+ "type": "VAEDecode",
+ "pos": [
+ 1603,
+ -284
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 13
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 15
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 14
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 5,
+ "type": "PreviewImage",
+ "pos": [
+ 1901,
+ -284
+ ],
+ "size": [
+ 210,
+ 246
+ ],
+ "flags": {},
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 14
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ }
+ ],
+ "links": [
+ [
+ 6,
+ 1,
+ 0,
+ 14,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 8,
+ 15,
+ 2,
+ 14,
+ 1,
+ "VAE"
+ ],
+ [
+ 11,
+ 13,
+ 0,
+ 17,
+ 0,
+ "MODEL"
+ ],
+ [
+ 12,
+ 14,
+ 0,
+ 17,
+ 3,
+ "LATENT"
+ ],
+ [
+ 13,
+ 17,
+ 0,
+ 18,
+ 0,
+ "LATENT"
+ ],
+ [
+ 14,
+ 18,
+ 0,
+ 5,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 15,
+ 15,
+ 2,
+ 18,
+ 1,
+ "VAE"
+ ],
+ [
+ 16,
+ 15,
+ 1,
+ 19,
+ 0,
+ "CLIP"
+ ],
+ [
+ 17,
+ 15,
+ 1,
+ 20,
+ 0,
+ "CLIP"
+ ],
+ [
+ 18,
+ 19,
+ 0,
+ 17,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 19,
+ 20,
+ 0,
+ 17,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 20,
+ 1,
+ 1,
+ 14,
+ 2,
+ "MASK"
+ ]
+ ],
+ "groups": [],
+ "config": {},
+ "extra": {},
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/ComfyUI/workflows/ollama_sound_generation.json b/ComfyUI/workflows/ollama_sound_generation.json
new file mode 100644
index 0000000..ebcdc6a
--- /dev/null
+++ b/ComfyUI/workflows/ollama_sound_generation.json
@@ -0,0 +1,1072 @@
+{
+ "last_node_id": 48,
+ "last_link_id": 48,
+ "nodes": [
+ {
+ "id": 4,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ -467.6802792066158,
+ -67.86949931796875
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 0,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 1
+ ],
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 3,
+ 5
+ ],
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 32
+ ],
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_base_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 6,
+ "type": "CLIPTextEncode",
+ "pos": [
+ 128.31972079338468,
+ 164.13050068203128
+ ],
+ "size": {
+ "0": 422.84503173828125,
+ "1": 164.31304931640625
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 3
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 13,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 4
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncode"
+ },
+ "widgets_values": [
+ "beautiful scenery nature glass bottle landscape, , purple galaxy bottle,"
+ ]
+ },
+ {
+ "id": 3,
+ "type": "KSampler",
+ "pos": [
+ 611.3197207933842,
+ -94.86949931796872
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 1
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 4
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 6
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 2
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 7
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 892606919538840,
+ "randomize",
+ 50,
+ 8,
+ "euler",
+ "normal",
+ 1
+ ]
+ },
+ {
+ "id": 8,
+ "type": "VAEDecode",
+ "pos": [
+ 1191.319720793386,
+ 20.130500682031233
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 13,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 7
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 33
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 45
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 9,
+ "type": "SaveImage",
+ "pos": [
+ 2056.3197207933817,
+ -99.86949931796872
+ ],
+ "size": {
+ "0": 210,
+ "1": 270
+ },
+ "flags": {},
+ "order": 16,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 45
+ }
+ ],
+ "properties": {},
+ "widgets_values": [
+ "ComfyUI"
+ ]
+ },
+ {
+ "id": 5,
+ "type": "EmptyLatentImage",
+ "pos": [
+ -456.68027920661575,
+ 85.67362235097657
+ ],
+ "size": {
+ "0": 315,
+ "1": 106
+ },
+ "flags": {},
+ "order": 1,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 2
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "EmptyLatentImage"
+ },
+ "widgets_values": [
+ 512,
+ 512,
+ 4
+ ]
+ },
+ {
+ "id": 11,
+ "type": "OllamaGenerate",
+ "pos": [
+ -872.5297477086955,
+ 490.6645569135614
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {},
+ "order": 2,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "response",
+ "type": "STRING",
+ "links": [
+ 10,
+ 13,
+ 22
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "OllamaGenerate"
+ },
+ "widgets_values": [
+ "Please give a short description of a natural landscape. Include only the description in the answer.",
+ "enable",
+ "http://127.0.0.1:11434",
+ "llama3"
+ ]
+ },
+ {
+ "id": 18,
+ "type": "OllamaGenerate",
+ "pos": [
+ 162.47025229130455,
+ 500.6645569135614
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {},
+ "order": 11,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "prompt",
+ "type": "STRING",
+ "link": 23,
+ "widget": {
+ "name": "prompt"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "response",
+ "type": "STRING",
+ "links": [
+ 25,
+ 44
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "OllamaGenerate"
+ },
+ "widgets_values": [
+ "What is Art?",
+ "enable",
+ "http://127.0.0.1:11434",
+ "llama3"
+ ]
+ },
+ {
+ "id": 7,
+ "type": "CLIPTextEncode",
+ "pos": [
+ 136.7439561421876,
+ -29.3077030701172
+ ],
+ "size": {
+ "0": 425.27801513671875,
+ "1": 180.6060791015625
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 4,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 5
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 6
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncode"
+ },
+ "widgets_values": [
+ "text, watermark, borders, frame"
+ ]
+ },
+ {
+ "id": 10,
+ "type": "ShowText|pysssss",
+ "pos": [
+ -387.5297477086957,
+ 466.6645569135614
+ ],
+ "size": {
+ "0": 490.6488037109375,
+ "1": 184.955078125
+ },
+ "flags": {},
+ "order": 6,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 10,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [],
+ "shape": 6,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ShowText|pysssss"
+ },
+ "widgets_values": [
+ "",
+ "A lush meadow stretches out before you, where tall grasses and wildflowers sway gently in the breeze. A winding stream runs along the edge, its crystal-clear water reflecting the blue sky above. In the distance, the rugged silhouette of a mountain range rises up, its peaks still capped with snow despite the warmth of summer. The air is filled with the sweet scent of blooming heather and the soft chirping of birds."
+ ]
+ },
+ {
+ "id": 23,
+ "type": "ShowText|pysssss",
+ "pos": [
+ 80.94844548828135,
+ 744.0770281187496
+ ],
+ "size": {
+ "0": 490,
+ "1": 180
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 12,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 24,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [],
+ "shape": 6,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ShowText|pysssss"
+ },
+ "widgets_values": [
+ "",
+        "Please describe each sound that can be heard in the following landscape. Please answer only with a Python list of short descriptions, one for each sound. Answer only with the list.\n\nRolling hills of golden grass sway gently in the breeze, as far as the eye can see. Scattered pine trees dot the landscape like emeralds on a velvet cloth, their trunks sturdy and strong. A meandering stream babbles its way through the scene, its crystal waters reflecting the cerulean sky above. Wildflowers of every hue bloom spontaneously, adding splashes of color to this serene and peaceful tableau."
+ ]
+ },
+ {
+ "id": 22,
+ "type": "StringFunction|pysssss",
+ "pos": [
+ -229.0515545117186,
+ 749.0770281187496
+ ],
+ "size": {
+ "0": 400,
+ "1": 244
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "text_b",
+ "type": "STRING",
+ "link": 22,
+ "widget": {
+ "name": "text_b"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [
+ 23,
+ 24
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "StringFunction|pysssss"
+ },
+ "widgets_values": [
+ "append",
+ "no",
+        "Please describe each sound that can be heard in the following landscape. Please answer only with a Python list of short descriptions, one for each sound. Answer only with the list.\n\n",
+ "",
+ ""
+ ]
+ },
+ {
+ "id": 34,
+ "type": "Reroute",
+ "pos": [
+ 200,
+ 52
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 5,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 32
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "VAE",
+ "links": [
+ 33
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 25,
+ "type": "MusicgenLoader",
+ "pos": [
+ 1182.5381409531253,
+ 454.2513616617188
+ ],
+ "size": {
+ "0": 315,
+ "1": 78
+ },
+ "flags": {},
+ "order": 3,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MUSICGEN_MODEL",
+ "links": [
+ 37
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "SR",
+ "type": "INT",
+ "links": [
+ 48
+ ],
+ "shape": 3,
+ "slot_index": 1
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MusicgenLoader"
+ },
+ "widgets_values": [
+ "audiogen-medium"
+ ]
+ },
+ {
+ "id": 33,
+ "type": "SaveAudio",
+ "pos": [
+ 2271,
+ 996
+ ],
+ "size": {
+ "0": 315,
+ "1": 106
+ },
+ "flags": {},
+ "order": 18,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "audio",
+ "type": "AUDIO_TENSOR",
+ "link": 41
+ },
+ {
+ "name": "sr",
+ "type": "INT",
+ "link": 47,
+ "widget": {
+ "name": "sr"
+ }
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "SaveAudio"
+ },
+ "widgets_values": [
+ 32000,
+ "wav",
+ "Audio"
+ ]
+ },
+ {
+ "id": 48,
+ "type": "Reroute",
+ "pos": [
+ 1596,
+ 792
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 9,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 48,
+ "pos": [
+ 37.5,
+ 0
+ ],
+ "widget": {
+ "name": "value"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "INT",
+ "links": [
+ 47
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": true
+ }
+ },
+ {
+ "id": 24,
+ "type": "ShowText|pysssss",
+ "pos": [
+ 648,
+ 573
+ ],
+ "size": {
+ "0": 431.43231201171875,
+ "1": 229.61392211914062
+ },
+ "flags": {},
+ "order": 14,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 25,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [],
+ "shape": 6,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ShowText|pysssss"
+ },
+ "widgets_values": [
+ "",
+ "['Gentle rustling of grass', 'Soft whispering of pine leaves', 'Bubbly gurgling of stream', 'Honeybees buzzing among flowers']"
+ ]
+ },
+ {
+ "id": 36,
+ "type": "MusicgenGenerateCustom",
+ "pos": [
+ 1673,
+ 501
+ ],
+ "size": {
+ "0": 400,
+ "1": 288
+ },
+ "flags": {},
+ "order": 15,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MUSICGEN_MODEL",
+ "link": 37
+ },
+ {
+ "name": "audio",
+ "type": "AUDIO_TENSOR",
+ "link": null
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 44,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "RAW_AUDIO",
+ "type": "AUDIO_TENSOR",
+ "links": [
+ 38,
+ 41
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MusicgenGenerateCustom"
+ },
+ "widgets_values": [
+ "",
+ 1,
+ 10,
+ 1,
+ 250,
+ 0,
+ 1,
+ 100,
+ "randomize"
+ ]
+ },
+ {
+ "id": 38,
+ "type": "PreviewImage",
+ "pos": [
+ 2583,
+ 518
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 19,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 39
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 37,
+ "type": "SpectrogramImage",
+ "pos": [
+ 2176,
+ 505
+ ],
+ "size": {
+ "0": 315,
+ "1": 178
+ },
+ "flags": {},
+ "order": 17,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "audio",
+ "type": "AUDIO_TENSOR",
+ "link": 38
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 39
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "SpectrogramImage"
+ },
+ "widgets_values": [
+ 200,
+ 50,
+ 100,
+ 1,
+ false,
+ true
+ ]
+ }
+ ],
+ "links": [
+ [
+ 1,
+ 4,
+ 0,
+ 3,
+ 0,
+ "MODEL"
+ ],
+ [
+ 2,
+ 5,
+ 0,
+ 3,
+ 3,
+ "LATENT"
+ ],
+ [
+ 3,
+ 4,
+ 1,
+ 6,
+ 0,
+ "CLIP"
+ ],
+ [
+ 4,
+ 6,
+ 0,
+ 3,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 5,
+ 4,
+ 1,
+ 7,
+ 0,
+ "CLIP"
+ ],
+ [
+ 6,
+ 7,
+ 0,
+ 3,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 7,
+ 3,
+ 0,
+ 8,
+ 0,
+ "LATENT"
+ ],
+ [
+ 10,
+ 11,
+ 0,
+ 10,
+ 0,
+ "STRING"
+ ],
+ [
+ 13,
+ 11,
+ 0,
+ 6,
+ 1,
+ "STRING"
+ ],
+ [
+ 22,
+ 11,
+ 0,
+ 22,
+ 0,
+ "STRING"
+ ],
+ [
+ 23,
+ 22,
+ 0,
+ 18,
+ 0,
+ "STRING"
+ ],
+ [
+ 24,
+ 22,
+ 0,
+ 23,
+ 0,
+ "STRING"
+ ],
+ [
+ 25,
+ 18,
+ 0,
+ 24,
+ 0,
+ "STRING"
+ ],
+ [
+ 32,
+ 4,
+ 2,
+ 34,
+ 0,
+ "*"
+ ],
+ [
+ 33,
+ 34,
+ 0,
+ 8,
+ 1,
+ "VAE"
+ ],
+ [
+ 37,
+ 25,
+ 0,
+ 36,
+ 0,
+ "MUSICGEN_MODEL"
+ ],
+ [
+ 38,
+ 36,
+ 0,
+ 37,
+ 0,
+ "AUDIO_TENSOR"
+ ],
+ [
+ 39,
+ 37,
+ 0,
+ 38,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 41,
+ 36,
+ 0,
+ 33,
+ 0,
+ "AUDIO_TENSOR"
+ ],
+ [
+ 44,
+ 18,
+ 0,
+ 36,
+ 2,
+ "STRING"
+ ],
+ [
+ 45,
+ 8,
+ 0,
+ 9,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 47,
+ 48,
+ 0,
+ 33,
+ 1,
+ "INT"
+ ],
+ [
+ 48,
+ 25,
+ 1,
+ 48,
+ 0,
+ "*"
+ ]
+ ],
+ "groups": [
+ {
+ "title": "Image generation",
+ "bounding": [
+ -478,
+ -174,
+ 2754,
+ 376
+ ],
+ "color": "#b58b2a",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "Inputs preparation",
+ "bounding": [
+ -883,
+ 393,
+ 1984,
+ 426
+ ],
+ "color": "#8A8",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "Audio Generation",
+ "bounding": [
+ 1173,
+ 380,
+ 1669,
+ 774
+ ],
+ "color": "#3f789e",
+ "font_size": 24,
+ "locked": false
+ }
+ ],
+ "config": {},
+ "extra": {},
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/ComfyUI/workflows/panorama_creator.json b/ComfyUI/workflows/panorama_creator.json
new file mode 100644
index 0000000..411bdf0
--- /dev/null
+++ b/ComfyUI/workflows/panorama_creator.json
@@ -0,0 +1,2490 @@
+{
+ "last_node_id": 140,
+ "last_link_id": 241,
+ "nodes": [
+ {
+ "id": 72,
+ "type": "PreviewImage",
+ "pos": [
+ 6732.320592261197,
+ 1022.4875354181333
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 37,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 116
+ }
+ ],
+ "title": "Refined Inpainted Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 69,
+ "type": "PreviewImage",
+ "pos": [
+ 6728.320592261197,
+ 680.4875354181331
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 34,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 114
+ }
+ ],
+ "title": "Inpainted Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 64,
+ "type": "PreviewImage",
+ "pos": [
+ 6726.320592261197,
+ 348.48753541813363
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 30,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 110
+ }
+ ],
+ "title": "Masked Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 47,
+ "type": "PrimitiveNode",
+ "pos": [
+ -436.65502631696484,
+ 326.1107483300291
+ ],
+ "size": {
+ "0": 210,
+ "1": 76.00001525878906
+ },
+ "flags": {},
+ "order": 0,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [
+ 84,
+ 98
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ "a text, a logo, borders"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 46,
+ "type": "PrimitiveNode",
+ "pos": [
+ -441.48548678978943,
+ 197.57055928998972
+ ],
+ "size": {
+ "0": 210,
+ "1": 76.00000762939453
+ },
+ "flags": {},
+ "order": 1,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [
+ 81,
+ 97
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ "a sunny valley"
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 76,
+ "type": "PrimitiveNode",
+ "pos": [
+ -431.8105058402872,
+ 597.6940746637698
+ ],
+ "size": {
+ "0": 210,
+ "1": 82
+ },
+ "flags": {},
+ "order": 2,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "INT",
+ "type": "INT",
+ "links": [
+ 120,
+ 122,
+ 131
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "height"
+ }
+ }
+ ],
+      "title": "Initial Image Height",
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ 512,
+ "fixed"
+ ]
+ },
+ {
+ "id": 75,
+ "type": "PrimitiveNode",
+ "pos": [
+ -435.8105058402872,
+ 464.6940746637689
+ ],
+ "size": {
+ "0": 210,
+ "1": 82
+ },
+ "flags": {},
+ "order": 3,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "INT",
+ "type": "INT",
+ "links": [
+ 119,
+ 121,
+ 130
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "width"
+ }
+ }
+ ],
+      "title": "Initial Image Width",
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ 2048,
+ "fixed"
+ ]
+ },
+ {
+ "id": 82,
+ "type": "EmptyLatentImage",
+ "pos": [
+ 187.1067275294476,
+ -76.79146373353836
+ ],
+ "size": {
+ "0": 315,
+ "1": 106
+ },
+ "flags": {},
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "width",
+ "type": "INT",
+ "link": 130,
+ "widget": {
+ "name": "width"
+ }
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "link": 131,
+ "widget": {
+ "name": "height"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 132
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "EmptyLatentImage"
+ },
+ "widgets_values": [
+ 2048,
+ 512,
+ 2
+ ]
+ },
+ {
+ "id": 56,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 177.90388094884744,
+ 103.65074083354882
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 95
+ },
+ {
+ "name": "text_g",
+ "type": "STRING",
+ "link": 97,
+ "widget": {
+ "name": "text_g"
+ }
+ },
+ {
+ "name": "width",
+ "type": "INT",
+ "link": 119,
+ "widget": {
+ "name": "width"
+ }
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "link": 120,
+ "widget": {
+ "name": "height"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 124
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 2048,
+ 512,
+ 0,
+ 0,
+ 2048,
+ 512,
+ "a sunny valley",
+ ""
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 57,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 182.90388094884744,
+ 156.6507408335485
+ ],
+ "size": {
+ "0": 400,
+ "1": 270.0000305175781
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 9,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 99
+ },
+ {
+ "name": "text_g",
+ "type": "STRING",
+ "link": 98,
+ "widget": {
+ "name": "text_g"
+ }
+ },
+ {
+ "name": "width",
+ "type": "INT",
+ "link": 121,
+ "widget": {
+ "name": "width"
+ }
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "link": 122,
+ "widget": {
+ "name": "height"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 125
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 2048,
+ 512,
+ 0,
+ 0,
+ 2048,
+ 512,
+ "a text, a logo, borders",
+ "a logo, text, borders"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 126,
+ "type": "PreviewImage",
+ "pos": [
+ 6710,
+ -160
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 21,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 194
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 49,
+ "type": "VAEDecode",
+ "pos": [
+ 5563.898602057173,
+ 1528.4313459247896
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 35,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 88
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 77
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 116,
+ 117
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 130,
+ "type": "Reroute",
+ "pos": [
+ 2396.8792072153574,
+ 762.4135259871944
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 19,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 215
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "VAE",
+ "links": [
+ 216,
+ 217
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 68,
+ "type": "SaveImage",
+ "pos": [
+ 6668,
+ 1410
+ ],
+ "size": {
+ "0": 315,
+ "1": 270.0000305175781
+ },
+ "flags": {},
+ "order": 36,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 113
+ }
+ ],
+ "title": "Inpainted Image",
+ "properties": {},
+ "widgets_values": [
+ "skybox"
+ ]
+ },
+ {
+ "id": 50,
+ "type": "SaveImage",
+ "pos": [
+ 6677,
+ 1729
+ ],
+ "size": {
+ "0": 315,
+ "1": 270.00006103515625
+ },
+ "flags": {},
+ "order": 39,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 118
+ }
+ ],
+ "title": "Inpainted Refined Image",
+ "properties": {},
+ "widgets_values": [
+ "skybox_refined"
+ ]
+ },
+ {
+ "id": 73,
+ "type": "MiddleSplit",
+ "pos": [
+ 5821.898602057173,
+ 1611.4313459247896
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 38,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 117
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Image",
+ "type": "IMAGE",
+ "links": [
+ 118,
+ 231
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MiddleSplit"
+ }
+ },
+ {
+ "id": 136,
+ "type": "ImageReverseLambert",
+ "pos": [
+ 6061.898602057173,
+ 1666.4313459247896
+ ],
+ "size": {
+ "0": 226.8000030517578,
+ "1": 26
+ },
+ "flags": {},
+ "order": 40,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 231
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 232
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageReverseLambert"
+ }
+ },
+ {
+ "id": 137,
+ "type": "SaveImage",
+ "pos": [
+ 6664,
+ 2058
+ ],
+ "size": {
+ "0": 315,
+ "1": 270
+ },
+ "flags": {},
+ "order": 41,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 232
+ }
+ ],
+ "properties": {},
+ "widgets_values": [
+ "skybox_refined_lambert"
+ ]
+ },
+ {
+ "id": 125,
+ "type": "VAEDecode",
+ "pos": [
+ 1508.8792072153576,
+ 425.41352598719476
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 18,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 237
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 214
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 194,
+ 201,
+ 202
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 129,
+ "type": "Reroute",
+ "pos": [
+ 1088,
+ 631
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 16,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 235
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "VAE",
+ "links": [
+ 214,
+ 215
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 127,
+ "type": "Reroute",
+ "pos": [
+ 289,
+ 579
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 12,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 203
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "VAE",
+ "links": [
+ 204,
+ 235
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 79,
+ "type": "VAEDecode",
+ "pos": [
+ 1240,
+ 58
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 17,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 127
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 204
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 133
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 34,
+ "type": "UNETLoader",
+ "pos": [
+ -471,
+ 958
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {},
+ "order": 4,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 236
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "UNETLoader"
+ },
+ "widgets_values": [
+ "sd_xl_inpainting_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 78,
+ "type": "KSampler",
+ "pos": [
+ 671.1067275294507,
+ -109.79146373353866
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 15,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 123
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 124
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 125
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 132
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 127,
+ 237
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 7898391413359,
+ "randomize",
+ 20,
+ 8,
+ "euler",
+ "normal",
+ 0.8
+ ]
+ },
+ {
+ "id": 71,
+ "type": "PreviewImage",
+ "pos": [
+ 6737,
+ -552
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 20,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 133
+ }
+ ],
+ "title": "Initial Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 42,
+ "type": "workflow/Blurry Mask",
+ "pos": [
+ 2419,
+ 751
+ ],
+ "size": {
+ "0": 315,
+ "1": 318
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 24,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 106
+ }
+ ],
+ "outputs": [
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [
+ 67
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "workflow/Blurry Mask"
+ },
+ "widgets_values": [
+ 10,
+ 1,
+ "red"
+ ]
+ },
+ {
+ "id": 21,
+ "type": "MiddleSplit",
+ "pos": [
+ 2057,
+ 459
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 23,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 202
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Image",
+ "type": "IMAGE",
+ "links": [
+ 42,
+ 49,
+ 109
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MiddleSplit"
+ }
+ },
+ {
+ "id": 24,
+ "type": "VerticalMiddleMask",
+ "pos": [
+ 2058,
+ 658
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {},
+ "order": 22,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 201
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Mask",
+ "type": "MASK",
+ "links": [
+ 106,
+ 107
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VerticalMiddleMask"
+ },
+ "widgets_values": [
+ 168
+ ]
+ },
+ {
+ "id": 63,
+ "type": "MaskToImage",
+ "pos": [
+ 2429,
+ 635
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 25,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 107
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 108
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MaskToImage"
+ }
+ },
+ {
+ "id": 61,
+ "type": "ImageBlend",
+ "pos": [
+ 2769,
+ 688
+ ],
+ "size": {
+ "0": 315,
+ "1": 102
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 28,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image1",
+ "type": "IMAGE",
+ "link": 109
+ },
+ {
+ "name": "image2",
+ "type": "IMAGE",
+ "link": 108
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 110
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageBlend"
+ },
+ "widgets_values": [
+ 0.5,
+ "multiply"
+ ]
+ },
+ {
+ "id": 32,
+ "type": "KSampler",
+ "pos": [
+ 3329,
+ 784
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 29,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 236
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 238,
+ "slot_index": 1
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 239,
+ "slot_index": 2
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 55
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 56,
+ 111
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 10826311454902,
+ "randomize",
+ 50,
+ 8,
+ "euler",
+ "normal",
+ 0.9
+ ]
+ },
+ {
+ "id": 27,
+ "type": "VAEEncodeForInpaint",
+ "pos": [
+ 2845,
+ 902
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 27,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "pixels",
+ "type": "IMAGE",
+ "link": 49
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 216
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 67
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 55
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEEncodeForInpaint"
+ },
+ "widgets_values": [
+ 30
+ ]
+ },
+ {
+ "id": 28,
+ "type": "VAEDecode",
+ "pos": [
+ 3694,
+ 890
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 31,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 56
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 217
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 112,
+ 114
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 67,
+ "type": "MiddleSplit",
+ "pos": [
+ 3925,
+ 1019
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 33,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 112
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Image",
+ "type": "IMAGE",
+ "links": [
+ 113
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MiddleSplit"
+ }
+ },
+ {
+ "id": 139,
+ "type": "CLIPTextEncode",
+ "pos": [
+ 1540,
+ 564
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 240
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 238
+ ],
+ "shape": 3
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncode"
+ },
+ "widgets_values": [
+ ""
+ ]
+ },
+ {
+ "id": 25,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ -485.65502631696495,
+ 746.1107483300289
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 5,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 123
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 95,
+ 99,
+ 240,
+ 241
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 203
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_base_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 140,
+ "type": "CLIPTextEncode",
+ "pos": [
+ 1534,
+ 628
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 11,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 241
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 239
+ ],
+ "shape": 3
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncode"
+ },
+ "widgets_values": [
+ ""
+ ]
+ },
+ {
+ "id": 3,
+ "type": "PreviewImage",
+ "pos": [
+ 2935,
+ 396
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 26,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 42
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 44,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 4266,
+ 1271
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 6,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 82
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 80,
+ 85
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 77
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_refiner_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 53,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 4766,
+ 1526
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 14,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 85
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 84,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 86
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 1024,
+ 1024,
+ "a text, a logo, borders"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 51,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 4756,
+ 1467
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 13,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 80
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 81,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 83
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 1024,
+ 1024,
+ "a sunny valley"
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 52,
+ "type": "KSampler",
+ "pos": [
+ 5184,
+ 1463
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 32,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 82
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 83
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 86
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 111
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 88
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 418024017608548,
+ "randomize",
+ 30,
+ 8,
+ "euler",
+ "normal",
+ 0.2
+ ]
+ }
+ ],
+ "links": [
+ [
+ 42,
+ 21,
+ 0,
+ 3,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 49,
+ 21,
+ 0,
+ 27,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 55,
+ 27,
+ 0,
+ 32,
+ 3,
+ "LATENT"
+ ],
+ [
+ 56,
+ 32,
+ 0,
+ 28,
+ 0,
+ "LATENT"
+ ],
+ [
+ 67,
+ 42,
+ 0,
+ 27,
+ 2,
+ "MASK"
+ ],
+ [
+ 77,
+ 44,
+ 2,
+ 49,
+ 1,
+ "VAE"
+ ],
+ [
+ 80,
+ 44,
+ 1,
+ 51,
+ 0,
+ "CLIP"
+ ],
+ [
+ 81,
+ 46,
+ 0,
+ 51,
+ 1,
+ "STRING"
+ ],
+ [
+ 82,
+ 44,
+ 0,
+ 52,
+ 0,
+ "MODEL"
+ ],
+ [
+ 83,
+ 51,
+ 0,
+ 52,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 84,
+ 47,
+ 0,
+ 53,
+ 1,
+ "STRING"
+ ],
+ [
+ 85,
+ 44,
+ 1,
+ 53,
+ 0,
+ "CLIP"
+ ],
+ [
+ 86,
+ 53,
+ 0,
+ 52,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 88,
+ 52,
+ 0,
+ 49,
+ 0,
+ "LATENT"
+ ],
+ [
+ 95,
+ 25,
+ 1,
+ 56,
+ 0,
+ "CLIP"
+ ],
+ [
+ 97,
+ 46,
+ 0,
+ 56,
+ 1,
+ "STRING"
+ ],
+ [
+ 98,
+ 47,
+ 0,
+ 57,
+ 1,
+ "STRING"
+ ],
+ [
+ 99,
+ 25,
+ 1,
+ 57,
+ 0,
+ "CLIP"
+ ],
+ [
+ 106,
+ 24,
+ 0,
+ 42,
+ 0,
+ "MASK"
+ ],
+ [
+ 107,
+ 24,
+ 0,
+ 63,
+ 0,
+ "MASK"
+ ],
+ [
+ 108,
+ 63,
+ 0,
+ 61,
+ 1,
+ "IMAGE"
+ ],
+ [
+ 109,
+ 21,
+ 0,
+ 61,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 110,
+ 61,
+ 0,
+ 64,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 111,
+ 32,
+ 0,
+ 52,
+ 3,
+ "LATENT"
+ ],
+ [
+ 112,
+ 28,
+ 0,
+ 67,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 113,
+ 67,
+ 0,
+ 68,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 114,
+ 28,
+ 0,
+ 69,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 116,
+ 49,
+ 0,
+ 72,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 117,
+ 49,
+ 0,
+ 73,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 118,
+ 73,
+ 0,
+ 50,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 119,
+ 75,
+ 0,
+ 56,
+ 2,
+ "INT"
+ ],
+ [
+ 120,
+ 76,
+ 0,
+ 56,
+ 3,
+ "INT"
+ ],
+ [
+ 121,
+ 75,
+ 0,
+ 57,
+ 2,
+ "INT"
+ ],
+ [
+ 122,
+ 76,
+ 0,
+ 57,
+ 3,
+ "INT"
+ ],
+ [
+ 123,
+ 25,
+ 0,
+ 78,
+ 0,
+ "MODEL"
+ ],
+ [
+ 124,
+ 56,
+ 0,
+ 78,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 125,
+ 57,
+ 0,
+ 78,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 127,
+ 78,
+ 0,
+ 79,
+ 0,
+ "LATENT"
+ ],
+ [
+ 130,
+ 75,
+ 0,
+ 82,
+ 0,
+ "INT"
+ ],
+ [
+ 131,
+ 76,
+ 0,
+ 82,
+ 1,
+ "INT"
+ ],
+ [
+ 132,
+ 82,
+ 0,
+ 78,
+ 3,
+ "LATENT"
+ ],
+ [
+ 133,
+ 79,
+ 0,
+ 71,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 194,
+ 125,
+ 0,
+ 126,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 201,
+ 125,
+ 0,
+ 24,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 202,
+ 125,
+ 0,
+ 21,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 203,
+ 25,
+ 2,
+ 127,
+ 0,
+ "*"
+ ],
+ [
+ 204,
+ 127,
+ 0,
+ 79,
+ 1,
+ "VAE"
+ ],
+ [
+ 214,
+ 129,
+ 0,
+ 125,
+ 1,
+ "VAE"
+ ],
+ [
+ 215,
+ 129,
+ 0,
+ 130,
+ 0,
+ "*"
+ ],
+ [
+ 216,
+ 130,
+ 0,
+ 27,
+ 1,
+ "VAE"
+ ],
+ [
+ 217,
+ 130,
+ 0,
+ 28,
+ 1,
+ "VAE"
+ ],
+ [
+ 231,
+ 73,
+ 0,
+ 136,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 232,
+ 136,
+ 0,
+ 137,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 235,
+ 127,
+ 0,
+ 129,
+ 0,
+ "*"
+ ],
+ [
+ 236,
+ 34,
+ 0,
+ 32,
+ 0,
+ "MODEL"
+ ],
+ [
+ 237,
+ 78,
+ 0,
+ 125,
+ 0,
+ "LATENT"
+ ],
+ [
+ 238,
+ 139,
+ 0,
+ 32,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 239,
+ 140,
+ 0,
+ 32,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 240,
+ 25,
+ 1,
+ 139,
+ 0,
+ "CLIP"
+ ],
+ [
+ 241,
+ 25,
+ 1,
+ 140,
+ 0,
+ "CLIP"
+ ]
+ ],
+ "groups": [
+ {
+ "title": "Horizontal Tiling",
+ "bounding": [
+ 1326,
+ 320,
+ 2824,
+ 915
+ ],
+ "color": "#b58b2a",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "User Inputs",
+ "bounding": [
+ -496,
+ 124,
+ 335,
+ 731
+ ],
+ "color": "#88A",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "Refining",
+ "bounding": [
+ 4232,
+ 1208,
+ 2062,
+ 534
+ ],
+ "color": "#8A8",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+      "title": "Output",
+ "bounding": [
+ 6657,
+ 1324,
+ 335,
+ 1038
+ ],
+ "color": "#3f789e",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+      "title": "Previews",
+ "bounding": [
+ 6717,
+ 274,
+ 236,
+ 1004
+ ],
+ "color": "#444",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "Initial Image",
+ "bounding": [
+ 143,
+ -249,
+ 1342,
+ 443
+ ],
+ "color": "#8AA",
+ "font_size": 24,
+ "locked": false
+ }
+ ],
+ "config": {},
+ "extra": {
+ "groupNodes": {
+ "Blurry Mask": {
+ "nodes": [
+ {
+ "type": "MaskToImage",
+ "pos": [
+ 190,
+ 520
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MaskToImage"
+ },
+ "index": 0
+ },
+ {
+ "type": "ImageBlur",
+ "pos": [
+ 320,
+ 650
+ ],
+ "size": {
+ "0": 315,
+ "1": 82
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageBlur"
+ },
+ "widgets_values": [
+ 20,
+ 1
+ ],
+ "index": 1
+ },
+ {
+ "type": "PreviewImage",
+ "pos": [
+ 500,
+ 640
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 9,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": null
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ },
+ "index": 2
+ },
+ {
+ "type": "ImageToMask",
+ "pos": [
+ 380,
+ 540
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageToMask"
+ },
+ "widgets_values": [
+ "red"
+ ],
+ "index": 3
+ }
+ ],
+ "links": [
+ [
+ null,
+ 0,
+ 0,
+ 0,
+ 24,
+ "MASK"
+ ],
+ [
+ 0,
+ 0,
+ 1,
+ 0,
+ 39,
+ "IMAGE"
+ ],
+ [
+ 1,
+ 0,
+ 2,
+ 0,
+ 38,
+ "IMAGE"
+ ],
+ [
+ 1,
+ 0,
+ 3,
+ 0,
+ 38,
+ "IMAGE"
+ ]
+ ],
+ "external": [
+ [
+ 3,
+ 0,
+ "MASK"
+ ]
+ ]
+ }
+ }
+ },
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/ComfyUI/workflows/sdxl.json b/ComfyUI/workflows/sdxl.json
new file mode 100644
index 0000000..c8ed132
--- /dev/null
+++ b/ComfyUI/workflows/sdxl.json
@@ -0,0 +1,366 @@
+{
+ "last_node_id": 11,
+ "last_link_id": 9,
+ "nodes": [
+ {
+ "id": 6,
+ "type": "CLIPTextEncode",
+ "pos": [
+ 515,
+ 130
+ ],
+ "size": {
+ "0": 422.84503173828125,
+ "1": 164.31304931640625
+ },
+ "flags": {},
+ "order": 2,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 3
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 4
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncode"
+ },
+ "widgets_values": [
+ "a serene landscape under the sun"
+ ]
+ },
+ {
+ "id": 9,
+ "type": "SaveImage",
+ "pos": [
+ 1765.2780151367188,
+ 130
+ ],
+ "size": {
+ "0": 210,
+ "1": 270
+ },
+ "flags": {},
+ "order": 6,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 9
+ }
+ ],
+ "properties": {},
+ "widgets_values": [
+ "ComfyUI"
+ ]
+ },
+ {
+ "id": 8,
+ "type": "VAEDecode",
+ "pos": [
+ 1455.2780151367188,
+ 130
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 5,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 7
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 8
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 9
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 5,
+ "type": "EmptyLatentImage",
+ "pos": [
+ 100,
+ 130
+ ],
+ "size": {
+ "0": 315,
+ "1": 106
+ },
+ "flags": {},
+ "order": 0,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 2
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "EmptyLatentImage"
+ },
+ "widgets_values": [
+ 1024,
+ 512,
+ 1
+ ]
+ },
+ {
+ "id": 7,
+ "type": "CLIPTextEncode",
+ "pos": [
+ 515,
+ 424.31304931640625
+ ],
+ "size": {
+ "0": 425.27801513671875,
+ "1": 180.6060791015625
+ },
+ "flags": {},
+ "order": 3,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 5
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 6
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncode"
+ },
+ "widgets_values": [
+ "text, watermark"
+ ]
+ },
+ {
+ "id": 4,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 100,
+ 366
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 1,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 1
+ ],
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 3,
+ 5
+ ],
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 8
+ ],
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_base_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 3,
+ "type": "KSampler",
+ "pos": [
+ 1040.2780151367188,
+ 130
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 4,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 1
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 4
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 6
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 2
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 7
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 610640153339234,
+ "randomize",
+ 20,
+ 8,
+ "euler",
+ "normal",
+ 1
+ ]
+ }
+ ],
+ "links": [
+ [
+ 1,
+ 4,
+ 0,
+ 3,
+ 0,
+ "MODEL"
+ ],
+ [
+ 2,
+ 5,
+ 0,
+ 3,
+ 3,
+ "LATENT"
+ ],
+ [
+ 3,
+ 4,
+ 1,
+ 6,
+ 0,
+ "CLIP"
+ ],
+ [
+ 4,
+ 6,
+ 0,
+ 3,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 5,
+ 4,
+ 1,
+ 7,
+ 0,
+ "CLIP"
+ ],
+ [
+ 6,
+ 7,
+ 0,
+ 3,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 7,
+ 3,
+ 0,
+ 8,
+ 0,
+ "LATENT"
+ ],
+ [
+ 8,
+ 4,
+ 2,
+ 8,
+ 1,
+ "VAE"
+ ],
+ [
+ 9,
+ 8,
+ 0,
+ 9,
+ 0,
+ "IMAGE"
+ ]
+ ],
+ "groups": [],
+ "config": {},
+ "extra": {},
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/ComfyUI/workflows/sdxl_inpainting_demo.json b/ComfyUI/workflows/sdxl_inpainting_demo.json
new file mode 100644
index 0000000..acd1b8b
--- /dev/null
+++ b/ComfyUI/workflows/sdxl_inpainting_demo.json
@@ -0,0 +1,830 @@
+{
+ "last_node_id": 32,
+ "last_link_id": 45,
+ "nodes": [
+ {
+ "id": 15,
+ "type": "UNETLoader",
+ "pos": [
+ 100,
+ 130
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {},
+ "order": 0,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 16
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "UNETLoader"
+ },
+ "widgets_values": [
+ "sd_xl_inpainting_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 9,
+ "type": "SaveImage",
+ "pos": [
+ 1845,
+ 130
+ ],
+ "size": {
+ "0": 210,
+ "1": 270
+ },
+ "flags": {},
+ "order": 12,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 9
+ }
+ ],
+ "properties": {},
+ "widgets_values": [
+ "ComfyUI"
+ ]
+ },
+ {
+ "id": 20,
+ "type": "VAEDecode",
+ "pos": [
+ 1845,
+ 530
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 13,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 38
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 23
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 22
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 21,
+ "type": "SaveImage",
+ "pos": [
+ 2155,
+ 130
+ ],
+ "size": [
+ 315,
+ 270
+ ],
+ "flags": {},
+ "order": 14,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 22
+ }
+ ],
+ "properties": {},
+ "widgets_values": [
+ "ComfyUI"
+ ]
+ },
+ {
+ "id": 8,
+ "type": "VAEDecode",
+ "pos": [
+ 1430,
+ 130
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 7
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 8
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 9
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 18,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 100,
+ 318
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 1,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 17
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 33,
+ 35
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 23
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_refiner_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 28,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 515,
+ 460
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {},
+ "order": 5,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 35
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 36
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 1024,
+ 1024,
+ "text, watermark"
+ ]
+ },
+ {
+ "id": 4,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 100,
+ 546
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 2,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [],
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 40,
+ 42
+ ],
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 8,
+ 11
+ ],
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_base_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 12,
+ "type": "VAEEncodeForInpaint",
+ "pos": [
+ 515,
+ 1590
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "pixels",
+ "type": "IMAGE",
+ "link": 10
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 11
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 12
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 13
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEEncodeForInpaint"
+ },
+ "widgets_values": [
+ 30
+ ]
+ },
+ {
+ "id": 3,
+ "type": "KSampler",
+ "pos": [
+ 1015,
+ 130
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 9,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 16
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 41
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 43
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 13
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 7,
+ 39
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 424987263190372,
+ "randomize",
+ 30,
+ 8,
+ "euler",
+ "normal",
+ 0.9
+ ]
+ },
+ {
+ "id": 19,
+ "type": "KSampler",
+ "pos": [
+ 1430,
+ 306
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 11,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 17
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 34
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 36
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 39
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 38
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 1043567145924620,
+ "randomize",
+ 20,
+ 8,
+ "euler",
+ "normal",
+ 1
+ ]
+ },
+ {
+ "id": 10,
+ "type": "LoadImage",
+ "pos": [
+ 100,
+ 774
+ ],
+ "size": {
+ "0": 315,
+ "1": 314
+ },
+ "flags": {},
+ "order": 3,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 10
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [
+ 12
+ ],
+ "shape": 3,
+ "slot_index": 1
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "LoadImage"
+ },
+ "widgets_values": [
+ "clipspace/clipspace-mask-257596.png [input]",
+ "image"
+ ]
+ },
+ {
+ "id": 27,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 515,
+ 130
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {},
+ "order": 4,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 33
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 34
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 1024,
+ 1024,
+ "the sun shining"
+ ]
+ },
+ {
+ "id": 29,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 515,
+ 790
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {},
+ "order": 6,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 40
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 41
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 1024,
+ 1024,
+ 0,
+ 0,
+ 1024,
+ 1024,
+ "the sun shining",
+ ""
+ ]
+ },
+ {
+ "id": 30,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 515,
+ 1190
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {},
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 42
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 43
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 1024,
+ 1024,
+ 0,
+ 0,
+ 1024,
+ 1024,
+ "text, watermark",
+ "text, watermark"
+ ]
+ }
+ ],
+ "links": [
+ [
+ 7,
+ 3,
+ 0,
+ 8,
+ 0,
+ "LATENT"
+ ],
+ [
+ 8,
+ 4,
+ 2,
+ 8,
+ 1,
+ "VAE"
+ ],
+ [
+ 9,
+ 8,
+ 0,
+ 9,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 10,
+ 10,
+ 0,
+ 12,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 11,
+ 4,
+ 2,
+ 12,
+ 1,
+ "VAE"
+ ],
+ [
+ 12,
+ 10,
+ 1,
+ 12,
+ 2,
+ "MASK"
+ ],
+ [
+ 13,
+ 12,
+ 0,
+ 3,
+ 3,
+ "LATENT"
+ ],
+ [
+ 16,
+ 15,
+ 0,
+ 3,
+ 0,
+ "MODEL"
+ ],
+ [
+ 17,
+ 18,
+ 0,
+ 19,
+ 0,
+ "MODEL"
+ ],
+ [
+ 22,
+ 20,
+ 0,
+ 21,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 23,
+ 18,
+ 2,
+ 20,
+ 1,
+ "VAE"
+ ],
+ [
+ 33,
+ 18,
+ 1,
+ 27,
+ 0,
+ "CLIP"
+ ],
+ [
+ 34,
+ 27,
+ 0,
+ 19,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 35,
+ 18,
+ 1,
+ 28,
+ 0,
+ "CLIP"
+ ],
+ [
+ 36,
+ 28,
+ 0,
+ 19,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 38,
+ 19,
+ 0,
+ 20,
+ 0,
+ "LATENT"
+ ],
+ [
+ 39,
+ 3,
+ 0,
+ 19,
+ 3,
+ "LATENT"
+ ],
+ [
+ 40,
+ 4,
+ 1,
+ 29,
+ 0,
+ "CLIP"
+ ],
+ [
+ 41,
+ 29,
+ 0,
+ 3,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 42,
+ 4,
+ 1,
+ 30,
+ 0,
+ "CLIP"
+ ],
+ [
+ 43,
+ 30,
+ 0,
+ 3,
+ 2,
+ "CONDITIONING"
+ ]
+ ],
+ "groups": [],
+ "config": {},
+ "extra": {},
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/ComfyUI/workflows/sdxl_with_refiner.json b/ComfyUI/workflows/sdxl_with_refiner.json
new file mode 100644
index 0000000..488daaf
--- /dev/null
+++ b/ComfyUI/workflows/sdxl_with_refiner.json
@@ -0,0 +1,952 @@
+{
+ "last_node_id": 34,
+ "last_link_id": 53,
+ "nodes": [
+ {
+ "id": 24,
+ "type": "VAEDecode",
+ "pos": [
+ 2233.40966796875,
+ -157.24490356445312
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 13,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 25
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 35
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 43
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 22,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 1228.8193005475018,
+ -192.48979517142766
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 0,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 44
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 37,
+ 40
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 35
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_refiner_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 31,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 1583,
+ 36
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 40
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 39,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 41
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 2048,
+ 1024,
+ "text, logo, borders, frame"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 30,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 1580,
+ -16
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 6,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 37
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 38,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 42
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 2048,
+ 1024,
+ "a beautiful landscape with trees and a mountain in the background"
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 16,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 100,
+ -200
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 1,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 18
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 11,
+ 15
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 12
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_base_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 14,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 469,
+ -56
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 5,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 11
+ },
+ {
+ "name": "text_g",
+ "type": "STRING",
+ "link": 17,
+ "widget": {
+ "name": "text_g"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 19
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 128,
+ 128,
+ 0,
+ 0,
+ 2048,
+ 1024,
+ "a beautiful landscape with trees and a mountain in the background",
+ ""
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 17,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 464,
+ 29
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 15
+ },
+ {
+ "name": "text_g",
+ "type": "STRING",
+ "link": 23,
+ "widget": {
+ "name": "text_g"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 20
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 128,
+ 128,
+ 0,
+ 0,
+ 2048,
+ 1024,
+ "text, logo, borders, frame",
+ ""
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 28,
+ "type": "KSamplerAdvanced",
+ "pos": [
+ 1839.40966796875,
+ -184.24490356445312
+ ],
+ "size": {
+ "0": 315,
+ "1": 334
+ },
+ "flags": {},
+ "order": 11,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 44
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 42
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 41
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 52
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 25
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSamplerAdvanced"
+ },
+ "widgets_values": [
+ "enable",
+ 606835094522680,
+ "randomize",
+ 100,
+ 8,
+ "dpmpp_2m_sde_gpu",
+ "normal",
+ 40,
+ 10000,
+ "disable"
+ ],
+ "color": "#323",
+ "bgcolor": "#535"
+ },
+ {
+ "id": 19,
+ "type": "KSamplerAdvanced",
+ "pos": [
+ 740,
+ -160
+ ],
+ "size": {
+ "0": 315,
+ "1": 334
+ },
+ "flags": {},
+ "order": 9,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 18
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 19
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 20
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 21
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 51
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSamplerAdvanced"
+ },
+ "widgets_values": [
+ "enable",
+ 249141753340176,
+ "randomize",
+ 50,
+ 8,
+ "dpmpp_2m_sde_gpu",
+ "normal",
+ 0,
+ 40,
+ "enable"
+ ],
+ "color": "#323",
+ "bgcolor": "#535"
+ },
+ {
+ "id": 8,
+ "type": "VAEDecode",
+ "pos": [
+ 1434,
+ 342
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 12,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 53
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 12
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 24
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 21,
+ "type": "PreviewImage",
+ "pos": [
+ 1797,
+ 344
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 14,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 24
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 34,
+ "type": "Reroute",
+ "pos": [
+ 1132,
+ 0
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 51
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "LATENT",
+ "links": [
+ 52,
+ 53
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 18,
+ "type": "PrimitiveNode",
+ "pos": [
+ -228,
+ -23
+ ],
+ "size": {
+ "0": 210,
+ "1": 76
+ },
+ "flags": {},
+ "order": 2,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [
+ 17,
+ 38
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "text_g"
+ }
+ }
+ ],
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ "a beautiful landscape with trees and a mountain in the background"
+ ],
+ "color": "#432",
+ "bgcolor": "#653"
+ },
+ {
+ "id": 5,
+ "type": "EmptyLatentImage",
+ "pos": [
+ 330,
+ 90
+ ],
+ "size": {
+ "0": 315,
+ "1": 106
+ },
+ "flags": {},
+ "order": 3,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 21
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "EmptyLatentImage"
+ },
+ "widgets_values": [
+ 1024,
+ 512,
+ 8
+ ]
+ },
+ {
+ "id": 20,
+ "type": "PrimitiveNode",
+ "pos": [
+ -229,
+ 135
+ ],
+ "size": {
+ "0": 210,
+ "1": 76
+ },
+ "flags": {},
+ "order": 4,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [
+ 23,
+ 39
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "text_g"
+ }
+ }
+ ],
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ "text, logo, borders, frame"
+ ],
+ "color": "#432",
+ "bgcolor": "#653"
+ },
+ {
+ "id": 32,
+ "type": "SaveImage",
+ "pos": [
+ 2555,
+ -157
+ ],
+ "size": {
+ "0": 315,
+ "1": 270
+ },
+ "flags": {},
+ "order": 15,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 43
+ }
+ ],
+ "properties": {},
+ "widgets_values": [
+ "sdxl_w_ref"
+ ]
+ }
+ ],
+ "links": [
+ [
+ 11,
+ 16,
+ 1,
+ 14,
+ 0,
+ "CLIP"
+ ],
+ [
+ 12,
+ 16,
+ 2,
+ 8,
+ 1,
+ "VAE"
+ ],
+ [
+ 15,
+ 16,
+ 1,
+ 17,
+ 0,
+ "CLIP"
+ ],
+ [
+ 17,
+ 18,
+ 0,
+ 14,
+ 1,
+ "STRING"
+ ],
+ [
+ 18,
+ 16,
+ 0,
+ 19,
+ 0,
+ "MODEL"
+ ],
+ [
+ 19,
+ 14,
+ 0,
+ 19,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 20,
+ 17,
+ 0,
+ 19,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 21,
+ 5,
+ 0,
+ 19,
+ 3,
+ "LATENT"
+ ],
+ [
+ 23,
+ 20,
+ 0,
+ 17,
+ 1,
+ "STRING"
+ ],
+ [
+ 24,
+ 8,
+ 0,
+ 21,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 25,
+ 28,
+ 0,
+ 24,
+ 0,
+ "LATENT"
+ ],
+ [
+ 35,
+ 22,
+ 2,
+ 24,
+ 1,
+ "VAE"
+ ],
+ [
+ 37,
+ 22,
+ 1,
+ 30,
+ 0,
+ "CLIP"
+ ],
+ [
+ 38,
+ 18,
+ 0,
+ 30,
+ 1,
+ "STRING"
+ ],
+ [
+ 39,
+ 20,
+ 0,
+ 31,
+ 1,
+ "STRING"
+ ],
+ [
+ 40,
+ 22,
+ 1,
+ 31,
+ 0,
+ "CLIP"
+ ],
+ [
+ 41,
+ 31,
+ 0,
+ 28,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 42,
+ 30,
+ 0,
+ 28,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 43,
+ 24,
+ 0,
+ 32,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 44,
+ 22,
+ 0,
+ 28,
+ 0,
+ "MODEL"
+ ],
+ [
+ 51,
+ 19,
+ 0,
+ 34,
+ 0,
+ "*"
+ ],
+ [
+ 52,
+ 34,
+ 0,
+ 28,
+ 3,
+ "LATENT"
+ ],
+ [
+ 53,
+ 34,
+ 0,
+ 8,
+ 0,
+ "LATENT"
+ ]
+ ],
+ "groups": [
+ {
+ "title": "SDXL Base",
+ "bounding": [
+ 90,
+ -274,
+ 1041,
+ 479
+ ],
+ "color": "#3f789e",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "Group",
+ "bounding": [
+ 1219,
+ -266,
+ 1299,
+ 544
+ ],
+ "color": "#8A8",
+ "font_size": 24,
+ "locked": false
+ }
+ ],
+ "config": {},
+ "extra": {},
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/ComfyUI/workflows/text_to_skybox.json b/ComfyUI/workflows/text_to_skybox.json
new file mode 100644
index 0000000..ecd438d
--- /dev/null
+++ b/ComfyUI/workflows/text_to_skybox.json
@@ -0,0 +1,3518 @@
+{
+ "last_node_id": 139,
+ "last_link_id": 233,
+ "nodes": [
+ {
+ "id": 44,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ 4514.93927980685,
+ 896.9013427646663
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 0,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 82
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 80,
+ 85
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 77
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_refiner_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 53,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 5118.939279806862,
+ 903.9013427646663
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 85
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 84,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 86
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 1024,
+ 1024,
+ "a text, a logo, borders"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 72,
+ "type": "PreviewImage",
+ "pos": [
+ 6732.320592261197,
+ 1022.4875354181333
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 55,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 116
+ }
+ ],
+ "title": "Refined Inpainted Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 69,
+ "type": "PreviewImage",
+ "pos": [
+ 6728.320592261197,
+ 680.4875354181331
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 52,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 114
+ }
+ ],
+ "title": "Inpainted Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 64,
+ "type": "PreviewImage",
+ "pos": [
+ 6726.320592261197,
+ 348.48753541813363
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 48,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 110
+ }
+ ],
+ "title": "Masked Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 51,
+ "type": "CLIPTextEncodeSDXLRefiner",
+ "pos": [
+ 5115.939279806862,
+ 850.9013427646667
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 14,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 80
+ },
+ {
+ "name": "text",
+ "type": "STRING",
+ "link": 81,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 83
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXLRefiner"
+ },
+ "widgets_values": [
+ 6,
+ 1024,
+ 1024,
+ "a landscape of a calm lake during winter"
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 47,
+ "type": "PrimitiveNode",
+ "pos": [
+ -2704.9724782326266,
+ -1546.9441847257217
+ ],
+ "size": {
+ "0": 210,
+ "1": 76.00001525878906
+ },
+ "flags": {},
+ "order": 1,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [
+ 84,
+ 98
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ "a text, a logo, borders"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 103,
+ "type": "VAEEncodeForInpaint",
+ "pos": [
+ 276.26822492539986,
+ -1508.7930392200958
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 29,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "pixels",
+ "type": "IMAGE",
+ "link": 158
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 210
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 159
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 160
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEEncodeForInpaint"
+ },
+ "widgets_values": [
+ 50
+ ]
+ },
+ {
+ "id": 79,
+ "type": "VAEDecode",
+ "pos": [
+ -868.7971534193995,
+ -2118.442204567087
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 23,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 127
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 204
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 133,
+ 164
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 105,
+ "type": "VAEDecode",
+ "pos": [
+ 1533.0353624058816,
+ -1296.2245560660915
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 32,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 167
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 211
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 169,
+ 200
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 102,
+ "type": "CLIPTextEncode",
+ "pos": [
+ -322.31398221552763,
+ -1265.5950233140627
+ ],
+ "size": {
+ "0": 400,
+ "1": 200
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 18,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 229
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 155
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncode"
+ },
+ "widgets_values": [
+ "text, logo, clouds"
+ ]
+ },
+ {
+ "id": 123,
+ "type": "VAEEncodeForInpaint",
+ "pos": [
+ 2305.68769282187,
+ -862.2867925875006
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 36,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "pixels",
+ "type": "IMAGE",
+ "link": 188
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 213
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 189
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 192
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEEncodeForInpaint"
+ },
+ "widgets_values": [
+ 30
+ ]
+ },
+ {
+ "id": 52,
+ "type": "KSampler",
+ "pos": [
+ 5436.58734015978,
+ 682.9881587332845
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 50,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 82
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 83
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 86
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 111
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 88
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 551062170539516,
+ "randomize",
+ 30,
+ 8,
+ "euler",
+ "normal",
+ 0.2
+ ]
+ },
+ {
+ "id": 76,
+ "type": "PrimitiveNode",
+ "pos": [
+ -2700.127957755949,
+ -1275.3608583919813
+ ],
+ "size": {
+ "0": 210,
+ "1": 82
+ },
+ "flags": {},
+ "order": 2,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "INT",
+ "type": "INT",
+ "links": [
+ 120,
+ 122,
+ 131
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "height"
+ }
+ }
+ ],
+ "title": "Initial Image Height\n",
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ 512,
+ "fixed"
+ ]
+ },
+ {
+ "id": 75,
+ "type": "PrimitiveNode",
+ "pos": [
+ -2704.127957755949,
+ -1408.3608583919818
+ ],
+ "size": {
+ "0": 210,
+ "1": 82
+ },
+ "flags": {},
+ "order": 3,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "INT",
+ "type": "INT",
+ "links": [
+ 119,
+ 121,
+ 130
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "width"
+ }
+ }
+ ],
+ "title": "Initial Image Width\n",
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ 2048,
+ "fixed"
+ ]
+ },
+ {
+ "id": 82,
+ "type": "EmptyLatentImage",
+ "pos": [
+ -1873.7971534193998,
+ -2014.442204567088
+ ],
+ "size": {
+ "0": 315,
+ "1": 106
+ },
+ "flags": {},
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "width",
+ "type": "INT",
+ "link": 130,
+ "widget": {
+ "name": "width"
+ }
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "link": 131,
+ "widget": {
+ "name": "height"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 132
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "EmptyLatentImage"
+ },
+ "widgets_values": [
+ 2048,
+ 512,
+ 2
+ ]
+ },
+ {
+ "id": 56,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ -1883,
+ -1834
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 15,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 95
+ },
+ {
+ "name": "text_g",
+ "type": "STRING",
+ "link": 97,
+ "widget": {
+ "name": "text_g"
+ }
+ },
+ {
+ "name": "width",
+ "type": "INT",
+ "link": 119,
+ "widget": {
+ "name": "width"
+ }
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "link": 120,
+ "widget": {
+ "name": "height"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 96,
+ 124
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 2048,
+ 512,
+ 0,
+ 0,
+ 2048,
+ 512,
+ "a landscape of a calm lake during winter",
+ ""
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 57,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ -1878,
+ -1781
+ ],
+ "size": {
+ "0": 400,
+ "1": 270.0000305175781
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 99
+ },
+ {
+ "name": "text_g",
+ "type": "STRING",
+ "link": 98,
+ "widget": {
+ "name": "text_g"
+ }
+ },
+ {
+ "name": "width",
+ "type": "INT",
+ "link": 121,
+ "widget": {
+ "name": "width"
+ }
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "link": 122,
+ "widget": {
+ "name": "height"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 100,
+ 125
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 2048,
+ 512,
+ 0,
+ 0,
+ 2048,
+ 512,
+ "a text, a logo, borders",
+ "a logo, text, borders"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 78,
+ "type": "KSampler",
+ "pos": [
+ -1389.7971534193982,
+ -2047.442204567088
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 21,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 123
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 124
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 125
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 132
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 127
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 1035767414431814,
+ "randomize",
+ 20,
+ 8,
+ "euler",
+ "normal",
+ 0.8
+ ]
+ },
+ {
+ "id": 71,
+ "type": "PreviewImage",
+ "pos": [
+ 6690,
+ -860
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 25,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 133
+ }
+ ],
+ "title": "Initial Image",
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 106,
+ "type": "PreviewImage",
+ "pos": [
+ 6700,
+ -490
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 33,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 169
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 126,
+ "type": "PreviewImage",
+ "pos": [
+ 6710,
+ -160
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 39,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 194
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 129,
+ "type": "Reroute",
+ "pos": [
+ 1582,
+ -411
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 22,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 212
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "VAE",
+ "links": [
+ 213,
+ 214,
+ 215
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 49,
+ "type": "VAEDecode",
+ "pos": [
+ 5836,
+ 931
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 53,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 88
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 77
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 116,
+ 117
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 130,
+ "type": "Reroute",
+ "pos": [
+ 3917,
+ 502
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 24,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 215
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "VAE",
+ "links": [
+ 216,
+ 217
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 128,
+ "type": "Reroute",
+ "pos": [
+ -117,
+ -985
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 20,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 209
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "VAE",
+ "links": [
+ 210,
+ 211,
+ 212
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 127,
+ "type": "Reroute",
+ "pos": [
+ -1378,
+ -1240
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 13,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 203
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "VAE",
+ "links": [
+ 204,
+ 209
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 34,
+ "type": "UNETLoader",
+ "pos": [
+ -2756,
+ -941
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {},
+ "order": 4,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 220
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "UNETLoader"
+ },
+ "widgets_values": [
+ "sd_xl_inpainting_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 133,
+ "type": "Reroute",
+ "pos": [
+ 757,
+ -873
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 9,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 220
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "MODEL",
+ "links": [
+ 221,
+ 222,
+ 223
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 121,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 2060,
+ -679
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 16,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 225
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 190
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 1024,
+ 1024,
+ 0,
+ 0,
+ 1024,
+ 1024,
+ "ground viewed from above",
+ ""
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 122,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ 2040,
+ -334
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 17,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 226
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 191
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 1024,
+ 1024,
+ 0,
+ 0,
+ 1024,
+ 1024,
+ "",
+ ""
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 134,
+ "type": "Reroute",
+ "pos": [
+ 1573,
+ -535
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 11,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 227
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "CLIP",
+ "links": [
+ 225,
+ 226
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 25,
+ "type": "CheckpointLoaderSimple",
+ "pos": [
+ -2753.9724782326266,
+ -1126.9441847257215
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 5,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "MODEL",
+ "type": "MODEL",
+ "links": [
+ 123
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "CLIP",
+ "type": "CLIP",
+ "links": [
+ 95,
+ 99,
+ 227,
+ 228
+ ],
+ "shape": 3,
+ "slot_index": 1
+ },
+ {
+ "name": "VAE",
+ "type": "VAE",
+ "links": [
+ 203
+ ],
+ "shape": 3,
+ "slot_index": 2
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CheckpointLoaderSimple"
+ },
+ "widgets_values": [
+ "sd_xl_base_1.0.safetensors"
+ ]
+ },
+ {
+ "id": 97,
+ "type": "CLIPTextEncodeSDXL",
+ "pos": [
+ -309,
+ -1387
+ ],
+ "size": {
+ "0": 400,
+ "1": 270
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 19,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "clip",
+ "type": "CLIP",
+ "link": 230
+ }
+ ],
+ "outputs": [
+ {
+ "name": "CONDITIONING",
+ "type": "CONDITIONING",
+ "links": [
+ 154
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "CLIPTextEncodeSDXL"
+ },
+ "widgets_values": [
+ 2048,
+ 2048,
+ 0,
+ 0,
+ 2048,
+ 2048,
+ "the sky viewed from below",
+ ""
+ ]
+ },
+ {
+ "id": 135,
+ "type": "Reroute",
+ "pos": [
+ -665,
+ -1176
+ ],
+ "size": [
+ 75,
+ 26
+ ],
+ "flags": {},
+ "order": 12,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "",
+ "type": "*",
+ "link": 228
+ }
+ ],
+ "outputs": [
+ {
+ "name": "",
+ "type": "CLIP",
+ "links": [
+ 229,
+ 230
+ ],
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "showOutputText": false,
+ "horizontal": false
+ }
+ },
+ {
+ "id": 88,
+ "type": "ImageStretchForOutpaint",
+ "pos": [
+ -233,
+ -1688
+ ],
+ "size": {
+ "0": 315,
+ "1": 126
+ },
+ "flags": {},
+ "order": 26,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 164
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 142,
+ 158
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [
+ 151,
+ 159
+ ],
+ "shape": 3,
+ "slot_index": 1
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageStretchForOutpaint"
+ },
+ "widgets_values": [
+ 1024,
+ 0,
+ 10
+ ]
+ },
+ {
+ "id": 95,
+ "type": "MaskToImage",
+ "pos": [
+ 337,
+ -1710
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 28,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 151,
+ "slot_index": 0
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 150
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MaskToImage"
+ }
+ },
+ {
+ "id": 96,
+ "type": "PreviewImage",
+ "pos": [
+ 760,
+ -1747
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 30,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 150
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 68,
+ "type": "SaveImage",
+ "pos": [
+ 6668,
+ 1410
+ ],
+ "size": {
+ "0": 315,
+ "1": 270.0000305175781
+ },
+ "flags": {},
+ "order": 54,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 113
+ }
+ ],
+ "title": "Inpainted Image",
+ "properties": {},
+ "widgets_values": [
+ "skybox"
+ ]
+ },
+ {
+ "id": 73,
+ "type": "MiddleSplit",
+ "pos": [
+ 6094,
+ 1014
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 56,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 117
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Image",
+ "type": "IMAGE",
+ "links": [
+ 118,
+ 231
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MiddleSplit"
+ }
+ },
+ {
+ "id": 125,
+ "type": "VAEDecode",
+ "pos": [
+ 3080,
+ -412
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 38,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 193
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 214
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 194,
+ 201,
+ 202
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 24,
+ "type": "VerticalMiddleMask",
+ "pos": [
+ 3360,
+ 298
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {},
+ "order": 40,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 201
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Mask",
+ "type": "MASK",
+ "links": [
+ 106,
+ 107
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VerticalMiddleMask"
+ },
+ "widgets_values": [
+ 168
+ ]
+ },
+ {
+ "id": 21,
+ "type": "MiddleSplit",
+ "pos": [
+ 3756,
+ 137
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 41,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 202
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Image",
+ "type": "IMAGE",
+ "links": [
+ 42,
+ 49,
+ 109
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MiddleSplit"
+ }
+ },
+ {
+ "id": 63,
+ "type": "MaskToImage",
+ "pos": [
+ 3740,
+ 267
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 43,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 107
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 108
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MaskToImage"
+ }
+ },
+ {
+ "id": 42,
+ "type": "workflow/Blurry Mask",
+ "pos": [
+ 3786,
+ 386
+ ],
+ "size": {
+ "0": 315,
+ "1": 318
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 42,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 106
+ }
+ ],
+ "outputs": [
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [
+ 67
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "workflow/Blurry Mask"
+ },
+ "widgets_values": [
+ 10,
+ 1,
+ "red"
+ ]
+ },
+ {
+ "id": 61,
+ "type": "ImageBlend",
+ "pos": [
+ 4114,
+ 233
+ ],
+ "size": {
+ "0": 315,
+ "1": 102
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 46,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image1",
+ "type": "IMAGE",
+ "link": 109
+ },
+ {
+ "name": "image2",
+ "type": "IMAGE",
+ "link": 108
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 110
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageBlend"
+ },
+ "widgets_values": [
+ 0.5,
+ "multiply"
+ ]
+ },
+ {
+ "id": 27,
+ "type": "VAEEncodeForInpaint",
+ "pos": [
+ 4411,
+ 133
+ ],
+ "size": {
+ "0": 315,
+ "1": 98
+ },
+ "flags": {},
+ "order": 45,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "pixels",
+ "type": "IMAGE",
+ "link": 49
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 216
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 67
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 55
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEEncodeForInpaint"
+ },
+ "widgets_values": [
+ 30
+ ]
+ },
+ {
+ "id": 32,
+ "type": "KSampler",
+ "pos": [
+ 4784,
+ 201
+ ],
+ "size": {
+ "0": 315,
+ "1": 262
+ },
+ "flags": {},
+ "order": 47,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 223
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 96
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 100
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 55
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 56,
+ 111
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSampler"
+ },
+ "widgets_values": [
+ 346909251795691,
+ "randomize",
+ 50,
+ 8,
+ "euler",
+ "normal",
+ 0.9
+ ]
+ },
+ {
+ "id": 28,
+ "type": "VAEDecode",
+ "pos": [
+ 5170,
+ 467
+ ],
+ "size": {
+ "0": 210,
+ "1": 46
+ },
+ "flags": {},
+ "order": 49,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 56
+ },
+ {
+ "name": "vae",
+ "type": "VAE",
+ "link": 217
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 112,
+ 114
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "VAEDecode"
+ }
+ },
+ {
+ "id": 67,
+ "type": "MiddleSplit",
+ "pos": [
+ 5456,
+ 422
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 51,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 112
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Image",
+ "type": "IMAGE",
+ "links": [
+ 113
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MiddleSplit"
+ }
+ },
+ {
+ "id": 136,
+ "type": "ImageReverseLambert",
+ "pos": [
+ 6334,
+ 1069
+ ],
+ "size": {
+ "0": 226.8000030517578,
+ "1": 26
+ },
+ "flags": {},
+ "order": 58,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 231
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 232
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageReverseLambert"
+ }
+ },
+ {
+ "id": 137,
+ "type": "SaveImage",
+ "pos": [
+ 6664,
+ 2058
+ ],
+ "size": {
+ "0": 315,
+ "1": 270
+ },
+ "flags": {},
+ "order": 59,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 232
+ }
+ ],
+ "properties": {},
+ "widgets_values": [
+ "skybox_refined_lambert"
+ ]
+ },
+ {
+ "id": 83,
+ "type": "PreviewImage",
+ "pos": [
+ 332,
+ -1329
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 27,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 142
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 120,
+ "type": "ImageStretchForOutpaint",
+ "pos": [
+ 1914.6118936249993,
+ -902.1584717031254
+ ],
+ "size": {
+ "0": 315,
+ "1": 126
+ },
+ "flags": {},
+ "order": 34,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 200
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 188,
+ 233
+ ],
+ "shape": 3,
+ "slot_index": 0
+ },
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [
+ 189
+ ],
+ "shape": 3,
+ "slot_index": 1
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageStretchForOutpaint"
+ },
+ "widgets_values": [
+ 0,
+ 512,
+ 30
+ ]
+ },
+ {
+ "id": 138,
+ "type": "PreviewImage",
+ "pos": [
+ 2642,
+ -528
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {},
+ "order": 35,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 233
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 3,
+ "type": "PreviewImage",
+ "pos": [
+ 4291,
+ 286
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {
+ "collapsed": false
+ },
+ "order": 44,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 42
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ }
+ },
+ {
+ "id": 46,
+ "type": "PrimitiveNode",
+ "pos": [
+ -2709.8029387054517,
+ -1675.4843737657613
+ ],
+ "size": {
+ "0": 210,
+ "1": 76.00000762939453
+ },
+ "flags": {},
+ "order": 6,
+ "mode": 0,
+ "outputs": [
+ {
+ "name": "STRING",
+ "type": "STRING",
+ "links": [
+ 81,
+ 97
+ ],
+ "slot_index": 0,
+ "widget": {
+ "name": "text"
+ }
+ }
+ ],
+ "properties": {
+ "Run widget replace on values": false
+ },
+ "widgets_values": [
+ "a landscape of a calm lake during winter"
+ ],
+ "color": "#232",
+ "bgcolor": "#353"
+ },
+ {
+ "id": 100,
+ "type": "KSamplerAdvanced",
+ "pos": [
+ 1005.2682249254053,
+ -1472.7930392200956
+ ],
+ "size": {
+ "0": 315,
+ "1": 334
+ },
+ "flags": {},
+ "order": 31,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 221
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 154
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 155
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 160
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 167
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSamplerAdvanced"
+ },
+ "widgets_values": [
+ "enable",
+ 35120899948498,
+ "randomize",
+ 30,
+ 8,
+ "euler",
+ "normal",
+ 0,
+ 10000,
+ "enable"
+ ]
+ },
+ {
+ "id": 124,
+ "type": "KSamplerAdvanced",
+ "pos": [
+ 2696,
+ -952
+ ],
+ "size": {
+ "0": 315,
+ "1": 334
+ },
+ "flags": {},
+ "order": 37,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "MODEL",
+ "link": 222
+ },
+ {
+ "name": "positive",
+ "type": "CONDITIONING",
+ "link": 190
+ },
+ {
+ "name": "negative",
+ "type": "CONDITIONING",
+ "link": 191
+ },
+ {
+ "name": "latent_image",
+ "type": "LATENT",
+ "link": 192
+ }
+ ],
+ "outputs": [
+ {
+ "name": "LATENT",
+ "type": "LATENT",
+ "links": [
+ 193
+ ],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "KSamplerAdvanced"
+ },
+ "widgets_values": [
+ "enable",
+ 357250320487448,
+ "randomize",
+ 30,
+ 8,
+ "euler",
+ "normal",
+ 0,
+ 10000,
+ "disable"
+ ]
+ },
+ {
+ "id": 50,
+ "type": "SaveImage",
+ "pos": [
+ 6677,
+ 1729
+ ],
+ "size": {
+ "0": 315,
+ "1": 270.00006103515625
+ },
+ "flags": {},
+ "order": 57,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 118
+ }
+ ],
+ "title": "Inpainted Refined Image",
+ "properties": {},
+ "widgets_values": [
+ "skybox_refined"
+ ]
+ }
+ ],
+ "links": [
+ [
+ 42,
+ 21,
+ 0,
+ 3,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 49,
+ 21,
+ 0,
+ 27,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 55,
+ 27,
+ 0,
+ 32,
+ 3,
+ "LATENT"
+ ],
+ [
+ 56,
+ 32,
+ 0,
+ 28,
+ 0,
+ "LATENT"
+ ],
+ [
+ 67,
+ 42,
+ 0,
+ 27,
+ 2,
+ "MASK"
+ ],
+ [
+ 77,
+ 44,
+ 2,
+ 49,
+ 1,
+ "VAE"
+ ],
+ [
+ 80,
+ 44,
+ 1,
+ 51,
+ 0,
+ "CLIP"
+ ],
+ [
+ 81,
+ 46,
+ 0,
+ 51,
+ 1,
+ "STRING"
+ ],
+ [
+ 82,
+ 44,
+ 0,
+ 52,
+ 0,
+ "MODEL"
+ ],
+ [
+ 83,
+ 51,
+ 0,
+ 52,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 84,
+ 47,
+ 0,
+ 53,
+ 1,
+ "STRING"
+ ],
+ [
+ 85,
+ 44,
+ 1,
+ 53,
+ 0,
+ "CLIP"
+ ],
+ [
+ 86,
+ 53,
+ 0,
+ 52,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 88,
+ 52,
+ 0,
+ 49,
+ 0,
+ "LATENT"
+ ],
+ [
+ 95,
+ 25,
+ 1,
+ 56,
+ 0,
+ "CLIP"
+ ],
+ [
+ 96,
+ 56,
+ 0,
+ 32,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 97,
+ 46,
+ 0,
+ 56,
+ 1,
+ "STRING"
+ ],
+ [
+ 98,
+ 47,
+ 0,
+ 57,
+ 1,
+ "STRING"
+ ],
+ [
+ 99,
+ 25,
+ 1,
+ 57,
+ 0,
+ "CLIP"
+ ],
+ [
+ 100,
+ 57,
+ 0,
+ 32,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 106,
+ 24,
+ 0,
+ 42,
+ 0,
+ "MASK"
+ ],
+ [
+ 107,
+ 24,
+ 0,
+ 63,
+ 0,
+ "MASK"
+ ],
+ [
+ 108,
+ 63,
+ 0,
+ 61,
+ 1,
+ "IMAGE"
+ ],
+ [
+ 109,
+ 21,
+ 0,
+ 61,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 110,
+ 61,
+ 0,
+ 64,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 111,
+ 32,
+ 0,
+ 52,
+ 3,
+ "LATENT"
+ ],
+ [
+ 112,
+ 28,
+ 0,
+ 67,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 113,
+ 67,
+ 0,
+ 68,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 114,
+ 28,
+ 0,
+ 69,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 116,
+ 49,
+ 0,
+ 72,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 117,
+ 49,
+ 0,
+ 73,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 118,
+ 73,
+ 0,
+ 50,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 119,
+ 75,
+ 0,
+ 56,
+ 2,
+ "INT"
+ ],
+ [
+ 120,
+ 76,
+ 0,
+ 56,
+ 3,
+ "INT"
+ ],
+ [
+ 121,
+ 75,
+ 0,
+ 57,
+ 2,
+ "INT"
+ ],
+ [
+ 122,
+ 76,
+ 0,
+ 57,
+ 3,
+ "INT"
+ ],
+ [
+ 123,
+ 25,
+ 0,
+ 78,
+ 0,
+ "MODEL"
+ ],
+ [
+ 124,
+ 56,
+ 0,
+ 78,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 125,
+ 57,
+ 0,
+ 78,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 127,
+ 78,
+ 0,
+ 79,
+ 0,
+ "LATENT"
+ ],
+ [
+ 130,
+ 75,
+ 0,
+ 82,
+ 0,
+ "INT"
+ ],
+ [
+ 131,
+ 76,
+ 0,
+ 82,
+ 1,
+ "INT"
+ ],
+ [
+ 132,
+ 82,
+ 0,
+ 78,
+ 3,
+ "LATENT"
+ ],
+ [
+ 133,
+ 79,
+ 0,
+ 71,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 142,
+ 88,
+ 0,
+ 83,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 150,
+ 95,
+ 0,
+ 96,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 151,
+ 88,
+ 1,
+ 95,
+ 0,
+ "MASK"
+ ],
+ [
+ 154,
+ 97,
+ 0,
+ 100,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 155,
+ 102,
+ 0,
+ 100,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 158,
+ 88,
+ 0,
+ 103,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 159,
+ 88,
+ 1,
+ 103,
+ 2,
+ "MASK"
+ ],
+ [
+ 160,
+ 103,
+ 0,
+ 100,
+ 3,
+ "LATENT"
+ ],
+ [
+ 164,
+ 79,
+ 0,
+ 88,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 167,
+ 100,
+ 0,
+ 105,
+ 0,
+ "LATENT"
+ ],
+ [
+ 169,
+ 105,
+ 0,
+ 106,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 188,
+ 120,
+ 0,
+ 123,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 189,
+ 120,
+ 1,
+ 123,
+ 2,
+ "MASK"
+ ],
+ [
+ 190,
+ 121,
+ 0,
+ 124,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 191,
+ 122,
+ 0,
+ 124,
+ 2,
+ "CONDITIONING"
+ ],
+ [
+ 192,
+ 123,
+ 0,
+ 124,
+ 3,
+ "LATENT"
+ ],
+ [
+ 193,
+ 124,
+ 0,
+ 125,
+ 0,
+ "LATENT"
+ ],
+ [
+ 194,
+ 125,
+ 0,
+ 126,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 200,
+ 105,
+ 0,
+ 120,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 201,
+ 125,
+ 0,
+ 24,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 202,
+ 125,
+ 0,
+ 21,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 203,
+ 25,
+ 2,
+ 127,
+ 0,
+ "*"
+ ],
+ [
+ 204,
+ 127,
+ 0,
+ 79,
+ 1,
+ "VAE"
+ ],
+ [
+ 209,
+ 127,
+ 0,
+ 128,
+ 0,
+ "*"
+ ],
+ [
+ 210,
+ 128,
+ 0,
+ 103,
+ 1,
+ "VAE"
+ ],
+ [
+ 211,
+ 128,
+ 0,
+ 105,
+ 1,
+ "VAE"
+ ],
+ [
+ 212,
+ 128,
+ 0,
+ 129,
+ 0,
+ "*"
+ ],
+ [
+ 213,
+ 129,
+ 0,
+ 123,
+ 1,
+ "VAE"
+ ],
+ [
+ 214,
+ 129,
+ 0,
+ 125,
+ 1,
+ "VAE"
+ ],
+ [
+ 215,
+ 129,
+ 0,
+ 130,
+ 0,
+ "*"
+ ],
+ [
+ 216,
+ 130,
+ 0,
+ 27,
+ 1,
+ "VAE"
+ ],
+ [
+ 217,
+ 130,
+ 0,
+ 28,
+ 1,
+ "VAE"
+ ],
+ [
+ 220,
+ 34,
+ 0,
+ 133,
+ 0,
+ "*"
+ ],
+ [
+ 221,
+ 133,
+ 0,
+ 100,
+ 0,
+ "MODEL"
+ ],
+ [
+ 222,
+ 133,
+ 0,
+ 124,
+ 0,
+ "MODEL"
+ ],
+ [
+ 223,
+ 133,
+ 0,
+ 32,
+ 0,
+ "MODEL"
+ ],
+ [
+ 225,
+ 134,
+ 0,
+ 121,
+ 0,
+ "CLIP"
+ ],
+ [
+ 226,
+ 134,
+ 0,
+ 122,
+ 0,
+ "CLIP"
+ ],
+ [
+ 227,
+ 25,
+ 1,
+ 134,
+ 0,
+ "*"
+ ],
+ [
+ 228,
+ 25,
+ 1,
+ 135,
+ 0,
+ "*"
+ ],
+ [
+ 229,
+ 135,
+ 0,
+ 102,
+ 0,
+ "CLIP"
+ ],
+ [
+ 230,
+ 135,
+ 0,
+ 97,
+ 0,
+ "CLIP"
+ ],
+ [
+ 231,
+ 73,
+ 0,
+ 136,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 232,
+ 136,
+ 0,
+ 137,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 233,
+ 120,
+ 0,
+ 138,
+ 0,
+ "IMAGE"
+ ]
+ ],
+ "groups": [
+ {
+ "title": "Horizontal Tiling",
+ "bounding": [
+ 2846,
+ 60,
+ 2843,
+ 494
+ ],
+ "color": "#b58b2a",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "User Inputs",
+ "bounding": [
+ -2764,
+ -1749,
+ 335,
+ 731
+ ],
+ "color": "#88A",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "Refining",
+ "bounding": [
+ 4504,
+ 610,
+ 2062,
+ 534
+ ],
+ "color": "#8A8",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+      "title": "Output",
+ "bounding": [
+ 6657,
+ 1324,
+ 335,
+ 1038
+ ],
+ "color": "#3f789e",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+      "title": "Previews",
+ "bounding": [
+ 6717,
+ 274,
+ 236,
+ 1004
+ ],
+ "color": "#444",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "Sky creation",
+ "bounding": [
+ -383,
+ -1902,
+ 2136,
+ 874
+ ],
+ "color": "#3f789e",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "Initial Image",
+ "bounding": [
+ -1918,
+ -2186,
+ 1342,
+ 443
+ ],
+ "color": "#8AA",
+ "font_size": 24,
+ "locked": false
+ },
+ {
+ "title": "Ground Outpainting",
+ "bounding": [
+ 1892,
+ -1026,
+ 1128,
+ 994
+ ],
+ "color": "#3f789e",
+ "font_size": 24,
+ "locked": false
+ }
+ ],
+ "config": {},
+ "extra": {
+ "groupNodes": {
+ "Blurry Mask": {
+ "nodes": [
+ {
+ "type": "MaskToImage",
+ "pos": [
+ 190,
+ 520
+ ],
+ "size": {
+ "0": 210,
+ "1": 26
+ },
+ "flags": {},
+ "order": 7,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "MaskToImage"
+ },
+ "index": 0
+ },
+ {
+ "type": "ImageBlur",
+ "pos": [
+ 320,
+ 650
+ ],
+ "size": {
+ "0": 315,
+ "1": 82
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageBlur"
+ },
+ "widgets_values": [
+ 20,
+ 1
+ ],
+ "index": 1
+ },
+ {
+ "type": "PreviewImage",
+ "pos": [
+ 500,
+ 640
+ ],
+ "size": {
+ "0": 210,
+ "1": 246
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 9,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": null
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "PreviewImage"
+ },
+ "index": 2
+ },
+ {
+ "type": "ImageToMask",
+ "pos": [
+ 380,
+ 540
+ ],
+ "size": {
+ "0": 315,
+ "1": 58
+ },
+ "flags": {
+ "collapsed": true
+ },
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [],
+ "shape": 3,
+ "slot_index": 0
+ }
+ ],
+ "properties": {
+ "Node name for S&R": "ImageToMask"
+ },
+ "widgets_values": [
+ "red"
+ ],
+ "index": 3
+ }
+ ],
+ "links": [
+ [
+ null,
+ 0,
+ 0,
+ 0,
+ 24,
+ "MASK"
+ ],
+ [
+ 0,
+ 0,
+ 1,
+ 0,
+ 39,
+ "IMAGE"
+ ],
+ [
+ 1,
+ 0,
+ 2,
+ 0,
+ 38,
+ "IMAGE"
+ ],
+ [
+ 1,
+ 0,
+ 3,
+ 0,
+ 38,
+ "IMAGE"
+ ]
+ ],
+ "external": [
+ [
+ 3,
+ 0,
+ "MASK"
+ ]
+ ]
+ }
+ }
+ },
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0a27482
--- /dev/null
+++ b/README.md
@@ -0,0 +1,179 @@
+# VR Environment Creation with Generative AI, Python Server
+
+A Python project to create VR environments using Generative AI.
+You can run it as a TCP server to interface it with a [Unity client](https://github.com/fcbg-hnp-vr/VR-Environment-GenAI-Unity),
+to get the fully-fledged AI/VR application.
+
+This is a use case of generative AI to build a complete VR scenery.
+It was developed at the [Fondation Campus Biotech Geneva](https://fcbg.ch/),
+in collaboration with the [Laboratory of Cognitive Science](https://www.epfl.ch/labs/lnco/),
+by Hugo FARAJALLAH.
+
+## Requirements
+
+- Python 3.10.12+
+- A CUDA-compatible graphics card with at least 12 GB of VRAM.
+- Up to 15 GB of storage for the models.
+
+## Installation
+
+Using Python:
+
+1. Install [CUDA 12.1](https://developer.nvidia.com/cuda-12-1-0-download-archive), which enables computation on the GPU.
+2. Install **Python 3.10**; on Windows you can download it using the [official installer](https://www.python.org/downloads/windows/).
+3. Clone or copy the Git repository: .
+4. Create a Python virtual environment. While not strictly necessary, it is highly recommended as the project requires
+many dependencies. For instance using [venv](https://docs.python.org/3/library/venv.html):
+ * On Linux:
+ ```bash
+ cd VR-Environment-GenAI-Server
+ # From https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#create-and-use-virtual-environments
+ python -m venv .venv # Creates the virtual environment under .venv
+ source .venv/bin/activate # Activates it
+ ```
+ * On Windows:
+ ```shell
+ cd VR-Environment-GenAI-Server
+ # From https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#create-and-use-virtual-environments
+ py -m venv .venv # Creates the virtual environment under .venv
+ .venv\Scripts\activate # Activates it
+ ```
+5. Install the Python requirements.
+ ```bash
+ pip install -r requirements.txt
+ ```
+   **Important**: at the time of writing (2024-07-29) the default version of PyTorch
+   is compatible with CUDA 12.1, and you may not need any extra steps. If you receive
+   an error message telling you that your version of PyTorch is not compatible with CUDA,
+   uninstall PyTorch completely and reinstall it by running
+   ``pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121``.
+   Please have a look at  for details. A quick sanity check is shown right after this list.
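+
+To verify the setup from inside the virtual environment:
+
+```python
+# Minimal sanity check; the versions printed depend on your local installation.
+import torch
+
+print(torch.__version__, torch.version.cuda)
+print(torch.cuda.is_available())  # Expected: True on a correctly configured CUDA setup
+```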
+
+From here on, the project should be functional. The next section is optional, but it can save you a lot of time.
+
+> (optional) You can speed up image generation using [accelerate](https://huggingface.co/docs/accelerate/index).
+Install it with ``pip install accelerate``.
+
+
+## Installation details
+
+* The first time a model is launched it needs to be downloaded;
+this operation can take some time and requires an internet connection.
+The [Usage](#usage) section explains how to download all models at once.
+* For PyCharm users, an `.idea` folder is included so the repository can be opened as a project.
+* Optional, demo only: to capture the audio from the microphone in Python (ASR),
+you need ffmpeg, portaudio and pyaudio:
+ ```bash
+ sudo apt install ffmpeg portaudio19-dev python3-pyaudio
+ pip install -r requirements-optional.txt # Installs PyAudio
+ ```
+
+## Usage
+
+Each file can be executed independently, so there are as many entry points as files.
+
+The most common use cases are the following:
+
+* Generate a new image with ``python -m skybox.diffusion``.
+* Download all models with ``python -m utils.download_models``.
+If you skip this step, the models will be downloaded at run time, which may be very slow.
+* Start the server with ``python -m server.run``.
+
+The sections below describe the individual files in more detail.
+
+### Image generation
+
+Go to the ``skybox`` folder.
+
+1. diffusion.py - base module to create an image from a diffusion model (a minimal stand-alone sketch follows this list).
+2. inpainting.py - implements an inpainting model.
+3. image_processing.py - defines image processing features.
+4. mask_editor.py - code logic to generate a mask adapted to the image.
+The result is usually passed to inpainting functions.
+5. panorama_creator.py - code logic to generate a panorama.
+6. The code in ``skybox/legacy`` may not be useful. I keep it there for personal use.
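+
+The exact API of these modules is not shown here; as a reference point, a minimal stand-alone sketch with [diffusers](https://huggingface.co/docs/diffusers/index) and the base model from the Models' list below looks like this (``skybox.diffusion`` wraps its own logic around the same model, so the in-repo functions may differ):
+
+```python
+# Minimal sketch, not the repo's own wrapper: generates a single image with SDXL base.
+import torch
+from diffusers import StableDiffusionXLPipeline
+
+pipe = StableDiffusionXLPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
+    torch_dtype=torch.float16,
+    variant="fp16",
+    use_safetensors=True,
+).to("cuda")
+
+image = pipe("a landscape of a calm lake during winter").images[0]
+image.save("landscape.png")
+```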
+
+### 3D features
+
+3D features are in the ``environment`` folder. It is still in active development at the time of writing (June 2024),
+hence the following is subject to change.
+
+1. depth_generation.py - provides a model that takes a standard RGB image and creates a depth map (see the sketch after this list).
+2. point_cloud_pipeline.py - uses the RGBD image to create a point cloud and converts it to a mesh.
+3. mesh_pipeline.py - uses the RGBD image and representation features to create a terrain mesh.
+4. mask_former.py - semantic segmentation of an RGB image.
+5. image_segmentation.py - uses an RGBD+semantic image to isolate the main elements.
+6. depth_inpainting.py - combines inpainting with depth conditioning to recreate parts of a terrain.
+Not yet integrated into the main code base.
+7. rendered.py - creates a 3D view of the terrain; not finished yet.
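+
+A minimal depth-map call, using the function defined in ``environment/depth_generation.py`` (the image path below is a placeholder):
+
+```python
+from PIL import Image
+
+from environment.depth_generation import get_depth
+
+# Returns a NumPy array with depth values between 0 and 1.
+depth = get_depth(Image.open("path/to/landscape.png").convert("RGB"))
+print(depth.shape, depth.min(), depth.max())
+```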
+
+### Speech-to-text (ASR)
+
+For speech-to-text features, go to the ``asr`` folder (automatic speech recognition).
+
+* speech_to_text.py - implements an Automatic Speech Recognition (ASR) model.
+* asr_demo.py - a simple demo; you can either use your microphone or load a sample dataset (see the sketch after this list).
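+
+A minimal transcription call, using the function defined in ``asr/speech_to_text.py`` (the audio path below is a placeholder):
+
+```python
+from asr.speech_to_text import do_audio_transcription
+
+# Transcribe (and translate to English) an audio file; asr_demo.py shows a microphone-based variant.
+result = do_audio_transcription("path/to/recording.wav")
+print(result["text"])
+```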
+
+### ComfyUI graphical interface
+
+If you want to use a graphical interface instead of Python code,
+you can use the provided [ComfyUI](https://github.com/comfyanonymous/ComfyUI) workflows
+in the `ComfyUI` folder.
+
+Each workflow is explained in [ComfyUI/README.md](ComfyUI/README.md).
+
+### Server
+
+The server features are in `server`. See [Start as a TCP server](#start-as-a-tcp-server) for the details on usage.
+
+* run.py - starts a TCP server able to serve requests for the previously defined models.
+* task_tracker.py - a small class adding syntactic sugar to track a task easily.
+* utils.py - utility functions for the server.
+
+
+### Other Features
+
+* As a test, the ``sound`` folder has some experiments with sound generation.
+* The ``utils`` folder contains useful functions for the user:
+ * download_models.py - downloads useful models for the server. It does not download all models.
+
+## Configuration
+
+The main server configuration is in ``api.json``.
+The most important settings are "serverIp" and "serverPort", as they set the address of the server.
+
+## Start as a TCP server
+
+A TCP server can be started in order to offload the AI part from the application thread.
+Just launch `python -m server.run`. The server [configuration](#configuration) is defined in `api.json`.
+The communication is handled in JSON, in an HTTP-like style; a minimal client sketch is shown below.
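+
+As a starting point, a client can be as simple as the following sketch (the exact request/response schema is defined by ``server/run.py`` and is not documented here, so the payload below is purely illustrative):
+
+```python
+# Hypothetical client sketch, assuming the default api.json (127.0.0.1:9000).
+import json
+import socket
+
+with socket.create_connection(("127.0.0.1", 9000)) as sock:
+    request = {"method": "GET", "path": "/status"}  # placeholder payload, not the real schema
+    sock.sendall(json.dumps(request).encode("utf-8"))
+    print(sock.recv(4096).decode("utf-8"))
+```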
+
+To connect to the server from another computer on the same network, you need to open a port.
+On Windows, you simply need to go to the Control Panel and add a new rule for port `9000` (with the default configuration).
+This [How-To Geek tutorial](https://www.howtogeek.com/394735/how-do-i-open-a-port-on-windows-firewall/) is a good guide.
+On Linux, opening ports is a bit more involved; I personally recommend using nginx with a port redirection.
+
+## Roadmap
+
+Current status of the project, from a high-level perspective.
+
+- [x] Skybox generation: v0.4 done, see ``skybox/panorama_creator.py``.
+- [ ] Terrain generation: early 3D terrain generation in ``environment/renderer.py``; not suitable for production yet.
+- [ ] Props generation: billboards only, as current technology does not allow for much more.
+
+## Models' list
+
+This project includes several artificial neural network models.
+If you want to substitute one model for another, you should have a good understanding of what you are doing;
+otherwise the quality of the end result may suffer.
+
+- Image creation: [Stable Diffusion XL base 1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) and
+[Stable Diffusion XL refiner 1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0).
+- Inpainting and outpainting: [Stable Diffusion XL 1.0 Inpainting 0.1](https://huggingface.co/diffusers/stable-diffusion-xl-1.0-inpainting-0.1).
+- Speech-to-text and translation: [Whisper Large v3](https://huggingface.co/openai/whisper-large-v3).
+
+Please have a look at ``utils/download_models.py`` to see where those models are loaded from.
+
+## Useful Links
+
+You can download the official Unity client from [VR-Environment-GenAI-Unity (GitHub)](https://github.com/fcbg-hnp-vr/VR-Environment-GenAI-Unity).
diff --git a/api.json b/api.json
new file mode 100644
index 0000000..0221a69
--- /dev/null
+++ b/api.json
@@ -0,0 +1,7 @@
+{
+ "name": "AIWorldGenerationAPI",
+ "description": "AI World Generation API",
+ "version": "0.4.2",
+ "serverIp": "127.0.0.1",
+ "serverPort": 9000
+}
\ No newline at end of file
diff --git a/asr/asr_demo.py b/asr/asr_demo.py
new file mode 100644
index 0000000..6b30647
--- /dev/null
+++ b/asr/asr_demo.py
@@ -0,0 +1,79 @@
+"""
+Demo file for an Automatic Speech Recognition system.
+"""
+import wave
+
+from datasets import load_dataset
+import pyaudio
+
+from asr.speech_to_text import do_audio_transcription
+
+
+def register_audio():
+    """Record audio from the user's microphone and save it to a WAV file."""
+ chunk = 1024
+ audio_format = pyaudio.paInt16
+ channels = 1
+ rate = 44100
+ record_seconds = 10
+    output_filename = "output.wav"  # the wave module writes WAV data, so use a .wav extension
+
+ p = pyaudio.PyAudio()
+
+ stream = p.open(
+ format=audio_format,
+ channels=channels,
+ rate=rate,
+ input=True,
+ frames_per_buffer=chunk,
+ )
+
+ print("* recording")
+
+ frames = []
+
+ for _ in range(int(rate / chunk * record_seconds)):
+ data = stream.read(chunk)
+ frames.append(data)
+
+ print("* done recording")
+
+ stream.stop_stream()
+ stream.close()
+ p.terminate()
+
+ wf = wave.open(output_filename, "wb")
+ wf.setnchannels(channels)
+ wf.setsampwidth(p.get_sample_size(audio_format))
+ wf.setframerate(rate)
+ wf.writeframes(b"".join(frames))
+ wf.close()
+ return output_filename
+
+
+def main_demo():
+ """
+    Transcribe the user's audio or a default sample and print the result.
+
+    If the user chooses to enter their own audio, it calls the `register_audio` function to record
+    audio and then uses the `do_audio_transcription` function to convert the audio to text.
+
+ If the user chooses not to enter their own audio,
+ it uses a default sample from the 'distil-whisper/librispeech_long' dataset.
+ """
+ if input("Would you like to enter your own audio (y/[N])? ") == "y":
+ print("Please describe what you would like to see.")
+ sample = register_audio()
+ else:
+ print("Using default sample")
+ dataset = load_dataset(
+ "distil-whisper/librispeech_long", "clean", split="validation"
+ )
+ sample = dataset[0]["audio"]
+
+ result = do_audio_transcription(sample)
+ print(result["text"])
+
+
+if __name__ == "__main__":
+ main_demo()
diff --git a/asr/speech_to_text.py b/asr/speech_to_text.py
new file mode 100644
index 0000000..6ed2e8e
--- /dev/null
+++ b/asr/speech_to_text.py
@@ -0,0 +1,51 @@
+"""
+A simple Speech-to-Text module.
+
+It uses whisper by OpenAI, source https://huggingface.co/openai/whisper-large-v3
+"""
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+MODEL_ID = "openai/whisper-large-v3"
+
+
+def get_asr_model():
+ """Load the model from Hugging Face."""
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+ return AutoModelForSpeechSeq2Seq.from_pretrained(
+ MODEL_ID,
+ torch_dtype=torch_dtype,
+ low_cpu_mem_usage=True,
+ use_safetensors=True
+ )
+
+
+def do_audio_transcription(audio):
+ """
+ Return the text from an audio file.
+
+    :param audio: Input audio, either a file path or raw audio data
+    :type audio: str | bytes
+    :return dict: Transcription result; the transcribed text is under the "text" key
+ """
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+ model = get_asr_model().to(device)
+
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
+
+ pipe = pipeline(
+ "automatic-speech-recognition",
+ model=model,
+ tokenizer=processor.tokenizer,
+ feature_extractor=processor.feature_extractor,
+ max_new_tokens=128,
+ chunk_length_s=30,
+ batch_size=16,
+ return_timestamps=True,
+ torch_dtype=torch_dtype,
+ device=device,
+ generate_kwargs={"task": "translate"}
+ )
+ return pipe(audio)
diff --git a/environment/depth_generation.py b/environment/depth_generation.py
new file mode 100644
index 0000000..a4597df
--- /dev/null
+++ b/environment/depth_generation.py
@@ -0,0 +1,151 @@
+"""
+Generate an RGBD image from a single image.
+"""
+
+from PIL import Image
+from diffusers import MarigoldDepthPipeline
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def compute_image_depth(image, color_map="Spectral"):
+ """
+ Compute the depth of the image.
+
+ :param PIL.Image.Image image: Input RGB image.
+ :param str | None color_map: Colormap used to colorize the depth; set to None to skip colormap generation.
+ :return: Marigold pipeline output containing the predicted depth map and, optionally, a colorized depth image.
+ """
+ # Original DDIM version (higher quality)
+ pipe = MarigoldDepthPipeline.from_pretrained(
+ "prs-eth/marigold-v1-0",
+ custom_pipeline="marigold_depth_estimation",
+ )
+ # Note: a 16-bit variant is also available, just use torch_dtype=torch.float16, variant="fp16"
+
+ pipe.to("cuda")
+
+ return pipe(
+ image,
+ # (optional) Maximum resolution of processing. If set to 0: will not resize at all.
+ # Defaults to 768.
+ # processing_res=768,
+ # (optional) Resize depth prediction to match input resolution.
+ # match_input_res=True,
+ # (optional) Inference batch size, no bigger than `num_ensemble`.
+ # If set to 0, the script will automatically decide the proper batch size. Defaults to 0.
+ # batch_size=0,
+ # (optional) Random seed can be set to ensure additional reproducibility.
+ # Default: None (unseeded).
+ # Note: forcing --batch_size 1 helps to increase reproducibility.
+ # To ensure full reproducibility, deterministic mode needs to be used.
+ # seed=2024,
+ # (optional) Colormap used to colorize the depth map. Defaults to "Spectral".
+ # Set to `None` to skip colormap generation.
+ color_map=color_map,
+ # (optional) If true, will show progress bars of the inference progress.
+ show_progress_bar=False,
+ )
+
+
+def get_depth(image):
+ """Return a depth map of the image."""
+ pipeline_output = compute_image_depth(image, color_map=None)
+ return pipeline_output.depth_np
+
+
+def get_depth_image(image, depth_map_path=None, color_map_path=None):
+ """
+ Return the depth map and optionally save the grey and colored depth images.
+
+ :param PIL.Image.Image image: Input RGB image.
+ :param depth_map_path: Path to the depth map if it should be saved as a file
+ :type depth_map_path: str or None
+ :param color_map_path: Path to the colored depth map if it should be saved as a file
+ :type color_map_path: str or None
+ :return np.ndarray: Depth map, between 0 and 1
+ """
+ pipeline_output = compute_image_depth(image)
+ # Predicted depth map
+ depth = pipeline_output.depth_np
+
+ if depth_map_path is not None:
+ # Save as uint16 PNG
+ depth_uint16 = (depth * (2**16 - 1)).astype(np.uint16)
+ grey_depth_image = Image.fromarray(depth_uint16)
+ grey_depth_image.save(depth_map_path, mode="I;16")
+
+ if color_map_path is not None:
+ # Colorized prediction
+ depth_colored: Image.Image = pipeline_output.depth_colored
+ # Save colorized depth map
+ depth_colored.save(color_map_path)
+ return depth
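+
+ # Reading a saved depth map back (sketch): the PNG stores uint16 values, so divide
+ # by 2**16 - 1 to recover depths in [0, 1]. The path below is only a placeholder.
+ #
+ #     depth = np.asarray(Image.open("outputs/depth_map.png")) / (2**16 - 1)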
+
+
+def plot_arrays(array1, array2, titles=None):
+ """
+ Plot two matrix arrays as images.
+
+ Create a figure with two subplots and plots the given matrix arrays as grayscale images.
+ If the `titles` parameter is provided, it sets the titles for the two plots.
+
+ :param array1: The first matrix array to be plotted.
+ :type array1: numpy.ndarray
+
+ :param array2: The second matrix array to be plotted.
+ :type array2: numpy.ndarray
+
+ :param titles: Optional titles for the two plotted images.
+ :type titles: tuple, default is None
+ """
+ # Create a figure and grid objects
+ _fig, axes = plt.subplots(1, 2)
+
+ # Plot the arrays as an images
+ axes[0].imshow(array1, cmap="gray")
+ axes[1].imshow(array2, cmap="gray")
+
+ if titles is not None:
+ axes[0].set_title(titles[0])
+ axes[1].set_title(titles[1])
+ plt.show()
+
+
+def view_flat_estimation(rgbd_image):
+ """
+ Plot the color and depth components of an RGBD image.
+
+ :param rgbd_image: An RGBD Image containing the color and depth components of an image.
+ :type rgbd_image: open3d.geometry.RGBDImage
+ """
+ plot_arrays(
+ rgbd_image.color,
+ rgbd_image.depth,
+ ["Mountain grayscale image", "Mountain depth image"],
+ )
+
+
+def get_horizon_height(depth_map):
+ """Return the height of the horizon line in pixel coordinates."""
+ average_depth = np.median(depth_map, axis=1)
+ return np.argmax(average_depth)
+
+
+def main(image=None):
+ """
+ Main demo function for depth generation.
+
+ :param PIL.Image.Image | None image: The image to generate depth from.
+ """
+ depth_map_path = "outputs/" + ("sunny_" if image is None else "") + "depth_map.png"
+ color_map_path = (
+ "outputs/" + ("sunny_" if image is None else "") + "depth_colored.png"
+ )
+ if image is None:
+ image = Image.open("../sunny_mountain.png")
+ get_depth_image(image, depth_map_path, color_map_path)
+
+
+if __name__ == "__main__":
+ main(Image.open("../sky.png"))
diff --git a/environment/depth_inpainting.py b/environment/depth_inpainting.py
new file mode 100644
index 0000000..4cad811
--- /dev/null
+++ b/environment/depth_inpainting.py
@@ -0,0 +1,70 @@
+"""
+Inpainting using depth data as a ControlNet.
+
+https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl#diffusers.StableDiffusionXLControlNetInpaintPipeline
+"""
+
+import diffusers
+from PIL import Image
+import torch
+
+
+def get_inpaint_depth_pipeline():
+ """
+ Initialize and return a Stable Diffusion XL ControlNet inpainting pipeline.
+ The pipeline uses depth data as a control signal for inpainting.
+
+ For details:
+ https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl#diffusers.StableDiffusionXLControlNetInpaintPipeline
+
+ :return: A pre-configured pipeline for inpainting with depth control.
+ :rtype: diffusers.StableDiffusionXLControlNetInpaintPipeline
+ """
+ controlnet = diffusers.ControlNetModel.from_pretrained(
+ "diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16
+ )
+ pipe = diffusers.StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
+ "stabilityai/stable-diffusion-xl-base-1.0",
+ controlnet=controlnet,
+ torch_dtype=torch.float16,
+ )
+ return pipe
+
+
+def inpaint_depth_controlled(init_image, mask_image, control_image, prompts):
+ """
+ Perform depth-guided inpainting using a Stable Diffusion XL ControlNet pipeline.
+
+ This function initializes a pre-configured pipeline for inpainting with depth control,
+ and then generates images based on the given parameters.
+
+
+ :param PIL.Image.Image init_image: The initial image to start inpainting from.
+ :param PIL.Image.Image mask_image: The mask image indicating the areas to be inpainted.
+ :param PIL.Image.Image control_image: The depth map image to guide the inpainting process.
+ :param str prompts: The text prompt to guide the image generation.
+
+ :return list[PIL.Image.Image]: A list containing the generated inpainted images.
+ """
+ pipe = get_inpaint_depth_pipeline()
+ # pipe.to("cuda")
+ pipe.enable_model_cpu_offload() # use it instead of CUDA if you run out of VRAM
+ # Generate the images
+ images = pipe(
+ prompts,
+ num_inference_steps=50,
+ eta=1.0,
+ image=init_image,
+ mask_image=mask_image,
+ control_image=control_image,
+ ).images
+ return images
+
+
+if __name__ == "__main__":
+ inpaint_depth_controlled(
+ Image.open("../sunny_mountain.png"),
+ Image.open("../skybox/mask.png"),
+ Image.open("sunny_depth_map.png"),
+ "a mountain",
+ )[0].show()
diff --git a/environment/image_segmentation.py b/environment/image_segmentation.py
new file mode 100644
index 0000000..df9a8a0
--- /dev/null
+++ b/environment/image_segmentation.py
@@ -0,0 +1,610 @@
+"""
+Image segmentation techniques using the depth map.
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+from scipy.ndimage import gaussian_filter
+import skimage
+from sklearn.preprocessing import minmax_scale, scale
+from sklearn.cluster import KMeans, DBSCAN
+import torch
+
+from environment.mask_former import mask_former, panoptic_segmentation, get_sky_ids
+
+
+RANDOM_SEED = 0
+
+DEFAULT_IMAGE = "../sunny_mountain.png"
+
+
+ def planar_grid(image):
+ """
+ Create a planar grid of normalized coordinates.
+
+ The output grid has shape (*image.shape, 2) and satisfies
+ ``grid[i, j] = [i / (image.shape[0] - 1), j / (image.shape[1] - 1)]``,
+ i.e. every coordinate is scaled to [0, 1].
+ """
+ # Pixel indices stacked on the last axis, shape (*image.shape, 2)
+ grid = np.moveaxis(np.indices(image.shape), 0, -1)
+ # Reduce each axis to [0, 1]
+ return grid / (np.asarray(image.shape) - 1)
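+
+ # Worked example (sketch): for a 3x3 input the normalized coordinates take the values
+ # 0, 0.5 and 1 along each axis.
+ #
+ #     grid = planar_grid(np.zeros((3, 3)))
+ #     grid.shape   # (3, 3, 2)
+ #     grid[0, 0]   # array([0., 0.])
+ #     grid[1, 2]   # array([0.5, 1. ])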
+
+
+def do_kmeans(data, n_clusters):
+ """Apply a K-mean clustering."""
+ flat_data = data.reshape((-1, data.shape[2]))
+
+ # Create an instance of the K-Means clustering algorithm
+ kmeans = KMeans(n_clusters=n_clusters, random_state=RANDOM_SEED)
+ # Fit the K-Means algorithm to the pixel data
+ kmeans.fit(flat_data)
+ # Predict the cluster labels for each pixel
+ labels = kmeans.predict(flat_data)
+
+ return labels.reshape(*data.shape[:2])
+
+
+def deep_scale_stack_data(*data):
+ """Stack data on depth, normalized."""
+ normalized_data = tuple(
+ minmax_scale(d.reshape(-1, 1)).reshape(*d.shape) for d in data
+ )
+ return np.dstack(normalized_data)
+
+
+def depth_clustering(image, n_clusters=5):
+ """Cluster using depth only."""
+ return do_kmeans(image, n_clusters)
+
+
+def spatial_clustering(image, n_clusters=15):
+ """Cluster using XYD data."""
+ grid = planar_grid(image)
+ xyd_image = deep_scale_stack_data(image, grid)
+ return do_kmeans(xyd_image, n_clusters)
+
+
+def rgbd_spatial_clustering(depth_image, rgb_image, n_clusters=15):
+ """Cluster using spatial K-means on RGBD data."""
+ grid = planar_grid(depth_image)
+ xy_rgbd_image = deep_scale_stack_data(grid, rgb_image, depth_image)
+ return do_kmeans(xy_rgbd_image, n_clusters)
+
+
+def mask_former_clustering(depth_image, mask_former_labels, n_clusters=15):
+ """Cluster using MaskFormer."""
+ grid = planar_grid(depth_image)
+ xyd_mask_image = deep_scale_stack_data(grid, depth_image, mask_former_labels)
+ return do_kmeans(xyd_mask_image, n_clusters)
+
+
+def spatial_fz_mf_clustering(image, segments_fz, mask_former_labels, n_clusters=None):
+ """Cluster using Felzenszwalbs's method."""
+ grid = planar_grid(image)
+ if n_clusters is None:
+ sizes = np.unique(segments_fz).shape[0], np.unique(mask_former_labels).shape[0]
+ n_clusters = max(sizes)
+ print(f"Segmenting in {n_clusters} clusters")
+
+ stacked_data = deep_scale_stack_data(grid, image, segments_fz, mask_former_labels)
+ return do_kmeans(stacked_data, n_clusters)
+
+
+def compare_segmentations(image_path, depth_path):
+ """Compare various segmentation methods."""
+ # Load the grayscale image
+ image = skimage.io.imread(depth_path, as_gray=True)
+
+ # Remove background
+ far_clip = np.quantile(image, 0.7)
+ clipped = np.clip(image, 0, far_clip)
+
+ # Apply Gaussian filtering to reduce noise (optional)
+ filtered_image = skimage.filters.gaussian(clipped, sigma=1)
+
+ original_spatial = spatial_clustering(skimage.filters.gaussian(image, sigma=1), 12)
+ spatial_clusters = spatial_clustering(filtered_image, 12)
+
+ clustering_labels = spatial_clusters.reshape(filtered_image.shape)
+ mask_former_labels = mask_former(Image.open(image_path))
+
+ mask_former_clusters = mask_former_clustering(clipped, mask_former_labels, 10)
+ k_mask_former_labels = mask_former_clusters.reshape(filtered_image.shape)
+
+ segments_fz = skimage.segmentation.felzenszwalb(
+ clipped, scale=1, min_size=int(np.sqrt(image.shape[0] * image.shape[1]) * 10)
+ )
+
+ fz_mf_labels = spatial_fz_mf_clustering(
+ clipped, segments_fz, mask_former_labels, n_clusters=None
+ )
+
+ _fig, ax = plt.subplots(3, 3, figsize=(10, 10), sharex=True, sharey=True)
+ ax[0, 0].set_title("Spatial K-means")
+ ax[0, 0].imshow(
+ skimage.segmentation.mark_boundaries(
+ image, original_spatial.reshape(filtered_image.shape)
+ )
+ )
+ ax[0, 1].set_title("Clipped spatial K-means")
+ ax[0, 1].imshow(skimage.segmentation.mark_boundaries(image, clustering_labels))
+ ax[0, 2].set_title("Felzenszwalbs's method")
+ ax[0, 2].imshow(skimage.segmentation.mark_boundaries(image, segments_fz))
+ ax[1, 0].set_title("MaskFormer segmentation")
+ ax[1, 0].imshow(skimage.segmentation.mark_boundaries(image, mask_former_labels))
+ ax[1, 1].set_title("K-Mean + MaskFormer segmentation")
+ ax[1, 1].imshow(skimage.segmentation.mark_boundaries(image, k_mask_former_labels))
+ ax[1, 2].set_title("MaskFormer + Felzenszwalbs,\nK-mean")
+ ax[1, 2].imshow(skimage.segmentation.mark_boundaries(image, fz_mf_labels))
+ ax[2, 0].set_title("Spatial K-means")
+ ax[2, 0].imshow(clustering_labels)
+ ax[2, 1].set_title("K-Mean + MaskFormer segmentation")
+ ax[2, 1].imshow(k_mask_former_labels)
+ ax[2, 2].set_title("MaskFormer + Felzenszwalbs,\nK-mean")
+ ax[2, 2].imshow(fz_mf_labels)
+
+ plt.show()
+
+
+def segmentation_maps(image_path, depth_path):
+ """Segment the image and display the result."""
+ # Load the grayscale image
+ image = skimage.io.imread(depth_path, as_gray=True)
+
+ # Remove background
+ far_clip = np.quantile(image, 0.7)
+ clipped = np.clip(image, 0, far_clip)
+
+ grid = planar_grid(image)
+ xyd_image = deep_scale_stack_data(image, grid)
+
+ mask_former_labels = mask_former(Image.open(image_path))
+
+ segments_fz = skimage.segmentation.felzenszwalb(
+ clipped, scale=1, min_size=int(np.sqrt(image.shape[0] * image.shape[1]) * 10)
+ )
+
+ fz_mf_labels = spatial_fz_mf_clustering(
+ clipped, segments_fz, mask_former_labels, n_clusters=None
+ )
+
+ _fig, ax = plt.subplots(2, 2, figsize=(10, 10), sharex=True, sharey=True)
+ ax[0, 0].set_title("Clipped xyD image")
+ ax[0, 0].imshow(xyd_image)
+ ax[0, 1].set_title("Felzenszwalbs's method")
+ ax[0, 1].imshow(segments_fz)
+ ax[1, 0].set_title("MaskFormer segmentation")
+ ax[1, 0].imshow(mask_former_labels)
+ ax[1, 1].set_title("Final segmentation")
+ ax[1, 1].imshow(fz_mf_labels)
+
+ plt.show()
+
+
+def segment_image(image_path, depth_path, far_clip_pos=0.7):
+ """Segment the RGB image with all methods."""
+ # Load the grayscale image
+ image = skimage.io.imread(depth_path, as_gray=True)
+
+ # Remove background
+ far_clip = np.quantile(image, far_clip_pos)
+ clipped = np.clip(image, 0, far_clip)
+
+ mask_former_labels = mask_former(Image.open(image_path))
+
+ segments_fz = skimage.segmentation.felzenszwalb(
+ clipped, scale=1, min_size=int(np.sqrt(image.shape[0] * image.shape[1]) * 10)
+ )
+
+ return spatial_fz_mf_clustering(clipped, segments_fz, mask_former_labels)
+
+
+def show_images_grid(np_images):
+ """Show images in a nice grid."""
+ n_cols = int(np.sqrt(len(np_images)))
+ n_lines = int(np.ceil(len(np_images) / n_cols))
+ _fig, ax = plt.subplots(n_lines, n_cols)
+ for i, result in enumerate(np_images):
+ ax[i // n_cols, i % n_cols].imshow(result)
+ plt.show()
+
+
+def split_image(image, labels):
+ """Split image in the different labels."""
+ labels_values = np.unique(labels)
+ outputs = []
+ for target_label in labels_values:
+ mask = (labels == target_label).astype(image.dtype)
+ image_masked = image * np.dstack((mask, mask, mask))
+ outputs.append(image_masked)
+ Image.fromarray(image_masked).save(f"outputs/image_{target_label}.png")
+ # show_images_grid(outputs)
+ return outputs
+
+
+def crop_to_mask(np_image, mask):
+ """Crop an image to a specific mask."""
+ # Find the indices of non-False
+ non_null_rows, non_null_cols = np.nonzero(mask)
+
+ # Find the bounding box (inclusive of the last non-empty row/column)
+ crop = np_image[
+ np.min(non_null_rows) : np.max(non_null_rows) + 1,
+ np.min(non_null_cols) : np.max(non_null_cols) + 1,
+ ]
+ return crop
+
+
+def crop_to_content(np_images):
+ """Crop an image to a not empty space."""
+ crops = []
+ for i, np_im in enumerate(np_images):
+ mask = np.sum(np_im, axis=2) > 0
+ crop = crop_to_mask(np_im, mask)
+ crops.append(crop)
+
+ cropped_mask = crop_to_mask(mask, mask)
+ transparent = np.dstack((crop, cropped_mask * 255)).astype(np.uint8)
+ Image.fromarray(transparent).save(f"outputs/cropped_{i}.png")
+ return crops
+
+
+def segment_and_save(image_path, depth_path):
+ """Segment an image and save each segment."""
+ labels = segment_image(image_path, depth_path)
+ images = split_image(skimage.io.imread(image_path), labels)
+ segments = crop_to_content(images)
+ # show_images_grid(segments)
+ return segments
+
+
+def is_segment_skybox(
+ depth_field, mask, far_threshold, near_threshold, vertical_gradient=0.2
+):
+ """Check if a given segment is a good skybox candidate."""
+ data = depth_field[mask]
+ mean_depth = np.mean(data)
+ # Object is very far: skybox
+ if mean_depth > far_threshold:
+ return True
+ # Object very near: not a skybox
+ if mean_depth < near_threshold:
+ return False
+ # Otherwise check if it is deep with a nice vertical decreasing gradient
+ masked_image = depth_field * mask / mean_depth
+ return np.mean(masked_image[1:] - masked_image[:-1]) < vertical_gradient
+
+
+def mask_skybox(image_path, depth_path, labeled_image):
+ """List of the labels corresponding to the sky."""
+ rgb_image = skimage.io.imread(image_path)
+ depth_map = skimage.io.imread(depth_path)
+ _images = split_image(rgb_image, labeled_image)
+ labels = np.unique(labeled_image)
+
+ skybox_list = []
+ far_plane = np.quantile(depth_map, 0.55)
+ near_plane = np.quantile(depth_map, 0.30)
+ # Check if elements belong to the skybox
+ for label_id in labels:
+ mask = labeled_image == label_id
+ depth_field = crop_to_mask(depth_map, mask)
+ is_skybox = is_segment_skybox(
+ depth_field, crop_to_mask(mask, mask), far_plane, near_plane
+ )
+ if is_skybox:
+ # Mark the label
+ skybox_list.append(label_id)
+
+ return skybox_list
+
+
+def mask_terrain(image_path, depth_path, labeled_image, ignore_labels=None):
+ """Create a mask for the labels belonging to the terrain."""
+ rgb_image = skimage.io.imread(image_path)
+ depth_map = minmax_scale(skimage.io.imread(depth_path).flatten()).reshape(
+ rgb_image.shape[:-1]
+ )
+ labels = np.unique(labeled_image)
+ images = split_image(rgb_image, labeled_image)
+
+ terrain_list = []
+ # Check if elements belong to the terrain
+ for label_id, _im in zip(labels, images):
+ if ignore_labels is not None and label_id in ignore_labels:
+ continue
+ mask = labeled_image == label_id
+ depth_field = crop_to_mask(depth_map, mask)
+ # Check whether the segment has a gentle, terrain-like vertical slope
+ masked_image = depth_field * crop_to_mask(mask, mask)
+
+ plt.imshow(depth_field)
+ plt.show()
+ plt.imshow(masked_image)
+ plt.show()
+ plt.imshow((masked_image[1:] - masked_image[:-1]))
+ plt.show()
+ slope = np.mean(
+ (masked_image[1:] - masked_image[:-1])[crop_to_mask(mask, mask)[1:]]
+ )
+ print(label_id, "has slope", slope)
+ if slope < 0.002:
+ # Mark the label as terrain
+ print(label_id, "is terrain")
+ terrain_list.append(label_id)
+
+ return terrain_list
+
+
+def segment_parts():
+ """Segment an image into skybox and terrain parts."""
+ facebook_mask_former_labels = mask_former(Image.open(DEFAULT_IMAGE))
+ skybox_indices = mask_skybox(
+ DEFAULT_IMAGE, "../outputs/depth_map.png", facebook_mask_former_labels
+ )
+ terrain_indices = mask_terrain(
+ DEFAULT_IMAGE,
+ "../outputs/depth_map.png",
+ facebook_mask_former_labels,
+ skybox_indices,
+ )
+ print("terrain indices are ", terrain_indices)
+
+
+def segment_skybox(segmentation, depth_map):
+ """
+ Probability for each segment to be a skybox part.
+
+ :param dict segmentation: The segmentation data.
+ :param np.ndarray depth_map: The depth of each pixel.
+
+ :return torch.Tensor: Tensor of probability for each segment.
+ """
+ reduced_depth = force_monotonous(depth_map, bottom_to_top=False)
+ mean_depth = np.mean(reduced_depth, axis=1)
+ norm_mean_depth = (mean_depth - mean_depth.min()) / (
+ mean_depth.max() - mean_depth.min()
+ )
+
+ y_indices = torch.linspace(torch.pi / 2, -torch.pi / 2, depth_map.shape[0])
+ height_distribution = (torch.sin(y_indices) + 1) / 2
+ segmented = segmentation["segmentation"]
+ has_undefined = 0 in segmented
+ masks = np.empty(
+ (
+ len(np.unique(segmented)) + has_undefined + 1,
+ depth_map.shape[1],
+ depth_map.shape[0],
+ )
+ )
+ if has_undefined:
+ masks[0] = segmented.T == 0
+ sky_ids = get_sky_ids()
+ sky_detected = []
+ for i, info in enumerate(segmentation["segments_info"]):
+ masks[i + has_undefined] = segmented.T == (i + has_undefined)
+ if info["label_id"] in sky_ids:
+ sky_detected.append(i + has_undefined)
+
+ """
+ # Just some visualization code (to delete)
+
+ plt.plot(height_distribution, label="Probability following y")
+ plt.plot(norm_mean_depth, label="Probability following mean depth")
+ plt.plot(height_distribution * norm_mean_depth, label="Combined probability")
+ plt.xlabel("Height in pixel coordinates (0 = image top)")
+ plt.ylabel("Probability of being above the horizon")
+ plt.title("Probability of an horizontal line to be above the horizon (on y coordinate)")
+ plt.legend()
+ plt.grid()
+ plt.show()
+ """
+
+ sky_probability = torch.mean(
+ height_distribution * norm_mean_depth * masks, axis=(1, 2)
+ )
+ return sky_probability
+
+
+def get_skybox_mask(segmentation, depth_map, closest_plane=0.3, farthest_plane=0.7):
+ """
+ Return the skybox mask for a given image.
+
+ :param dict segmentation: Panoptic segmentation from Mask2Former
+ :param numpy.ndarray depth_map: Array of depth for each pixel.
+ :param float closest_plane: Pixels closer to this plane cannot be a part of the skybox.
+ :param float farthest_plane: Pixels above this plane are automatically a part of the skybox.
+ :return numpy.ndarray: A binary mask of the same size as the input image.
+ """
+ sky_probability = segment_skybox(segmentation, depth_map)
+ # Use a threshold or at least one element
+ threshold = min(0.5, torch.max(sky_probability))
+ passing_sky = torch.argwhere(sky_probability >= threshold)
+ masks = [segmentation["segmentation"] == i for i in passing_sky]
+ # Far plane has to be a part of the skybox
+ far_plane = torch.from_numpy(depth_map > farthest_plane)
+ masks.append(far_plane)
+ composite_mask = torch.logical_or(*masks)
+ # Near plane cannot be a part of the skybox
+ not_near_plane = torch.from_numpy(depth_map > closest_plane)
+ return torch.logical_and(composite_mask, not_near_plane).numpy()
+
+
+ def force_monotonous(data, bottom_to_top=True):
+ """
+ Return a copy of the depth map made monotonous along the vertical axis.
+
+ The result is the running maximum of each column, computed from the bottom row upward
+ when ``bottom_to_top`` is True, and from the top row downward otherwise.
+ """
+ if bottom_to_top:
+ return np.maximum.accumulate(data[::-1], axis=0)[::-1]
+ return np.maximum.accumulate(data, axis=0)
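+
+ # Worked example (sketch): with the default bottom-to-top direction, every value becomes
+ # the maximum of itself and everything below it, so depth never decreases toward the top.
+ #
+ #     col = np.array([[0.2], [0.5], [0.3], [0.9]])
+ #     force_monotonous(col)         # -> [[0.9], [0.9], [0.9], [0.9]]
+ #     force_monotonous(col, False)  # -> [[0.2], [0.5], [0.5], [0.9]]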
+
+
+def increasing_depth(monotonous_depth):
+ """
+ Enhance the monotonous depth map by rewriting all points with positive or null gradient.
+
+ The idea is to get a strictly monotonous map that would be similar
+ to the natural view direction.
+
+ :param numpy.ndarray monotonous_depth: Depth of monotonous progression
+ :return numpy.ndarray: A natural progression of the depth.
+ """
+ grad = np.gradient(monotonous_depth, axis=0)
+ # Discard points with positive or null gradient
+ corrupt_depth = np.copy(monotonous_depth)
+ corrupt_depth[grad >= 0] = np.nan
+ median_depths = np.nanmedian(corrupt_depth, axis=1)
+ # Replace by average depth
+ corrupt_depth[np.isnan(corrupt_depth)] = np.tile(
+ median_depths, (monotonous_depth.shape[1], 1)
+ ).T[np.isnan(corrupt_depth)]
+ return force_monotonous(corrupt_depth)
+
+
+def get_ground_mask(depth_map, ground_mask=None):
+ """
+ Identify and segment the ground in a given depth map.
+
+ This function uses a combination of depth gradient, monotonicity,
+ and clustering to identify the ground.
+ It then applies a Gaussian filter to smoothen the result and
+ DBSCAN to further segment the ground.
+
+ :param numpy.ndarray depth_map: The depth map of the image.
+ :param numpy.ndarray | None ground_mask: A mask indicating the initial ground pixels.
+
+ :return numpy.ndarray: A binary mask indicating the ground pixels.
+ """
+ monotonous_depth = force_monotonous(depth_map)
+ grad = np.gradient(monotonous_depth, axis=0)
+ corrupt_depth = increasing_depth(monotonous_depth)
+ adherence = corrupt_depth - monotonous_depth
+ great_map = np.logical_and((adherence * corrupt_depth) < 0.1, grad < 0).astype(np.float32)
+ conv = gaussian_filter(great_map, sigma=10)
+ zones = conv > 0.5
+ # Now select which area is part of the ground
+ points = np.argwhere(zones)
+ clustering = DBSCAN(eps=50, min_samples=2000).fit(points)
+
+ labels = clustering.labels_
+
+ # Number of clusters in labels, ignoring noise if present.
+ n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
+ n_noise_ = list(labels).count(-1)
+
+ print(f"Estimated number of clusters: {n_clusters_}")
+ print(f"Estimated number of noise points: {n_noise_}")
+
+ clustered = -np.ones(depth_map.shape)
+ clustered[points[:, 0], points[:, 1]] = labels
+
+ new_ground_mask = clustered >= 0
+ if ground_mask is not None:
+ new_ground_mask = np.logical_and(new_ground_mask, ground_mask)
+
+ return new_ground_mask
+
+
+def segments_objects(depth_map, mask=None):
+ """
+ Segment the objects in the given depth map using DBSCAN clustering.
+
+ :param numpy.ndarray depth_map: The depth map of the image.
+ :param numpy.ndarray mask: A mask indicating the points to be considered. Defaults to None.
+
+ :return numpy.ndarray: The clustered labels for the objects in the depth map.
+ """
+ if mask is None:
+ # Consider every pixel when no mask is given
+ remaining_indices = np.argwhere(np.ones(depth_map.shape, dtype=bool))
+ else:
+ remaining_indices = np.argwhere(mask)
+ remaining_deep_points = (
+ depth_map[remaining_indices[:, 0],
+ remaining_indices[:, 1]].reshape(-1, 1)
+ )
+
+ # The hard-coded 1024 assumes images of roughly 1024x1024 pixels
+ points = np.hstack((remaining_indices / 1024, scale(remaining_deep_points)))
+ clustering = DBSCAN(eps=0.1, min_samples=2000).fit(points)
+
+ labels = clustering.labels_
+
+ # Number of clusters in labels, ignoring noise if present.
+ n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
+ n_noise_ = list(labels).count(-1)
+
+ print(f"Estimated number of object clusters: {n_clusters_}")
+ print(f"Estimated number of objects noise points: {n_noise_}")
+
+ clustered = -np.ones(depth_map.shape)
+ clustered[np.uint64(points[:, 0] * 1024), np.uint64(points[:, 1] * 1024)] = labels
+ return clustered
+
+
+def get_horizon_level(depth_map, sky_mask):
+ """Return the horizon line level."""
+ up = force_monotonous(depth_map)
+ down = force_monotonous(depth_map, False)
+
+ horizon_mask = np.logical_or(up == down, sky_mask)
+ horizon_line = np.argmin(horizon_mask, axis=0)
+ plt.imshow(horizon_mask)
+ plt.show()
+ return np.mean(horizon_line)
+
+
+def segment_anything(image, depth_map):
+ """
+ Segment the image into skybox, ground, and objects using depth map and panoptic segmentation.
+
+ :param PIL.Image.Image image: The input image.
+ :param numpy.ndarray depth_map: The depth map of the image.
+
+ :return: The skybox mask, the ground mask, and the object cluster labels.
+ :rtype: tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]
+ """
+ segmentation = panoptic_segmentation(image)[0]
+ sky_mask = get_skybox_mask(segmentation, depth_map)
+ ground_mask = get_ground_mask(depth_map, np.logical_not(sky_mask))
+
+ object_clusters = segments_objects(
+ depth_map, np.logical_not(np.logical_or(sky_mask, ground_mask))
+ )
+
+ return sky_mask, ground_mask, object_clusters
+
+
+def segmentation_demo(image_path, depth_map_path):
+ """
+ Demonstrate the segmentation process on an image using a depth map.
+
+ :param str image_path: The path to the input image file.
+ :param str depth_map_path: The path to the depth map file.
+ """
+ with open(depth_map_path, "rb") as file:
+ depth_map = np.load(file)
+ image = Image.open(image_path)
+ clusters = segment_anything(image, depth_map)
+ masks_aggregation = clusters[0] + clusters[1] * 2 + (clusters[2] + 3) * (clusters[2] >= 0)
+ Image.fromarray((masks_aggregation == 2).astype(np.uint8) * 255).show()  # PIL cannot build an image from a boolean array directly
+ plt.imshow(image)
+ plt.imshow(masks_aggregation, alpha=0.7)
+ plt.show()
+
+
+if __name__ == "__main__":
+ """
+ Different segmentation techniques
+
+ compare_segmentations(DEFAULT_IMAGE, '../outputs/depth_map.png')
+ segmentation_maps(DEFAULT_IMAGE, '../outputs/depth_map.png')
+ image_segments = segment_and_save(DEFAULT_IMAGE, '../outputs/depth_map.png')
+ segment_parts()
+ prepare_ground_mask("../forest.png", "depth.npy", "mask.npy")
+ """
+ segmentation_demo(DEFAULT_IMAGE, "depth.npy")
diff --git a/environment/mask_former.py b/environment/mask_former.py
new file mode 100644
index 0000000..f211f5c
--- /dev/null
+++ b/environment/mask_former.py
@@ -0,0 +1,134 @@
+"""
+ Mask2Former panoptic segmentation by Facebook (Meta AI).
+"""
+
+from transformers import Mask2FormerForUniversalSegmentation, Mask2FormerImageProcessor
+from PIL import Image
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import torch
+
+# model.config.id2label
+LABELS = {
+ "terrain": (
+ "floor-wood",
+ "flower",
+ "gravel",
+ "river",
+ "road",
+ "sand",
+ "sea",
+ "snow",
+ "stairs",
+ "floor-other-merged",
+ "pavement-merged",
+ "mountain-merged",
+ "grass-merged",
+ "dirt-merged",
+ "building-other-merged",
+ "rock-merged",
+ "rug-merged",
+ ),
+ "sky": ("ceiling-merged", "sky-other-merged"),
+}
+
+
+def mask2former_model():
+ """Return the model for Mask2Former."""
+ model = Mask2FormerForUniversalSegmentation.from_pretrained(
+ "facebook/mask2former-swin-large-coco-panoptic"
+ )
+ return model
+
+
+def merge_label_list():
+ """Create the id_to_fuse list."""
+ label_list = [[] for _ in LABELS]
+ label2id = mask2former_model().config.label2id
+ for key, merge_list in zip(LABELS, label_list):
+ for label in LABELS[key]:
+ merge_list.append(label2id[label])
+ return label_list
+
+
+def panoptic_segmentation(image):
+ """
+ Apply a panoptic segmentation to a given image.
+
+ :return: Batch of panoptic segmentations,
+ each of which is a dict with keys "segmentation" and "segments_info".
+ :rtype: list
+ """
+ model = mask2former_model()
+ processor = Mask2FormerImageProcessor.from_pretrained(
+ "facebook/mask2former-swin-large-coco-panoptic"
+ )
+ inputs = processor(images=image, return_tensors="pt")
+
+ with torch.no_grad():
+ outputs = model(**inputs)
+
+ # Fuse terrain and sky elements: if the sky appears twice, group it in the same group
+ label_ids_to_fuse = list(model.config.label2id[label] for label in LABELS["terrain"])
+ label_ids_to_fuse.extend(model.config.label2id[label] for label in LABELS["sky"])
+
+ return processor.post_process_panoptic_segmentation(
+ outputs, target_sizes=[image.size[::-1]], label_ids_to_fuse=label_ids_to_fuse
+ )
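+
+ # Usage sketch (assumes an image such as "../island.png" from the demo below): the first
+ # element of the returned batch is a dict with a "segmentation" tensor and a
+ # "segments_info" list.
+ #
+ #     result = panoptic_segmentation(Image.open("../island.png"))[0]
+ #     print(result["segmentation"].shape)
+ #     for segment in result["segments_info"]:
+ #         print(segment["id"], segment["label_id"])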
+
+
+def mask_former(image):
+ """
+ Assign mask to the pixels of an image.
+
+ :param PIL.Image.Image image: Image to segment.
+ """
+ result = panoptic_segmentation(image)[0]
+ # we refer to the demo notebooks for visualization
+ # (see "Resources" section in the Mask2Former docs)
+ predicted_panoptic_map = result["segmentation"]
+ # Convert the tensor to a NumPy array
+ return predicted_panoptic_map.squeeze().detach().numpy()
+
+
+def get_model_labels():
+ """A dictionary of id to labels associations for the model used."""
+ model = mask2former_model()
+ return model.config.id2label
+
+
+def get_sky_ids():
+ """The ids of the elements corresponding to the sky."""
+ label2id = mask2former_model().config.label2id
+ return tuple(label2id[label] for label in LABELS['sky'])
+
+
+def main(image):
+ """Demonstration usage of MaskFormer."""
+ result = panoptic_segmentation(image)[0]
+ segments_map = result["segmentation"].squeeze().detach()
+ segments_info = result["segments_info"]
+ values = torch.unique(segments_map)
+
+ _fig, ax = plt.subplots()
+ im = ax.imshow(segments_map)
+ ax.imshow(image, alpha=0.5)
+
+ # get the colors of the values, according to the
+ # colormap used by imshow
+ colors = [im.cmap(im.norm(value)) for value in values]
+ # create a patch (proxy artist) for every color
+ id2label = get_model_labels()
+ labels_list = [id2label[seg["label_id"]] + f" ({seg['label_id']})" for seg in segments_info]
+ if 0 in values:
+ labels_list.insert(0, "Unknown (0)")
+ patches = [
+ mpl.patches.Patch(color=colors[i], label=label) for i, label in enumerate(labels_list)
+ ]
+ # put those patched as legend-handles into the legend
+ plt.legend(handles=patches)
+ plt.show()
+
+
+if __name__ == "__main__":
+ main(Image.open("../island.png"))
diff --git a/environment/mesh_pipeline.py b/environment/mesh_pipeline.py
new file mode 100644
index 0000000..5374ad3
--- /dev/null
+++ b/environment/mesh_pipeline.py
@@ -0,0 +1,189 @@
+"""
+Functions to generate a mesh from an RGBD image.
+"""
+
+import numpy as np
+import open3d as o3d
+import torch
+
+
+def reduce_image_size(initial_data, resolution):
+ """
+ Take an input 2D array and average-pool it to reduce its size.
+
+ :param numpy.ndarray initial_data: Initial array of shape (H, W)
+ :param int resolution: Target resolution to match.
+ :return numpy.ndarray: A new array of approximately (resolution, resolution)
+ """
+ # We have more depth pixels than vertices, hence the average
+ dilate = initial_data.shape[0] // resolution, initial_data.shape[1] // resolution
+
+ averager = torch.nn.AvgPool2d(dilate, stride=dilate)
+ # Reshape as (N, C, H, W) and pass to torch
+ tensor_data = torch.from_numpy(
+ initial_data.reshape(1, 1, initial_data.shape[0], initial_data.shape[1])
+ )
+ average = np.asarray(averager(tensor_data)[0, 0])
+ return average
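+
+ # Shape sketch: a 1024x1024 depth map reduced with resolution=256 is average-pooled
+ # with a 4x4 kernel, giving a 256x256 array.
+ #
+ #     small = reduce_image_size(np.random.rand(1024, 1024), 256)
+ #     small.shape  # (256, 256)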
+
+
+def create_triangle_list(rows, cols):
+ """
+ Create the triangle indices that connect the vertices of a planar mesh grid.
+
+ :param int rows: Number of rows.
+ :param int cols: Number of columns.
+ :return numpy.ndarray: Triangle indices of shape ((rows - 1) * (cols - 1) * 2, 3)
+ """
+ # Create a grid of vertex indices
+ indices = np.arange(rows * cols).reshape(rows, cols)
+
+ # Generate triangles
+ triangles = []
+
+ # Upper-left triangles
+ upper_left = indices[:-1, :-1].reshape(-1, 1)
+ upper_right = indices[:-1, 1:].reshape(-1, 1)
+ lower_left = indices[1:, :-1].reshape(-1, 1)
+ triangles.append(np.hstack((upper_left, lower_left, upper_right)))
+
+ # Lower-right triangles
+ lower_right = indices[1:, 1:].reshape(-1, 1)
+ triangles.append(np.hstack((upper_right, lower_left, lower_right)))
+
+ # Combine all triangles
+ return np.vstack(triangles)
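+
+ # Worked example (sketch): a 2x2 grid of vertices (indices 0..3) forms a single quad
+ # covered by two triangles, one per diagonal half.
+ #
+ #     create_triangle_list(2, 2)
+ #     # array([[0, 2, 1],
+ #     #        [1, 2, 3]])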
+
+
+def create_mesh_geometry(max_resolution, depth_data):
+ """
+ Create a set of vertices and triangles as a plane deformed by the depth map.
+
+ :param max_resolution: The approximate maximum resolution of the generated mesh.
+ :type max_resolution: int
+
+ :param depth_data: The depth data of the input image.
+ :type depth_data: numpy.ndarray
+
+ :return: A tuple containing the vertices and triangles of the generated mesh.
+ :rtype: tuple
+
+ The depth data is first average-pooled down to roughly the requested resolution.
+ A regular grid of (x, y) vertex positions in [0, 1] is then built, the pooled depth is
+ used as the z coordinate of each vertex, and the triangle indices of the planar mesh
+ are generated for that grid.
+ """
+ # Grid of vertices positions
+ resized_depth = reduce_image_size(depth_data, max_resolution)
+ resolution = np.shape(resized_depth)
+ # plot_arrays(data, average)
+
+ # Assign vertices position and depth
+ x_grid, y_grid = np.meshgrid(
+ np.linspace(0, 1, resolution[1]), np.linspace(1, 0, resolution[0])
+ )
+
+ # Legacy code? view_height = 0.3
+ # Legacy code? z_grid = np.sqrt(np.abs(resized_depth ** 2 - (y_grid - view_height) ** 2))
+
+ vertices = np.column_stack(
+ (x_grid.flatten(), y_grid.flatten(), resized_depth.flatten())
+ )
+
+ # Create a grid of triangles for the planar mesh
+ triangles = create_triangle_list(resolution[0], resolution[1])
+
+ return vertices, triangles
+
+
+def generate_uv(triangles, vertices):
+ """
+ Generate the uv coordinates for a planar mesh.
+
+ :param triangles: An array of shape (n_triangles, 3) of vertex indices in the mesh.
+ :type triangles: numpy.ndarray
+
+ :param vertices: An array of shape (n_vertices, 3) of 3D vertex coordinates in the mesh.
+ :type vertices: numpy.ndarray
+
+ :return: An array of shape (n_triangles * 3, 2) of uv coordinates, one pair per triangle corner.
+ :rtype: numpy.ndarray
+
+ This function generates the uv coordinates for a planar mesh.
+ It takes as input the indices and 3D coordinates of the vertices in the mesh,
+ and returns a numpy array containing the uv coordinates of the vertices.
+
+ Example usage:
+
+ ```python
+ triangles = np.array([[0, 1, 2]])
+ vertices = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]])
+ uv_coords = generate_uv(triangles, vertices)
+ print(uv_coords)
+ ```
+ """
+ v_uv = np.empty((len(triangles) * 3, 2))
+ for i, t in enumerate(triangles):
+ for j in range(3):
+ v_uv[i * 3 + j] = vertices[t[j]][:2] * [1, -1]
+ return v_uv
+
+
+def mesh_impression_pipeline(depth_map, max_resolution=256, texture_image=None):
+ """
+ Pipeline to create a mesh from a depth map.
+
+ :param depth_map: The input depth map.
+ :type depth_map: numpy.ndarray
+
+ :param max_resolution: Approximate maximum resolution of the generated mesh.
+ :type max_resolution: int
+
+ :param texture_image: The texture image for the mesh.
+ :type texture_image: open3d.geometry.Image or None
+
+ :return: A 3D mesh created from the input depth map.
+ :rtype: open3d.geometry.TriangleMesh
+
+ This function creates a 3D mesh from a depth map using the following steps:
+
+ 1. Create a grid of vertices positions based on the depth data of the input depth map.
+ 2. Assign the vertices position and depth to the grid.
+ 3. Create a grid of triangles for the planar mesh.
+ 4. Generate the uv coordinates for the mesh.
+ 5. Load a texture image (if provided) and assign it to the mesh.
+ 6. Compute the vertex normals for the mesh.
+
+ Example usage:
+
+ ```python
+ depth_map = np.asarray(Image.open("depth_map.png")) / 65535
+ mesh = mesh_impression_pipeline(depth_map, 256, "texture.png")
+ ```
+ """
+ # Create the mesh
+ vertices, triangles = create_mesh_geometry(max_resolution, np.asarray(depth_map))
+
+ mesh = o3d.geometry.TriangleMesh(
+ o3d.utility.Vector3dVector(vertices), o3d.utility.Vector3iVector(triangles)
+ )
+
+ # Load a texture image (change the file path accordingly)
+ v_uv = generate_uv(triangles, vertices)
+ mesh.triangle_uvs = o3d.utility.Vector2dVector(v_uv)
+
+ if texture_image is not None:
+ texture_image = o3d.io.read_image(texture_image)
+ mesh.textures = [texture_image]
+
+ mesh.compute_vertex_normals()
+
+ return mesh
diff --git a/environment/point_cloud_pipeline.py b/environment/point_cloud_pipeline.py
new file mode 100644
index 0000000..53d49ea
--- /dev/null
+++ b/environment/point_cloud_pipeline.py
@@ -0,0 +1,201 @@
+"""
+RGBD Image to mesh using a point cloud strategy.
+"""
+
+import numpy as np
+import open3d as o3d
+import matplotlib.pyplot as plt
+
+
+def generate_point_cloud(rgbd_image):
+ """
+ Generate a point cloud from an RGBD image.
+
+ :param rgbd_image: An RGBDImage containing the color and depth components of an image.
+ :type rgbd_image: open3d.geometry.RGBDImage
+
+ :return: 3D point cloud generated from the input RGBD image.
+ :rtype: open3d.geometry.PointCloud
+
+ This function creates a point cloud from an RGBD image using the provided camera
+ intrinsic and extrinsic parameters.
+ The generated point cloud contains the 3D coordinates of the image's pixels,
+ with depth values corresponding to the depth information in the input RGBD image.
+ """
+ # Default camera: o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+ focal_distance = 200
+ distant_camera = o3d.camera.PinholeCameraIntrinsic(
+ 1, 1, focal_distance, focal_distance, 0, 0
+ )
+ extrinsic_parameters = [
+ [1, 0, 0, 0.5],
+ [0, -1, 0, 0.5],
+ [0, 0, -1, 0],
+ [0, 0, 0, 1],
+ ]
+
+ pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+ rgbd_image, intrinsic=distant_camera, extrinsic=extrinsic_parameters
+ )
+ return pcd
+
+
+def pcd_from_image(rgb_image, depth_map):
+ """
+ Convert an RGB image and a depth map into a point cloud (pcd).
+
+ :param str rgb_image: The input RGB image.
+
+ :param open3d.geometry.Image depth_map: The input depth map.
+
+ :return: The 3D point cloud generated from the input RGB image and depth map.
+ :rtype: open3d.geometry.PointCloud
+
+ This function creates a point cloud from an RGB image and a depth map using the Open3D library.
+ The generated point cloud contains the 3D coordinates of the image's pixels,
+ with depth values corresponding to the depth information in the input depth map.
+
+ Note: The input RGB image and depth map should be in the same coordinate system and
+ have the same dimensions.
+ """
+ rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+ color=o3d.io.read_image(rgb_image),
+ depth=depth_map,
+ depth_scale=1,
+ convert_rgb_to_intensity=False,
+ )
+ # For debugging: view_flat_estimation(rgbd_image)
+ # To use a point cloud: pcd = generate_point_cloud(rgbd_image)
+ pcd = o3d.geometry.PointCloud()
+ shape = np.shape(rgbd_image.depth)
+ x_grid, y_grid = np.meshgrid(
+ np.linspace(0, 1, shape[0]), np.linspace(1, 0, shape[1])
+ )
+ points = np.column_stack(
+ (x_grid.flatten(), y_grid.flatten(), 1 - np.asarray(rgbd_image.depth).flatten())
+ )
+ pcd.points = o3d.utility.Vector3dVector(points)
+ colors = np.asarray(rgbd_image.color).reshape(shape[0] * shape[1], -1) / 256
+ pcd.colors = o3d.utility.Vector3dVector(colors)
+ pcd.estimate_normals(
+ search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)
+ )
+ return pcd
+
+
+def clustering(pcd):
+ """
+ Cluster the point cloud data.
+
+ :param pcd: The input point cloud data.
+ :type pcd: open3d.geometry.PointCloud
+ :return: The clustered point cloud data.
+ :rtype: open3d.geometry.PointCloud
+
+ This function clusters the input point cloud data using the DBSCAN algorithm.
+ The function first creates a new point cloud object from the input point cloud.
+ Then, it derives a reference distance as the reciprocal of the square root of the number
+ of points in the input point cloud.
+ Using 20 times this distance as `eps`, it performs the DBSCAN clustering algorithm
+ with a minimum number of points set to 300.
+ The function then assigns colors to the points based on their cluster labels and returns
+ the clustered point cloud data.
+
+ Example usage:
+
+ ```python
+ pcd = generate_point_cloud(rgbd_image)
+ clustered_pcd = clustering(pcd)
+ ```
+ """
+ new_pcd = o3d.geometry.PointCloud(pcd)
+ min_dist = len(pcd.points) ** -0.5
+ with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug):
+ print(min_dist)
+ labels = np.array(
+ pcd.cluster_dbscan(eps=min_dist * 20, min_points=300, print_progress=True)
+ )
+
+ max_label = labels.max()
+ print(f"point cloud has {max_label + 1} clusters")
+ colors = plt.get_cmap("tab20")(labels / (max_label if max_label > 0 else 1))
+ colors[labels < 0] = 0
+ new_pcd.colors = o3d.utility.Vector3dVector(colors[:, :3])
+ o3d.visualization.draw_geometries([new_pcd])
+ return new_pcd
+
+
+def view_densities(mesh, densities):
+ """
+ Visualize the density values of a mesh using Open3D.
+
+ :param mesh: The input mesh data.
+ :type mesh: open3d.geometry.TriangleMesh
+
+ :param densities: The input density values.
+ :type densities: numpy.ndarray
+
+ :return: The visualized mesh with density values.
+ :rtype: open3d.geometry.TriangleMesh
+
+ This function visualizes the density values of a mesh using Open3D.
+ Then, it assigns these colors to the vertices of the input mesh based on their density values.
+ Finally, it returns the visualized mesh with density values.
+
+ Example usage:
+
+ ```python
+ mesh = generate_mesh("sunny_mountain.png", "outputs/3D view.obj", "outputs/depth_map.png")
+ density_mesh = view_densities(mesh, densities)
+ ```
+ """
+ densities = np.asarray(densities)
+ density_colors = plt.get_cmap("plasma")(
+ (densities - densities.min()) / (densities.max() - densities.min())
+ )
+ density_colors = density_colors[:, :3]
+ density_mesh = o3d.geometry.TriangleMesh()
+ density_mesh.vertices = mesh.vertices
+ density_mesh.triangles = mesh.triangles
+ density_mesh.triangle_normals = mesh.triangle_normals
+ density_mesh.vertex_colors = o3d.utility.Vector3dVector(density_colors)
+ o3d.visualization.draw_geometries([density_mesh])
+ return density_mesh
+
+
+def point_cloud_pipeline(rgb_image, depth_map):
+ """
+ Pipeline to create a 3D mesh from an image and a depth map.
+
+ :param str rgb_image: The input RGB image.
+ :param depth_map: The input depth map.
+
+ :return: A 3D mesh created from the input RGB image and depth map.
+ :rtype: open3d.geometry.TriangleMesh
+
+ This function first converts the input RGB image and depth map into a point cloud.
+ Then, it creates a 3D mesh from the point cloud.
+ Finally, it removes the vertices with low density values and returns the resulting 3D mesh.
+
+ Example usage:
+
+ ```python
+ mesh = point_cloud_pipeline("sunny_mountain.png", "outputs/depth_map.png")
+ ```
+ """
+ pcd = pcd_from_image(rgb_image, depth_map)
+
+ # Basic clustering: clustering(pcd)
+
+ poisson_mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
+ pcd=pcd, depth=10
+ )
+ # view_densities(poisson_mesh, densities)
+
+ vertices_to_remove = densities < np.quantile(densities, 0.018)
+ trimmed_mesh = o3d.geometry.TriangleMesh(poisson_mesh)
+ trimmed_mesh.remove_vertices_by_mask(vertices_to_remove)
+
+ print("Displaying reconstructed mesh ...")
+ o3d.visualization.draw_geometries([trimmed_mesh])
+ return trimmed_mesh
diff --git a/environment/renderer.py b/environment/renderer.py
new file mode 100644
index 0000000..89bf1a9
--- /dev/null
+++ b/environment/renderer.py
@@ -0,0 +1,500 @@
+"""
+Generate a 2.5D view of a 2D image.
+
+Currently, using marigold-v1-0 (https://huggingface.co/prs-eth/marigold-v1-0)
+
+Source code:
+https://github.com/huggingface/diffusers/tree/main/examples/community#marigold-depth-estimation
+"""
+
+import warnings
+
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+import open3d
+import scipy
+from skimage.restoration.inpaint import inpaint_biharmonic
+
+from skybox.inpainting import inpaint_image
+from environment.depth_generation import get_depth
+
+from environment.mesh_pipeline import mesh_impression_pipeline
+from environment.image_segmentation import (
+ segment_anything,
+ crop_to_mask,
+ force_monotonous,
+ increasing_depth,
+)
+from environment.depth_inpainting import inpaint_depth_controlled
+
+
+def cylindrical_projection(flat_vertices, total_angle):
+ """Map from vertices from a flat panorama to a circular geometry."""
+ vertices = flat_vertices
+ far_plane = 10
+ radii = vertices[:, 2] * far_plane
+ angles = 2 * np.pi * vertices[:, 0] * total_angle
+ new_vertices = np.dstack(
+ (radii * np.cos(angles), vertices[:, 1], radii * np.sin(angles))
+ )[0]
+ return new_vertices
+
+
+def spherical_projection(flat_vertices):
+ """Map from vertices from a flat panorama to a circular geometry."""
+ vertices = np.asarray(flat_vertices)
+ radii = vertices[:, 2]
+ # [0, 1] -> [0, tau]
+ theta = 2 * np.pi * vertices[:, 0]
+ # [1, 0] -> [pi / 2, -pi / 2]
+ phi = -np.pi / 2 + vertices[:, 1] * np.pi
+ new_vertices = np.dstack(
+ (
+ radii * np.cos(theta),
+ radii * np.sin(theta) * np.sin(phi),
+ radii * np.sin(theta) * np.cos(phi),
+ )
+ )[0]
+ return new_vertices
+
+
+def normalize_depth(vertices):
+ """Simple depth normalization between 0 and 1."""
+ vertices[:, 2] = (vertices[:, 2] - np.min(vertices[:, 2])) / (
+ np.max(vertices[:, 2]) - np.min(vertices[:, 2])
+ )
+ return vertices
+
+
+def force_ground_closing(vertices):
+ """
+ Apply a force to the vertices of a 3D mesh that pulls the lowest vertices to the center.
+
+ :param vertices: A numpy array representing the 3D coordinates of the vertices.
+ :type vertices: numpy.ndarray
+
+ :return: The modified 3D coordinates of the vertices after applying the force.
+ :rtype: numpy.ndarray
+ """
+ heights = vertices[:, 1]
+ # height = 0 -> 0
+ # height > 0.2 -> ~1
+ attractions = 1 - np.exp(-heights / 0.05)
+ new_vertices = np.copy(vertices)
+ new_vertices[:, 2] = 1 - new_vertices[:, 2]
+ new_vertices[:, 2] -= np.min(new_vertices[:, 2])
+ new_vertices[:, 2] *= -attractions
+ return new_vertices
+
+
+def remove_aberrant_triangles(mesh, limit=0.5):
+ """
+ This function removes triangles from a 3D mesh that have a normal on z below a specified limit.
+
+ :param mesh: A :class:`open3d.geometry.TriangleMesh` object representing the 3D mesh.
+ :type mesh: open3d.geometry.TriangleMesh
+
+ :param limit: A float value representing the minimum z normal for the triangles to be kept.
+ :type limit: float
+
+ :return: A new :class:`open3d.geometry.TriangleMesh` object with the aberrant triangles removed.
+ :rtype: open3d.geometry.TriangleMesh
+
+ This function takes a 3D mesh and a normal threshold as input.
+ It then removes all triangles whose z normal component is below the specified limit.
+ The function returns a new 3D mesh object with the aberrant triangles removed.
+ """
+ new_mesh = open3d.geometry.TriangleMesh(mesh)
+ triangles_list = np.nonzero(np.asarray(mesh.triangle_normals)[:, 2] < limit)[0]
+ new_mesh.remove_triangles_by_index(triangles_list)
+ return new_mesh
+
+
+def fold_as_panorama(mesh, total_angle=0.5):
+ """Fold a mesh as a panorama."""
+ # To force depth=0 when y=0: new_vertices = force_ground_closing(np.asarray(mesh.vertices))
+ new_vertices = normalize_depth(np.asarray(mesh.vertices))
+ new_vertices = cylindrical_projection(new_vertices, total_angle)
+ new_mesh = open3d.geometry.TriangleMesh(mesh)
+ new_mesh.vertices = open3d.utility.Vector3dVector(new_vertices)
+ return new_mesh
+
+
+def display_meshes(mesh_list):
+ """Remove the texture on a meshes to display them with open3d."""
+ for mesh in mesh_list:
+ mesh.textures = []
+ open3d.visualization.draw_geometries(mesh_list)
+
+
+def save_mesh(mesh, filename, view=False):
+ """
+ Save the mesh to a file.
+
+ :param mesh: The input mesh data.
+ :type mesh: open3d.geometry.TriangleMesh
+
+ :param filename: The path to the file where the mesh will be saved.
+ :type filename: str
+
+ :param view: A boolean flag indicating whether to visualize the mesh before saving it.
+ :type view: bool
+
+ Example usage:
+
+ ```python
+ mesh = generate_mesh("sunny_mountain.png", "outputs/3D view.obj", "outputs/depth_map.png")
+ save_mesh(mesh, "outputs/3D view.obj", view=True)
+ ```
+ """
+ # Disable texture for visualization
+ if view:
+ mesh.textures = []
+ open3d.visualization.draw_geometries([mesh], mesh_show_wireframe=True)
+
+ open3d.io.write_triangle_mesh(filename, mesh)
+
+
+def moving_average(data, window_size=5):
+ """Moving average over the given input data."""
+ return np.convolve(data, np.ones(window_size) / window_size, mode="same")
+
+
+def horizon_height(depth_map):
+ """Get the height of the horizon using gradient only, not precise."""
+ y_depth_grad = np.gradient(depth_map, axis=0)
+ reduced_grad = np.mean(y_depth_grad, axis=1)
+
+ # The ground is the area of negative gradient, find the first occurrence of positive gradient
+ smoothed_grad = moving_average(reduced_grad, depth_map.shape[0] // 50)
+
+ return np.argmax(smoothed_grad)
+
+
+def plot_horizon_computation(depth_map):
+ """Compare different horizon line finding methods."""
+ mean_depth = np.mean(depth_map, axis=1)
+ y_depth_grad = np.gradient(depth_map, axis=0)
+ reduced_grad = np.mean(y_depth_grad, axis=1)
+
+ # The ground is the area of negative gradient, find the first occurrence of positive gradient
+ smoothed_grad = moving_average(reduced_grad, depth_map.shape[0] // 50)
+
+ horizon = horizon_height(depth_map)
+
+ y_indices = np.arange(depth_map.shape[0])
+ _, axes = plt.subplots(1, 2, sharey=True)
+ axes[0].imshow(depth_map)
+ axes[0].plot(mean_depth * depth_map.shape[1], y_indices, label="Average depth")
+ axes[1].plot(reduced_grad, y_indices, label="Vertical depth gradient")
+ axes[1].plot(smoothed_grad, y_indices, label="Smoothed depth gradient")
+ axes[1].plot(
+ (np.min(reduced_grad), np.max(reduced_grad)),
+ (horizon, horizon),
+ label="Detected horizon",
+ )
+ axes[1].plot(
+ moving_average(smoothed_grad),
+ y_indices,
+ )
+ axes[1].plot((0, 0), (0, depth_map.shape[0]))
+ plt.grid()
+ axes[1].set_position(
+ [
+ axes[1].get_position().x0,
+ axes[0].get_position().y0,
+ axes[1].get_position().width,
+ axes[0].get_position().height,
+ ]
+ )
+ plt.legend()
+ plt.show()
+ exit()
+
+
+def generate_mesh(texture, depth_map, resolution=256):
+ """
+ Generate a 3D mesh from an image and a depth map.
+
+ :param open3d.geometry.Image texture: The input RGB image to use as mesh texture.
+ :param depth_map: The input depth map.
+ :type depth_map: numpy.ndarray
+ :param int resolution: Vertices per side in the generated mesh.
+
+ :return: A 3D mesh created from the input RGB image and depth map.
+ :rtype: open3d.geometry.TriangleMesh
+
+ This function generates a 3D mesh from an image and a depth map using the following steps:
+
+ 1. Convert the input RGB image and depth map into a point cloud.
+ 2. Create a 3D mesh from the point cloud using a Poisson surface reconstruction.
+ 3. Remove the vertices with low density values.
+ 4. Save the resulting 3D mesh to a file specified by the `output_mesh` parameter.
+
+ Example usage:
+
+ ```python
+ generate_mesh("sunny_mountain.png", "outputs/3D view.obj", "outputs/depth_map.png")
+ ```
+ """
+ # To use the point cloud alternative: mesh = environment.point_cloud_pipeline(input_image, depth_map)
+ mesh = mesh_impression_pipeline(depth_map, resolution, texture)
+ new_mesh = remove_aberrant_triangles(mesh, 0.1)
+ new_mesh = fold_as_panorama(new_mesh, 1)
+ # display_meshes([mesh, new_mesh])
+ return new_mesh
+
+
+def mesh_panorama_from_files(
+ input_image, output_mesh, depth_image=None, resolution=256
+):
+ """
+ Generate a 3D mesh from an image and a depth map.
+
+ :param str input_image: The input RGB image.
+ :param str output_mesh: The path to the file where the mesh will be saved.
+ :param depth_image: The input depth map. If not provided, it will be generated.
+ :type depth_image: str or None
+ :param int resolution: Vertices per side in the generated mesh.
+
+ :return: A 3D mesh created from the input RGB image and depth map.
+ :rtype: open3d.geometry.TriangleMesh
+
+ Example usage:
+
+ ```python
+ mesh_panorama_from_files("sunny_mountain.png", "outputs/3D view.obj", "outputs/depth_map.png")
+ ```
+ """
+ if depth_image is None:
+ depth_map = get_depth(Image.open(input_image))
+ else:
+ # Image in I;16 format with 16-bit depth.
+ depth_map = np.asarray(open3d.io.read_image(depth_image)) / (2**16 - 1)
+ # For debugging (note: this helper calls exit()): plot_horizon_computation(depth_map)
+ main_texture = open3d.io.read_image(input_image)
+ new_mesh = generate_mesh(main_texture, depth_map, resolution)
+
+ if output_mesh is not None:
+ save_mesh(new_mesh, output_mesh)
+ print(f"Mesh saved as '{output_mesh}'.")
+
+ return new_mesh
+
+
+def segment_stuff(image_path, depth_path=None):
+ """
+ Segment an image into its skybox, ground, and object parts.
+
+ :param str image_path: The path to the input image.
+ :param str depth_path: The path to the input depth map. If not provided, it will be generated.
+
+ :return: The skybox mask, the ground mask, and the object cluster labels.
+ :rtype: tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]
+
+ Example usage:
+
+ ```python
+ sky_mask, ground_mask, objects = segment_stuff("../sunny_mountain.png", "sunny_depth_map.png")
+ plt.imshow(sky_mask)
+ plt.show()
+ ```
+ """
+ image = Image.open(image_path)
+ if depth_path is None:
+ depth_map = get_depth(image)
+ else:
+ # Image in I;16 format with 16-bit depth.
+ depth_map = np.asarray(open3d.io.read_image(depth_path)) / (2**16 - 1)
+ np.save("depth.npy", depth_map)
+ return segment_anything(image, depth_map)
+
+
+def mask_image(image, mask):
+ """Extract the part of an image that matches the given mask."""
+ indices = np.argwhere(mask)
+ pixels = np.zeros_like(image)
+ pixels[indices[:, 0], indices[:, 1]] = image[indices[:, 0], indices[:, 1]]
+ return pixels
+
+
+def filling_strategy(image_np, large_mask):
+ """Create a very large image with many mirrored views."""
+ cropped = crop_to_mask(image_np, large_mask)
+ # Fill holes
+
+ # Map in big size
+ large_skybox = np.empty(
+ (cropped.shape[0] * 3, cropped.shape[1] * 3, cropped.shape[2])
+ )
+ elem = cropped[:, ::-1]
+ for i in range(3):
+ elem = elem[::-1]
+ for j in range(3):
+ if j != 0:
+ elem = elem[:, ::-1]
+ large_skybox[
+ i * cropped.shape[0] : (i + 1) * cropped.shape[0],
+ j * cropped.shape[1] : (j + 1) * cropped.shape[1],
+ ] = elem
+ Image.fromarray(large_skybox.astype(np.uint8)).show()
+
+ # Fill holes
+ # For testing: return large_skybox
+
+ raise NotImplementedError("This function is not finished.")
+
+
+def enlarge_mask(initial_mask, iterations=20):
+ """Enlarge an input mask by applying a binary dilatation repetitively."""
+ large_mask = initial_mask
+ for _ in range(iterations):
+ large_mask = scipy.ndimage.binary_dilation(large_mask)
+ return large_mask
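+
+ # Design note (sketch): scipy.ndimage.binary_dilation also accepts an `iterations`
+ # argument, so the loop above could be replaced by a single call:
+ #
+ #     large_mask = scipy.ndimage.binary_dilation(initial_mask, iterations=iterations)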
+
+
+def inpaint_skybox(image_np, skybox):
+ """Apply inpainting to complete the skybox."""
+ large_mask = enlarge_mask(np.logical_not(skybox))
+ inpainted_skybox = Image.fromarray(
+ (inpaint_biharmonic(image_np, large_mask, channel_axis=-1) * 255).astype(
+ np.uint8
+ )
+ )
+ inpainted_skybox.show()
+ complete_skybox = inpaint_image(
+ "the sky", inpainted_skybox, Image.fromarray(large_mask), num_inference_steps=50
+ )[0]
+ return complete_skybox
+
+
+def inpaint_ground(image, image_np, depth_map, ground, filling_mask):
+ """Apply inpainting to complete the ground."""
+ ground_segment = mask_image(image_np, ground)
+
+ stretched_ground = (
+ Image.fromarray(ground_segment)
+ .resize((image.width * 5, image.height))
+ .crop((image.width * 2, 0, image.width * 3, image.height))
+ )
+
+ inpainted_ground = Image.fromarray(ground_segment)
+ inpainted_ground.paste(stretched_ground)
+ inpainted_ground.paste(
+ Image.fromarray(ground_segment), mask=Image.fromarray(ground)
+ )
+
+ print("Completing ground, inpainting")
+ complete_ground = inpaint_image("the ground", inpainted_ground, filling_mask)[0]
+ complete_ground.show()
+ print("Completing ground with controlnet")
+ ground_depth = 1 - increasing_depth(force_monotonous(depth_map))
+ complete_ground_controlled = inpaint_depth_controlled(
+ inpainted_ground,
+ filling_mask,
+ Image.fromarray(ground_depth),
+ "the ground",
+ )[0]
+ return complete_ground_controlled
+
+
+def complete_segments(image, depth_map, skybox, ground, objects):
+ """
+ Process the image parts.
+
+ - Sky: Reshape image size, inpaint holes.
+ - Ground: Inpaint holes, reshape rectangle.
+ - Objects: Store depth, normal map (?).
+
+ :param PIL.Image.Image image: The initial image
+ :param numpy.ndarray depth_map: The depth map
+ :param numpy.ndarray skybox: The skybox mask
+ :param numpy.ndarray ground: The ground mask
+ :param numpy.ndarray objects: Label map of the masked objects
+
+ :return: The completed segments.
+ :rtype: tuple[PIL.Image.Image, PIL.Image.Image, list[tuple[PIL.Image.Image, numpy.ndarray]]]
+ """
+ image_np = np.asarray(image)
+
+ # Complete the skybox
+ complete_skybox = inpaint_skybox(image_np, skybox)
+ complete_skybox.show()
+
+ # Complete the terrain
+ # np.logical_and takes only two array operands (a third would be treated as `out`),
+ # so reduce the three conditions instead.
+ filling_mask = np.logical_and.reduce(
+ (np.logical_not(ground), np.logical_not(skybox), objects >= 0)
+ )
+ filling_mask = enlarge_mask(filling_mask)
+ complete_ground = inpaint_ground(
+ image, image_np, depth_map, ground, Image.fromarray(filling_mask)
+ )
+ complete_ground.show()
+
+ objects_data = []
+ # Save the objects
+ for i in range(int(np.max(objects)) + 1):
+ mask = objects == i
+ cropping = crop_to_mask(image_np, mask)
+ depth = crop_to_mask(depth_map, mask)
+ # Handle occlusions
+ objects_data.append((cropping, depth))
+ warnings.warn("Objects occlusions cannot be handled yet.")
+
+ return complete_skybox, complete_ground, objects_data
+
+
+def save_as_scene(skybox, terrain, depth_map, _objects):
+ """
+ Save all the elements as objects in a scene.
+
+ :param PIL.Image.Image skybox: Skybox to save.
+ :param PIL.Image.Image terrain: Terrain texture.
+ :param numpy.ndarray depth_map: Terrain depth map.
+ :param _objects: Objects to save.
+ :type _objects: list[tuple[PIL.Image.Image, numpy.ndarray]]
+ """
+ skybox_path = "outputs/complete_skybox.png"
+ skybox.save("outputs/complete_skybox.png")
+ print("Saved the skybox under " + skybox_path)
+ terrain_texture_path = "outputs/terrain_texture.png"
+ terrain.save(terrain_texture_path)
+ terrain_mesh = generate_mesh(open3d.io.read_image(terrain_texture_path), depth_map)
+ terrain_mesh_path = "outputs/terrain_mesh.obj"
+ save_mesh(terrain_mesh, terrain_mesh_path)
+ print("Saved the mesh under " + terrain_mesh_path)
+ warnings.warn("Objects are not handled yet.")
+
+
+if __name__ == "__main__":
+ """
+ # To generate a new mesh
+ mesh_panorama_from_files("../sunny_mountain.png", "outputs/sunny 3D.obj", "outputs/sunny_depth_map.png")
+
+ # To regenerate data
+ skybox, ground, objects = segment_stuff(
+ "../sunny_mountain.png", "sunny_depth_map.png"
+ )
+
+ # Skybox mask only
+ skybox_mask = mask_skybox("../forest.png", "outputs/depth_map.png")
+
+ # Save the data to files
+ np.save("outputs/skybox.npy", skybox)
+ np.save("outputs/ground.npy", ground)
+ np.save("outputs/objects.npy", objects)
+ """
+ SKYBOX = np.load("outputs/skybox.npy")
+ GROUND = np.load("outputs/ground.npy")
+ OBJECTS = np.load("outputs/objects.npy")
+
+ complete_segments(
+ Image.open("../sunny_mountain.png"),
+ np.asarray(open3d.io.read_image("sunny_depth_map.png")) / (2**16 - 1),
+ SKYBOX,
+ GROUND,
+ OBJECTS,
+ )
+
+ # np.save("mask.npy", skybox_mask)
diff --git a/environment/sunny_depth_colored.png b/environment/sunny_depth_colored.png
new file mode 100644
index 0000000..8bb9972
Binary files /dev/null and b/environment/sunny_depth_colored.png differ
diff --git a/environment/sunny_depth_map.png b/environment/sunny_depth_map.png
new file mode 100644
index 0000000..7936ced
Binary files /dev/null and b/environment/sunny_depth_map.png differ
diff --git a/grid.png b/grid.png
new file mode 100644
index 0000000..9897772
Binary files /dev/null and b/grid.png differ
diff --git a/requirements-optional.txt b/requirements-optional.txt
new file mode 100644
index 0000000..de4cf03
--- /dev/null
+++ b/requirements-optional.txt
@@ -0,0 +1,3 @@
+PyAudio~=0.2.14 # For speech-to-text only
+datasets~=2.18.0 # For speech-to-text only
+# audiocraft~=1.2.0 # for text-to-sound, won't work, see https://github.com/facebookresearch/audiocraft?tab=readme-ov-file#installation
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7a1c942
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,12 @@
+torch~=2.3.0 # see https://pytorch.org/get-started/locally/ when it gets incompatible with the CUDA version
+transformers~=4.41.2
+pillow~=10.4.0
+diffusers~=0.29.2
+safetensors~=0.4.2
+numpy~=1.26.4
+scikit-image~=0.23.2
+scikit-learn~=1.4.1.post1
+torchvision~=0.18.0
+matplotlib~=3.9.0
+open3d~=0.18.0
+scipy~=1.12.0
diff --git a/server/run.py b/server/run.py
new file mode 100644
index 0000000..7235bfc
--- /dev/null
+++ b/server/run.py
@@ -0,0 +1,533 @@
+"""
+Starts a Python server using sockets, able to pass data to various AI functions.
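+
+The protocol is JSON over TCP. A minimal query (a sketch; field values are illustrative,
+and the accepted "type" values are the ones listed in ``prepare_response``):
+
+```python
+query = {"type": "ping", "taskId": 1, "queryTimestamp": 1700000000000, "reportCompletion": 0}
+```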
+"""
+
+import os
+import time
+import json
+import socket
+import socketserver
+import threading
+import warnings
+
+import torch.cuda
+from PIL import Image
+
+from asr.speech_to_text import do_audio_transcription
+from server.utils import (
+ hex_to_pillow,
+ get_server_address,
+ hex_to_bytes,
+ image_response,
+ get_configuration_data,
+)
+from server.task_tracker import TaskTracker
+from skybox.diffusion import generate_images, refine_images
+from skybox.inpainting import make_transparent_black, inpaint_panorama_pipeline
+from skybox import panorama_creator
+
+# Max chunk size for input data
+CHUNK_SIZE = 4096
+
+
+def init_server(server_ip, server_port):
+ """
+ Initialize the server with the input configuration file.
+ """
+ # Create a TCP socket
+ server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+
+ # Bind the socket to the IP address and port
+ server_socket.bind((server_ip, server_port))
+
+ # Listen for incoming connections
+ server_socket.listen(5)
+ print(f"Server is listening on {server_ip}:{server_port}")
+ return server_socket
+
+
+def server_data():
+ """The public data about this server."""
+ configuration_data = get_configuration_data()
+ data = {
+ "name": configuration_data["name"],
+ "description": configuration_data["description"],
+ "version": configuration_data["version"],
+ }
+ return data
+
+
+def completion_report(completion, client_socket, task_id):
+ """
+ Completion report for a task.
+
+ Send a completion report through the TCP connection.
+
+ :param int completion: Task completion from 0 to 100
+ :param socket.socket client_socket: TCP client socket
+ :param int task_id: Identifier of the task whose completion is reported.
+ :return dict: Data sent
+ """
+ data = {"completion": "progress", "taskCompletion": completion, "taskId": task_id}
+ response = {"status": 200, "data": json.dumps(data), "type": "completion"}
+ send_response(response, client_socket)
+ return data
+
+
+def new_skybox_handler(prompt, advanced, progress_tracker=None):
+ """
+ Generate a new skybox and add the image to the output data.
+
+ :param string prompt: The prompt for the skybox generation.
+ :param bool advanced: Stop the generation after the first step of the pipeline if true.
+ :param TaskTracker | None progress_tracker: TaskTracker object to report each step
+ """
+ if advanced:
+ height = 416
+ image = Image.new("RGB", (2504, height * 5 // 2), "black")
+ base_image = generate_images(
+ prompt, num_inference_steps=50, width=2504, height=height,
+ callback_on_step_end=progress_tracker.callback if progress_tracker else None,
+ )[0]
+ image.paste(base_image, (0, height))
+ else:
+ image = panorama_creator.generate_panorama(prompt, progress_tracker=progress_tracker)
+ response = image_response(image)
+ return response
+
+
+def new_skybox_local_handler(prompt, destination_path, step_callback=None):
+ """
+ Generate a new skybox.
+
+ :param string prompt: The prompt for the skybox generation.
+ :param string destination_path: Where to save the generated image.
+ :param Callable | None step_callback: Callback function to report each step.
+ :return dict: A dictionary with "skyboxFilePath" key.
+ """
+ images = generate_images(
+ prompt, callback_on_step_end=step_callback, height=1024, width=2048
+ )
+ images[0].save(destination_path)
+ data = {"skyboxFilePath": destination_path}
+ return data
+
+
+def panorama_handler(prompt, step_callback=None):
+ """
+ Generate a new panorama skybox (no seam line) and add the image to the output data.
+
+ Deprecated: since pipeline v0.4, use new_skybox_handler instead.
+
+ :param string prompt: The prompt for the skybox generation.
+ :param Callable | None step_callback: Callback function to report each step.
+ :return dict: A dictionary containing the image bytes in hexadecimal string.
+ """
+ image = generate_images(
+ prompt, callback_on_step_end=step_callback, height=1024, width=2048
+ )[0]
+ panorama = panorama_creator.rewrite_image_borders(image)
+ cylindrical = panorama_creator.cylindrical_projection(panorama)
+ smoothed = panorama_creator.blend_borders(cylindrical, 10)
+ response = image_response(smoothed)
+ return response
+
+
+def refine_skybox_handler(image_hex, prompt, step_callback=None):
+ """
+ Refine an image with SDXL refiner.
+
+ :param str image_hex: Base image hexadecimal string, PNG format
+ :param str prompt: Prompt to guide the refining process.
+ :param Callable | None step_callback: Callback function to report each step.
+ :return dict: A dictionary with the PNG image encoded.
+ """
+ base = hex_to_pillow(image_hex).convert("RGB")
+ image_part = base.crop((0, base.height * 2 // 5, base.width, base.height * 4 // 5))
+ refined = refine_images(
+ prompt, image_part, num_inference_steps=50, callback_on_step_end=step_callback
+ )[0]
+ base.paste(refined, (0, base.height * 2 // 5))
+ response = image_response(base)
+ return response
+
+
+def remove_seam_handler(image_hex, _step_callback=None):
+ """
+ Fixes the borders of an image to make it an asymmetric tiling.
+
+ :param str image_hex: Base image hexadecimal string, PNG format
+ :param Callable | None _step_callback: Callback function to report each step.
+ :return dict: A dictionary with the PNG image encoded.
+ """
+ image_frame = hex_to_pillow(image_hex).convert("RGB")
+ image_part = image_frame.crop(
+ (0, image_frame.height * 2 // 5, image_frame.width, image_frame.height * 4 // 5)
+ )
+ asymmetric_image = panorama_creator.rewrite_image_borders(image_part)
+ image_frame.paste(asymmetric_image, (0, image_frame.height * 2 // 5))
+ response = image_response(image_frame)
+ return response
+
+
+def extend_skybox_handler(image_hex, step_callback=None):
+ """
+ Expand the given image to create a larger skybox.
+
+ :param str image_hex: Base image hexadecimal string, PNG format
+ :param Callable | None step_callback: Callback function to report each step.
+ :return dict: A dictionary with the PNG image encoded.
+ """
+ image_frame = hex_to_pillow(image_hex).convert("RGB")
+ image_part = image_frame.crop(
+ (0, image_frame.height * 2 // 5, image_frame.width, image_frame.height * 4 // 5)
+ )
+ extended = panorama_creator.extend_image(image_part, 50, step_callback=step_callback)
+ response = image_response(extended)
+ return response
+
+
+def asr_local_handler(audio_file_path):
+ """
+ Return the transcription from an audio file.
+
+ :param str audio_file_path: Audio file path
+ :return dict: Text enclosed in "transcription" key
+ """
+ if os.path.exists(audio_file_path):
+ result = do_audio_transcription(audio_file_path)
+ print(result)
+ data = {"transcription": result["text"]}
+ else:
+ print("File does not exist")
+ data = {
+ "transcription": f"Error: input file {audio_file_path} does not exist!",
+ "message": f"Error: input file {audio_file_path} does not exist!",
+ }
+ return data
+
+
+def asr_handler(audio_bytes):
+ """
+ Return the transcription from an audio.
+
+ :param str audio_bytes: The audio as byte string, hexadecimal encoded
+ :return dict: Text enclosed in "transcription" key
+ """
+ raw_bytes = hex_to_bytes(audio_bytes)
+ result = do_audio_transcription(raw_bytes)
+ print(result)
+ return {"transcription": result["text"]}
+
+
+def inpaint_handler(image_hex, mask_image_hex, prompt, step_callback=None):
+ """
+ Inpaint (draw on) an image using a prompt, and add the image to the output data.
+
+ :param str image_hex: Hexadecimal string encoding of the image in PNG format.
+ :param str mask_image_hex: Mask image bytes, PNG format
+ :param str prompt: Prompt for inpainting
+ :param step_callback: Function to run at the end of each step f : step_number -> Any
+ :type step_callback: Callable | None
+
+ :return dict: The new inpainted image, in standard image response format.
+ """
+ init_image = hex_to_pillow(image_hex).convert("RGB")
+ mask_image = make_transparent_black(hex_to_pillow(mask_image_hex)).resize(init_image.size)
+ new_image = inpaint_panorama_pipeline(init_image, mask_image, prompt, step_callback)
+ response = image_response(new_image)
+ return response
+
+
+def inpaint_local_handler(
+ init_image_path, mask_image_path, prompt, destination_path, step_callback=None
+):
+ """
+ Inpaint (draw on) an image using a prompt.
+
+ :param str init_image_path: Base image path
+ :param str mask_image_path: Mask image path
+ :param str prompt: Prompt for inpainting
+ :param str destination_path: Destination path for the inpainted image
+ :param step_callback: Function to run at the end of each step f : step_number -> Any
+ :type step_callback: Callable | None
+
+ :return dict: Path to the new image, enclosed in "inpaintedFilePath" key
+ """
+ init_image = Image.open(init_image_path).convert("RGB")
+ mask_image = make_transparent_black(Image.open(mask_image_path)).resize(init_image.size)
+ new_image = inpaint_panorama_pipeline(init_image, mask_image, prompt, step_callback)
+ new_image.save(destination_path)
+ data = {"inpaintedFilePath": destination_path}
+ return data
+
+
+def send_response(response, client_socket):
+ """
+ Send a response through the client socket.
+
+ :param dict response: Response to send, a flat (not nested) dictionary.
+ :param socket.socket client_socket: Socket to send the response
+ """
+ str_dump = json.dumps(response)
+ client_socket.sendall(str_dump.encode())
+
+
+def start_task(task_dict, tracker):
+ """
+ Start a new server task.
+
+ :param dict task_dict: Dictionary of data about this task
+ :param TaskTracker | None tracker: Object to call on step end
+ :return dict: Dictionary containing the response to this task.
+ """
+ result = None
+ print(f"Starting task: {task_dict['type']}")
+
+ if task_dict["type"] == "new-skybox-local":
+ result = new_skybox_local_handler(
+ task_dict["prompt"],
+ task_dict["outputFilePath"],
+ step_callback=tracker.callback,
+ )
+ elif task_dict["type"] == "new-skybox":
+ result = new_skybox_handler(
+ task_dict["prompt"], bool(task_dict["quick"]), progress_tracker=tracker
+ )
+ elif task_dict["type"] == "panorama":
+ result = panorama_handler(task_dict["prompt"], step_callback=tracker.callback)
+ elif task_dict["type"] == "inpainting-local":
+ result = inpaint_local_handler(
+ task_dict["imagePath"],
+ task_dict["maskPath"],
+ task_dict["prompt"],
+ task_dict["outputFilePath"],
+ step_callback=tracker.callback,
+ )
+ elif task_dict["type"] == "inpainting":
+ result = inpaint_handler(
+ task_dict["imageBytes"],
+ task_dict["maskBytes"],
+ task_dict["prompt"],
+ step_callback=tracker.callback,
+ )
+ elif task_dict["type"] == "refine-skybox":
+ result = refine_skybox_handler(
+ task_dict["imageBytes"],
+ task_dict["prompt"],
+ step_callback=tracker.callback,
+ )
+ elif task_dict["type"] == "remove-seam":
+ result = remove_seam_handler(
+ task_dict["imageBytes"],
+ _step_callback=tracker.callback,
+ )
+ elif task_dict["type"] == "extend-skybox":
+ result = extend_skybox_handler(
+ task_dict["imageBytes"],
+ step_callback=tracker.callback,
+ )
+ elif task_dict["type"] == "asr-local":
+ result = asr_local_handler(task_dict["audioPath"])
+ elif task_dict["type"] == "asr":
+ result = asr_handler(task_dict["audioBytes"])
+ elif task_dict["type"] == "ping":
+ result = {
+ "queryTimestamp": task_dict["queryTimestamp"],
+ "responseTimestamp": int(time.time() * 1000),
+ "responseMilliseconds": int(time.time() * 1000)
+ - task_dict["queryTimestamp"],
+ }
+
+ if result is None:
+ raise NotImplementedError(
+ f"The task '{task_dict['type']}' is not recognized as a valid task type."
+ )
+
+ return result
+
+
+def prepare_response(json_data, tracker):
+ """
+ Prepare a response to be sent after a query.
+
+ :param dict json_data: Response dictionary
+ :param TaskTracker | None tracker: TaskTracker to report completion.
+ :return dict response: Response dictionary
+ """
+ if "reportCompletion" not in json_data or json_data["reportCompletion"] == 0:
+ tracker = None
+ if json_data["type"] == "info":
+ answer_data = server_data()
+ response = {
+ "status": 200,
+ "data": json.dumps(answer_data),
+ "message": "Info",
+ "type": "info",
+ }
+ elif json_data["type"] == "completion":
+ response = {
+ "status": 200,
+ "data": json.dumps({"completion": 0}),
+ "type": "completion",
+ }
+ elif json_data["type"] in (
+ "ping",
+ "new-skybox-local",
+ "new-skybox",
+ "panorama",
+ "refine-skybox",
+ "inpainting-local",
+ "inpainting",
+ "remove-seam",
+ "extend-skybox",
+ "asr-local",
+ "asr",
+ ):
+ response = {
+ "status": 200,
+ "taskId": json_data["taskId"],
+ "type": json_data["type"],
+ }
+ try:
+ answer_data = start_task(json_data, tracker)
+ except torch.cuda.OutOfMemoryError as err:
+ response = {
+ "status": 500,
+ "data": json.dumps({}),
+ "message": f"Out of memory: {err}",
+ "type": "error",
+ }
+ else:
+ response["data"] = json.dumps(answer_data)
+ else:
+ response = {
+ "status": 404,
+ "data": json.dumps({}),
+ "message": f"Unknown type: {json_data['type']}",
+ "type": "error",
+ }
+ return response
+
+
+def safe_send(response, client_socket):
+ """Safely send a response to the client, handling large responses by fragmenting them."""
+ size_limit = 8196 * 16
+ if len(response["data"]) > size_limit:
+ n_bytes = len(response["data"])
+ capsule = {}
+ capsule.update(response)
+ capsule["status"] = 206
+ # Ceiling division so that a final partial chunk is not dropped.
+ capsule["total_fragments"] = (n_bytes + size_limit - 1) // size_limit
+ print(f"Fragmenting response into {capsule['total_fragments']} fragments.")
+ for i in range(capsule["total_fragments"]):
+ fragment = response["data"][i * size_limit: i * size_limit + size_limit]
+ capsule["data"] = fragment
+ print(len(fragment))
+ capsule["index"] = i
+ send_response(capsule, client_socket)
+ else:
+ send_response(response, client_socket)
+
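+# Receiving side (a sketch, not part of this server): a fragmented response arrives as
+# several status-206 capsules that all carry "total_fragments"; a client can rebuild the
+# payload by concatenating the "data" fields in "index" order, e.g.:
+#
+#     capsule = json.loads(receive_next_message())  # hypothetical helper on the client
+#     fragments = {capsule["index"]: capsule["data"]}
+#     while len(fragments) < capsule["total_fragments"]:
+#         capsule = json.loads(receive_next_message())
+#         fragments[capsule["index"]] = capsule["data"]
+#     payload = json.loads("".join(fragments[i] for i in range(len(fragments))))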
+
+def handle_query(data, client_socket):
+ """
+ Handle a query from a client.
+
+ :param str data: Data received from client
+ :param socket.socket client_socket: The client socket
+ """
+ if not data or data.isspace():
+ print("Empty data, aborting")
+ response = {"status": 400, "data": json.dumps({}), "message": "Empty data"}
+ else:
+ try:
+ json_data = json.loads(data)
+ except ValueError as error:
+ print("Data is not json")
+ response = {
+ "status": 304,
+ "data": json.dumps({}),
+ "message": f"Wrong JSON: {error}",
+ }
+ else:
+ tracker = TaskTracker(client_socket, json_data["taskId"], completion_report)
+ response = prepare_response(json_data, tracker)
+
+ send_response(response, client_socket)
+
+
+def wait_for_connection(server_socket):
+ """
+ Wait for a connection from a client.
+
+ :param socket.socket server_socket: Server socket
+ """
+ # Accept incoming connection
+ client_socket, client_address = server_socket.accept()
+ print(f"Client {client_address} connected.")
+
+ # Receive data from client
+ bytes_buffer = []
+ while True:
+ bytes_read = client_socket.recv(CHUNK_SIZE)
+ bytes_buffer.append(bytes_read)
+ if len(bytes_read) == 0:
+ # An empty read means the client closed the connection.
+ warnings.warn("Received 0 bytes from client, connection closed")
+ break
+ elif len(bytes_read) == 1:
+ print("Received short data " + bytes_read.decode())
+ # Use a one-byte slice: bytes_read[-2] alone would be an int in Python 3.
+ elif bytes_read[-2:-1] != b"\\" and bytes_read.endswith(b"}"):
+ break
+ query_string = b"".join(bytes_buffer).decode()
+ try:
+ handle_query(query_string, client_socket)
+ except ConnectionResetError:
+ print("Connection reset during transmission.")
+
+ # Close the connection
+ client_socket.close()
+
+
+def handle(client_socket, client_address):
+ """Read the data until termination and take action."""
+ # self.request is the TCP socket connected to the client
+ bytes_buffer = []
+ while True:
+ bytes_read = client_socket.recv(CHUNK_SIZE)
+ bytes_buffer.append(bytes_read)
+ if len(bytes_read) == 0:
+ # An empty read means the client closed the connection.
+ warnings.warn("Received 0 bytes from client, connection closed")
+ break
+ elif len(bytes_read) == 1:
+ print("Received short data " + bytes_read.decode())
+ # Use a one-byte slice: bytes_read[-2] alone would be an int in Python 3.
+ elif bytes_read[-2:-1] != b"\\" and bytes_read.endswith(b"}"):
+ break
+ query_string = b"".join(bytes_buffer).decode()
+ print(f"Request from {client_address[0]}:{client_address[1]}")
+ handle_query(query_string, client_socket)
+
+
+class TCPHandler(socketserver.StreamRequestHandler):
+ """Instantiates the server."""
+
+ def handle(self):
+ """Define our to receive data, just a wrapper for the handle function."""
+ handle(self.request, self.client_address)
+
+
+def run_server(forked_server=True):
+ """Start the server."""
+ server_ip, server_port = get_server_address()
+
+ # Create the server
+ server = socketserver.TCPServer((server_ip, server_port), TCPHandler)
+ with server:
+ print(f"Starting server on {server_ip}:{server_port}")
+ if forked_server:
+ server_thread = threading.Thread(target=server.serve_forever)
+ server_thread.start()
+ else:
+ server.serve_forever()
+
+
+if __name__ == "__main__":
+ run_server(False)
diff --git a/server/task_tracker.py b/server/task_tracker.py
new file mode 100644
index 0000000..6562df7
--- /dev/null
+++ b/server/task_tracker.py
@@ -0,0 +1,54 @@
+"""
+A task tracker object keeps track of a task's completion and sends reports at regular intervals.
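+
+Example wiring (a sketch mirroring how ``start_task`` in ``server/run.py`` uses this class;
+``completion_report`` and ``generate_images`` come from ``server.run`` and ``skybox.diffusion``):
+
+```python
+tracker = TaskTracker(client_socket, task_id=1, reporter=completion_report)
+images = generate_images(prompt, callback_on_step_end=tracker.callback)
+```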
+"""
+import time
+
+
+class TaskTracker:
+ """Tracks a long task."""
+
+ def __init__(self, socket, task_id, reporter):
+ """
+ Start a task tracker.
+
+ :param socket: Client socket to send reports to.
+ :param int task_id: ID of the task being tracked.
+ :param Callable reporter: Function to call to send reports.
+ """
+ self.socket = socket
+ self.task_id = task_id
+ self.reporter = reporter
+ self.progress = 0
+ self.last_report_time = 0
+
+ def sending_report(self):
+ """Check if the completion report should be sent."""
+ send_report = time.time() - self.last_report_time > 2
+ if send_report:
+ self.last_report_time = time.time()
+ return send_report
+
+ def callback(self, pipe, step_index, _tensor, tensor_callback):
+ """Callback function to pass to a diffusion model."""
+ self.progress = step_index * 100 // pipe.num_timesteps
+ if self.sending_report():
+ self.reporter(self.progress, self.socket, self.task_id)
+ return tensor_callback
+
+ def incomplete_callback(self, max_progress):
+ """
+ Emulates an "incomplete progress": when a task needs several diffusion models.
+
+ :param int max_progress: Max progress that can be set by this task.
+ :return Callable: Function to pass to the diffusion model.
+ """
+ initial_progress = self.progress
+
+ def local_faker(pipe, step_index, _tensor, tensor_callback):
+ """Callback function to pass to a diffusion model that fakes the completion status."""
+ self.progress = initial_progress + step_index * max_progress // pipe.num_timesteps
+ if self.sending_report():
+ self.reporter(self.progress, self.socket, self.task_id)
+ return tensor_callback
+
+ return local_faker
diff --git a/server/utils.py b/server/utils.py
new file mode 100644
index 0000000..3c79435
--- /dev/null
+++ b/server/utils.py
@@ -0,0 +1,74 @@
+"""
+Various utility functions for the server.
+"""
+
+import os
+import io
+import json
+
+from PIL import Image
+
+
+def hex_to_bytes(hex_string):
+ """
+ Convert a hex string to bytes.
+
+ :param str hex_string: Hex string in C# format, separated by dashes.
+ :return bytes: Bytes object
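+
+ Example usage (the hex string below is an illustrative sample):
+
+ ```python
+ hex_to_bytes("89-50-4E-47")  # four bytes: 0x89, 'P', 'N', 'G'
+ ```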
+ """
+ return bytes.fromhex(hex_string.replace("-", ""))
+
+
+def hex_to_pillow(hex_string):
+ """
+ Take a hex string and convert it to a pillow image.
+
+ :param str hex_string: Hex string in C# format, separated by dashes.
+ :return PIL.Image.Image: Decoded Pillow image.
+ """
+ base_image_io = io.BytesIO(hex_to_bytes(hex_string))
+ return Image.open(base_image_io)
+
+
+def get_image_bytes(image):
+ """
+ Return the bytes composing a PNG image.
+
+ :param PIL.Image.Image image: Input image to get bytes from.
+ :return bytes: Bytes object
+ """
+ img_byte_arr = io.BytesIO()
+ image.save(img_byte_arr, format="PNG")
+ return img_byte_arr.getvalue()
+
+
+def get_configuration_data():
+ """
+ Configuration data for the server.
+
+ :return dict: Server configuration data from a JSON file.
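+
+ Example usage (a sketch; the key names are the ones read elsewhere in this package):
+
+ ```python
+ config = get_configuration_data()
+ print(config["name"], config["version"])  # public info, see server_data() in run.py
+ print(config["serverIp"], config["serverPort"])  # listening address
+ ```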
+ """
+ with open(os.path.join(os.path.dirname(__file__), "../api.json"), encoding="utf-8") as file:
+ configuration_data = json.load(file)
+ return configuration_data
+
+
+def get_server_address():
+ """Return the suggested IP and port for the server."""
+ configuration_data = get_configuration_data()
+ # Specify the IP address and port the server will listen on
+ server_ip = configuration_data["serverIp"]
+ server_port = configuration_data["serverPort"]
+ return server_ip, server_port
+
+
+def image_response(image):
+ """
+ A standard image response, with the image encoded as a hexadecimal byte string.
+
+ :param PIL.Image.Image image: The image to return.
+ :return dict: Response data with the key 'imageHexBytes'.
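+
+ Example of decoding on the receiving side (a sketch; ``data`` stands for the returned dict):
+
+ ```python
+ import io
+ decoded = Image.open(io.BytesIO(bytes.fromhex(data["imageHexBytes"])))
+ ```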
+ """
+ skybox_bytes = get_image_bytes(image)
+ data = {"imageHexBytes": skybox_bytes.hex()}
+ return data
diff --git a/skybox/diffusion.py b/skybox/diffusion.py
new file mode 100644
index 0000000..eb26947
--- /dev/null
+++ b/skybox/diffusion.py
@@ -0,0 +1,134 @@
+"""
+Simple(st) diffusion network,
+based on https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0.
+
+Generates an image from a prompt.
+"""
+import warnings
+
+from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline
+import torch
+
+
+def show_images(images):
+ """
+ Show the first five images.
+
+ :param list[PIL.Image.Image] images: Images.
+ """
+ for i in range(min(len(images), 5)):
+ images[i].show()
+
+
+def is_power_of_two(n):
+ """Check if a number is a power of two."""
+ return n > 0 and (n & (n - 1)) == 0
+
+
+def get_image_generation_pipeline():
+ """Load a text-to-image pipeline from Hugging Face for SDXL base."""
+ return StableDiffusionXLPipeline.from_pretrained(
+ "stabilityai/stable-diffusion-xl-base-1.0",
+ torch_dtype=torch.float16,
+ use_safetensors=True,
+ variant="fp16",
+ )
+
+
+def get_image_refinement_pipeline():
+ """Load an image-to-image pipeline from Hugging Face using Stable Diffusion XL."""
+ return StableDiffusionXLImg2ImgPipeline.from_pretrained(
+ "stabilityai/stable-diffusion-xl-refiner-1.0",
+ torch_dtype=torch.float16,
+ variant="fp16",
+ use_safetensors=True,
+ )
+
+
+def generate_images(prompt, num_inference_steps=50, height=1024, width=None, **pipe_kwargs):
+ """
+ Generate an image from the given prompt, using a diffusion network.
+
+ Note: for best results with SDXL, height * width should be equal to 1024*1024.
+
+ :param prompt: The prompt for the image.
+ :type prompt: str | tuple[str] | list[str]
+ :param int num_inference_steps: Number of denoising steps
+ :param int height: Image height, should be a power of two
+ :param int width: Image width; if left as None, it is set to 1024*1024 // height
+ :param dict pipe_kwargs: Additional arguments to pass to the pipeline.
+ :return list[PIL.Image.Image]: Generated images
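+
+ Example usage (a minimal sketch; the prompt and output path are illustrative):
+
+ ```python
+ images = generate_images("a mountain lake at sunset", height=512, width=2048)
+ images[0].save("skybox_base.png")
+ ```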
+ """
+ if width is None:
+ width = 1024 * 1024 // height
+ if not is_power_of_two(height) and not is_power_of_two(width):
+ warnings.warn(
+ f"Specified dimensions {width} * {height} are not powers of two, proceed with care."
+ )
+ elif not is_power_of_two(height):
+ warnings.warn(
+ f"Specified image height {height} is not a power of two, you may run into issues."
+ )
+ elif not is_power_of_two(width):
+ warnings.warn(
+ f"Specified image width {width} is not a power of two, you may run into issues."
+ )
+
+ if width * height != 1024 * 1024:
+ print(
+ "width * height should be equal to 1024 * 1024 for better results.",
+ f"Current is {width} * {height}."
+ )
+
+ pipe = get_image_generation_pipeline().to("cuda")
+ # If more VRAM needed
+ # pipe.enable_model_cpu_offload()
+
+ # If computation takes a long time (Linux only)
+ # pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+
+ return pipe(
+ prompt=prompt,
+ num_inference_steps=num_inference_steps,
+ height=height,
+ width=width,
+ **pipe_kwargs
+ ).images
+
+
+def refine_images(prompt, init_image, num_inference_steps=15, **pipe_kwargs):
+ """
+ Refine a batch of images using the diffusion network.
+
+ :param str | list[str] prompt: The prompt for the refined image.
+ :param PIL.Image.Image init_image: The initial image to refine.
+ :param int num_inference_steps: The number of inference steps for the refinement process.
+ :param dict pipe_kwargs: Additional keyword arguments to pass to the pipeline.
+ :return list[PIL.Image.Image]: A list of refined images.
+ """
+ pipe = get_image_refinement_pipeline().to("cuda")
+
+ return pipe(
+ prompt, image=init_image, num_inference_steps=num_inference_steps, **pipe_kwargs
+ ).images
+
+
+def main():
+ """Main demo for the diffusion model."""
+ demand = input(
+ "What would you like to generate? (Empty: An astronaut riding a green horse) "
+ )
+ if not demand.strip():
+ demand = "An astronaut riding a green horse"
+ batch_size = 1
+ inference_steps = 50
+ results = generate_images(
+ [demand] * batch_size, num_inference_steps=inference_steps, height=512, width=2048,
+ )
+ show_images(results)
+ results = refine_images([demand] * batch_size, results, inference_steps)
+ show_images(results)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/skybox/image_processing.py b/skybox/image_processing.py
new file mode 100644
index 0000000..1a2aaa8
--- /dev/null
+++ b/skybox/image_processing.py
@@ -0,0 +1,283 @@
+"""
+Various image editing functions.
+"""
+
+import itertools
+
+from PIL import Image, ImageDraw
+import numpy as np
+
+
+def split_base_image(img):
+ """Split an image in two and return left and right parts."""
+ width, height = img.size
+ position = width // 2
+ left_image = img.crop((0, 0, position, height))
+ right_image = img.crop((position, 0, width, height))
+ return left_image, right_image
+
+
+def flip_image_sides(img):
+ """
+ Take an input image, split it in the middle and flip both parts.
+
+ :param PIL.Image.Image img: Base input image (won't be changed)
+ :return PIL.Image.Image: Image with the same dimension but parts flipped
+ """
+ left_img, right_img = split_base_image(img)
+
+ n_right_img = left_img.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
+ n_left_img = right_img.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
+
+ out_image = Image.new(img.mode, img.size)
+ out_image.paste(n_right_img, (0, 0))
+ out_image.paste(n_left_img, (n_right_img.width, 0))
+ return out_image
+
+
+def paste_borders(background, left_img, right_img):
+ """Paste the borders onto an image."""
+ size = background.size
+
+ n_right_img = left_img.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
+ n_left_img = right_img.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
+
+ # Find the center of the background image
+ width, height = background.size
+ center = (width // 2, height // 2)
+
+ # Calculate the top-left corner of the white square image
+ top_left = (center[0] - size[0] // 2, center[1] - size[1] // 2)
+ top_right = (center[0] + size[0] // 2, center[1] + size[1] // 2)
+
+ # Paste the images onto the background image
+ background.paste(n_right_img, top_left)
+ background.paste(n_left_img, top_right)
+
+
+def concatenate_borders(left_image, right_image):
+ """
+ Create a new image with the border added, return the image.
+
+ :param PIL.Image.Image left_image: Image to concatenate on the left
+ :param PIL.Image.Image right_image: Image to concatenate on the right
+ """
+ # Get the width and height of the two images
+ width1, height1 = left_image.size
+ width2, height2 = right_image.size
+
+ # Calculate the width and height of the new image
+ new_width = width1 + width2
+ new_height = max(height1, height2)
+
+ # Create a new blank image with white background
+ new_image = Image.new("RGB", (new_width, new_height))
+
+ # Paste the first image onto the new image at position (0, 0)
+ new_image.paste(left_image, (0, 0))
+
+ # Paste the second image onto the new image at position (width1 + center_width, 0)
+ new_image.paste(right_image, (width1, 0))
+
+ return new_image
+
+
+def horizontal_carrousel(base_image, left_translation):
+ """
+ Crop the image at a specific horizontal point to do a carrousel.
+
+ The right side of the image will be sent on the left like in Pacman.
+
+ :param PIL.Image.Image base_image: Image to carrousel.
+ :param int left_translation: Number of pixels to translate the image by.
+ A negative value translates to the left.
+
+ :return PIL.Image.Image: New image translated.
+ """
+ if left_translation < 0:
+ left_translation = base_image.width + left_translation
+ left_image = base_image.crop((0, 0, left_translation, base_image.height))
+ right_image = base_image.crop((left_translation, 0, base_image.width, base_image.height))
+ output_image = base_image.copy()
+ output_image.paste(right_image)
+ output_image.paste(left_image, (base_image.width - left_translation, 0))
+ return output_image
+
+
+def box_mean_color(img, box):
+ """
+ Get the mean pixel value of a portion of image.
+
+ :param PIL.Image.Image img: Image to take pixels from
+ :param box: Box delimitation in format (left, top, right, bottom)
+ :type box: tuple[int, int, int, int]
+
+ :return tuple[int, int, int]: mean pixel color
+ """
+ diffs = box[2] - box[0], box[3] - box[1]
+ pixels = img.crop(box).load()
+ average_sky = np.mean(
+ [pixels[pos] for pos in itertools.product(range(diffs[0]), range(diffs[1]))],
+ axis=0,
+ )
+ return tuple(map(int, average_sky))
+
+
+def draw_gradient_box(img, position, size, start_color, end_color):
+ """
+ Draw a box as a gradient between two points.
+
+ :param PIL.Image.Image img: Base image to draw rectangle on
+ :param tuple[int, int] position: top-left corner where to start the box
+ :param tuple[int, int] size: Size of the box to draw
+ :param tuple[int, int, int] start_color: Color at the beginning of the box
+ :param tuple[int, int, int] end_color: Color at the end of the box
+ """
+ draw = ImageDraw.Draw(img)
+
+ x, y = position
+ width, height = size
+ for i in range(width):
+ color = [
+ int(start_color[c] + (end_color[c] - start_color[c]) * i / width)
+ for c in range(3)
+ ]
+ draw.line([(x + i, y), (x + i, y + height)], tuple(color))
+
+
+# 2D polar geometry functions
+
+
+def cartesian_to_polar(pos, origin=(0, 0)):
+ """
+ Polar coordinates from cartesian ones.
+
+ :param tuple[float, float] pos: (x, y) position in cartesian coordinates
+ :param tuple[float, float] origin: Origin the coordinates are relative to
+ :return tuple[float, float]: Radius and angle
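+
+ Example (a worked case):
+
+ ```python
+ cartesian_to_polar((1.0, 1.0))  # -> (sqrt(2), pi / 4)
+ ```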
+ """
+ vector = np.array(pos) - origin
+ return np.linalg.norm(vector), np.arctan2(vector[1], vector[0])
+
+
+def cartesian_to_polar_batch(pos, origin=(0, 0)):
+ """
+ Polar coordinates from cartesian ones, applied to a batch of positions.
+
+ :param numpy.ndarray | tuple | list pos: (x, y) positions in cartesian coordinates
+ :param tuple[float, float] | float origin: Origin the coordinates are relative to
+ :return numpy.ndarray: Array of radii and angles
+ """
+ vector = np.array(pos) - origin
+ return np.dstack(
+ (np.linalg.norm(vector, axis=1), np.arctan2(vector[:, 1], vector[:, 0]))
+ )[0]
+
+
+def polar_to_cartesian(pos, origin=(0, 0)):
+ """
+ Convert from polar coordinates to cartesian ones.
+
+ :param tuple[float, float] pos: (radius, angle) position in polar coordinates
+ :param tuple[float, float] origin: Cartesian axis origin
+ :return tuple[float, float]: Position as (x, y)
+ """
+ return origin + pos[0] * np.array([np.cos(pos[1]), np.sin(pos[1])])
+
+
+# Image distortion
+
+
+def distort_image(img, inner_radius=None):
+ """
+ Create an image distorted to fit on a circle.
+
+ With an initial image of dimensions (width, height),
+ the new image has dimensions ((height + inner_radius) * 2, (height + inner_radius) * 2).
+ All modified pixels are in a circle of radius height.
+
+ :param PIL.Image.Image img: Base image.
+ :param int | None inner_radius: Radius of a white circle to add (optional)
+ :return PIL.Image.Image: New image in a circle.
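+
+ Example usage (a sketch; the panorama path is hypothetical):
+
+ ```python
+ disk = distort_image(Image.open("panorama.png"), inner_radius=128)
+ disk.show()
+ ```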
+ """
+ # New image dimensions
+ canvas_size = img.height * 2 + (inner_radius or 0) * 2
+
+ # Get the polar coordinate of each pixel in the new image, format [[radius, angle], ...]
+ # Max radius is sqrt(2) * canvas_size / 2 (corners)
+ grid = tuple(itertools.product(range(canvas_size), range(canvas_size)))
+ polar_coordinates = cartesian_to_polar_batch(grid, canvas_size / 2)
+
+ # Select the indices of the pixels that should be changed in the new image
+ insiders = np.nonzero(
+ np.logical_and(
+ # Inside painted circle
+ polar_coordinates[:, 0] < canvas_size / 2,
+ # And outside mask
+ polar_coordinates[:, 0] >= (inner_radius or 0),
+ )
+ )[0]
+
+ # Acquire position on base image, cast to [0, img.size - 1]
+ adapted_pos = (
+ polar_coordinates[insiders]
+ * (np.array(img.size[::-1]) - 1)
+ / (canvas_size / 2, 2 * np.pi)
+ )
+ adapted_pos[:, 1] += (img.width - 1) / 2
+
+ # Round to int and swap the last dimensions (return to image format)
+ slicer = np.round(adapted_pos).astype(np.uint16)
+ new_pixels = np.asarray(img)[slicer[:, 0], slicer[:, 1]]
+
+ new_img_data = np.zeros((canvas_size * canvas_size, 3), dtype=np.uint8)
+ # Assign to the new image
+ new_img_data[insiders] = new_pixels
+
+ return Image.fromarray(new_img_data.reshape(canvas_size, canvas_size, 3))
+
+
+def unroll_top_image(img, width=None):
+ """
+ Unroll a polar projected image to standard format.
+
+ Take an image fitting in a circle, and unrolls it.
+ """
+ canvas_size = (width or img.size[0] // 2), img.size[1] // 2
+ # Create a white background image
+ new_img = Image.new("RGB", canvas_size, color="black")
+
+ for pos in itertools.product(range(new_img.width), range(new_img.height)):
+ # Acquire polar coordinates corresponding to this position (radius, angle)
+ adapted_pos = (
+ pos[1] * img.height / 2 / new_img.height,
+ pos[0] * 2 * np.pi / new_img.width,
+ )
+ # Position in the base image space
+ adapted_pos = polar_to_cartesian(adapted_pos, (img.width / 2, img.height / 2))
+ # Reduce to base image pixel space
+ pixel_pos = int(adapted_pos[0]), int(adapted_pos[1])
+ # print(pos, polar_pos, pixel_pos)
+ new_img.putpixel(pos, img.getpixel(pixel_pos))
+
+ return new_img
+
+
+def image_polar_to_rect(img, width=None):
+ """Take a polar (fisheye) image, and display it on a rectangle."""
+ base_radius = img.size[1] // 2
+ rect_size = (base_radius if width is None else width), base_radius
+ new_img = Image.new("RGB", rect_size, "white")
+ for radius in np.linspace(0, base_radius, base_radius - 1, endpoint=False):
+ for theta in np.linspace(0, 2 * np.pi, img.size[0], endpoint=False):
+ canvas_pos = (
+ int(base_radius + radius * np.cos(theta)),
+ int(base_radius + radius * np.sin(theta)),
+ )
+ new_img.putpixel(
+ canvas_pos,
+ img.getpixel(
+ (int(theta / (2 * np.pi) * (img.size[0] - 1)), int(radius))
+ ),
+ )
+ new_img.show()
diff --git a/skybox/inpainting.py b/skybox/inpainting.py
new file mode 100644
index 0000000..55c4301
--- /dev/null
+++ b/skybox/inpainting.py
@@ -0,0 +1,319 @@
+"""
+Image inpainting.
+
+For general usage, see https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint
+
+First version stable diffusion 1.2 base:
+https://huggingface.co/runwayml/stable-diffusion-inpainting
+Second version stable diffusion 2 base:
+https://huggingface.co/stabilityai/stable-diffusion-2-inpainting
+Third version stable diffusion xl 1.0:
+https://huggingface.co/diffusers/stable-diffusion-xl-1.0-inpainting-0.1
+"""
+
+import enum
+import warnings
+
+from diffusers import StableDiffusionXLInpaintPipeline
+from PIL import Image, ImageFilter
+import numpy as np
+import torch
+from skimage.restoration.inpaint import inpaint_biharmonic
+
+from skybox import image_processing
+
+
+class InpaintingFilling(enum.Enum):
+ """
+ Inpainting filling masks.
+
+ SAME: do not edit the input image.
+ AVERAGE: replace the masked area with the average pixel value.
+ MEAN_GREY: replace the masked area with a uniform grey mask.
+ BIHARMONIC: bi-harmonic interpolation,
+ see skimage.restoration.inpaint_biharmonic for more details.
+ RANDOM: replace the masked area with random noise.
+ """
+
+ SAME = 1
+ AVERAGE = 2
+ MEAN_GREY = 3
+ BIHARMONIC = 4
+ RANDOM = 5
+
+
+def make_transparent_black(image):
+ """
+ From an RGBA image, make the transparent pixels black.
+
+ :param PIL.Image.Image image: Base RGBA image
+ :return PIL.Image.Image: Mask image in grayscale format (L).
+ """
+ # Convert to grayscale format
+ grayscale = image.convert("L")
+ # Iterate through each pixel in the image
+ for y in range(grayscale.height):
+ for x in range(grayscale.width):
+ # If the alpha value is less than 255 (transparent), set the pixel to black
+ if image.getpixel((x, y))[-1] < 255:
+ grayscale.putpixel((x, y), 0)
+ return grayscale
+
+
+def center_on_mask(mask_image):
+ """
+ Compute how far to translate an image horizontally so that the mask is centered.
+
+ :param PIL.Image.Image mask_image: Mask image in which to find the mean point.
+
+ :return: How many pixels should be translated, and whether the mask wraps across the image border.
+ :rtype: tuple[int, bool]
+ """
+ mask_x_pos = np.asarray(mask_image).nonzero()[1]
+ mean_point = int(np.mean(mask_x_pos))
+ mask_x_extend = np.min(mask_x_pos), np.max(mask_x_pos)
+ if mask_x_extend[0] > 0 or mask_x_extend[1] < mask_image.width - 1:
+ return mean_point, False
+
+ dummy_mask_translation = image_processing.horizontal_carrousel(
+ mask_image, mask_image.width // 2
+ )
+ mask_x_pos = np.asarray(dummy_mask_translation).nonzero()[1]
+ mean_point = int(np.mean(mask_x_pos))
+ mask_x_extend = np.min(mask_x_pos), np.max(mask_x_pos)
+ if mask_x_extend[0] == 0 and mask_x_extend[1] == mask_image.width - 1:
+ warnings.warn("Seems like the mask is too large!")
+ return mask_image.width - mean_point, True
+
+
+def fill_masked_area(image, mask, inpainting_filling=InpaintingFilling.SAME):
+ """Fill a masked area of the given image with a specific strategy."""
+ if inpainting_filling == InpaintingFilling.SAME:
+ return image
+ if inpainting_filling == InpaintingFilling.AVERAGE:
+ # Use the average pixel value
+ image_data = np.asarray(image)
+ mask_data = np.asarray(mask)
+ pixels = image_data[mask_data != 0]
+ mean_pixel = np.mean(pixels, axis=0).astype(np.uint8)
+ area = Image.new(image.mode, image.size, color=tuple(mean_pixel))
+ masked_image = image.copy()
+ masked_image.paste(area, mask=mask)
+ return masked_image
+ if inpainting_filling == InpaintingFilling.MEAN_GREY:
+ # Equalize with grey
+ grey_area = Image.new(image.mode, image.size, color="grey")
+ masked_image = image.copy()
+ masked_image.paste(grey_area, mask=mask)
+ return masked_image
+ if inpainting_filling == InpaintingFilling.BIHARMONIC:
+ # Bi-harmonic filling
+ image_data = np.asarray(image)
+ mask_data = np.asarray(mask)
+ inpainted = (
+ inpaint_biharmonic(image_data, mask_data, channel_axis=-1) * 255
+ ).astype(np.uint8)
+ return Image.fromarray(inpainted)
+ if inpainting_filling == InpaintingFilling.RANDOM:
+ # Adds only random values
+ image_data = np.asarray(image)
+ mask_data = np.asarray(mask)
+ rng = np.random.default_rng(1)
+ noise_data = rng.integers(
+ 0, 255, image_data.shape
+ ) * mask_data.reshape(*mask_data.shape, 1)
+ masked_image = image.copy()
+ masked_image.paste(Image.fromarray(noise_data.astype(np.uint8)), mask=mask)
+ return masked_image
+ raise ValueError
+
+
+def get_inpainting_pipeline():
+ """
+ This function initializes and returns a pre-trained Stable Diffusion XL inpainting pipeline.
+
+ The pipeline is loaded from the Hugging Face model hub.
+ The pipeline is set to use half-precision (float16) for faster inference and lower memory usage.
+
+ :return: A pre-trained Stable Diffusion XL inpainting pipeline.
+ """
+ return StableDiffusionXLInpaintPipeline.from_pretrained(
+ "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
+ torch_dtype=torch.float16,
+ variant="fp16",
+ )
+
+
+def inpaint_image(prompts, image, mask_image, negative_prompt=None, **pipe_kwargs):
+ """
+ Apply the prompt to do the inpainting.
+
+ Side effect: reduce the quality of the image, even outside the mask.
+
+ :param str or list[str] prompts: Prompts to use
+ :param PIL.Image.Image image: Base image
+ :param mask_image: Mask to apply. The mask is white for inpainting and black for keeping as is.
+ :type mask_image: PIL.Image.Image
+ :param str negative_prompt: Negative prompt to apply
+ :return list[PIL.Image.Image]: Inpainted images
+ """
+ pipe = get_inpainting_pipeline().to("cuda")
+
+ return pipe(
+ prompt=prompts,
+ image=image,
+ mask_image=mask_image,
+ negative_prompt=negative_prompt,
+ height=image.height,
+ width=image.width,
+ strength=0.9,
+ **pipe_kwargs,
+ ).images
+
+
+def force_inpainting(prompts, image, mask_image, negative_prompt=None, **pipe_kwargs):
+ """
+ Apply the prompts to do the inpainting when you want to be sure that the inpainting is applied.
+
+ The inpainting will start with a random noise instead of the image,
+ generating more random results.
+
+ Side effect: reduce the quality of the image, even outside the mask.
+
+ :param str or list[str] prompts: Prompts to use
+ :param PIL.Image.Image image: Base image
+ :param mask_image: Mask to apply. The mask is white for inpainting and black for keeping as is.
+ :type mask_image: PIL.Image.Image
+ :param str negative_prompt: Negative prompt to apply
+ :return list[PIL.Image.Image]: Inpainted images
+ """
+ masked_image = fill_masked_area(image, mask_image, InpaintingFilling.RANDOM)
+ pipe_kwargs["guidance_scale"] = 20
+ pipe_kwargs["num_inference_steps"] = 25
+ return inpaint_image(
+ prompts, masked_image, mask_image, negative_prompt, **pipe_kwargs
+ )
+
+
+def image_compositing(
+ initial_image, inpainted_image, mask_image, blurring_radius, horizontal_tiling=False
+):
+ """
+ Preserve the quality of the original image by blending the original and the inpainted images.
+
+ :param PIL.Image.Image initial_image: Initial image before any inpainting.
+ :param PIL.Image.Image inpainted_image: Image after inpainting process.
+ :param PIL.Image.Image mask_image: Mask image to define the area to be inpainted.
+ :param int blurring_radius: Radius of the blurring filter applied to the mask.
+ :param bool horizontal_tiling: If True, we apply a horizontal tiling before compositing.
+ :return PIL.Image.Image final_composition: Composited image with original and inpainted parts.
+ """
+ if horizontal_tiling:
+ image_frame = Image.new(
+ initial_image.mode, (initial_image.width * 2, initial_image.height)
+ )
+ inpainted_image_frame = image_frame.copy()
+ mask_image_frame = image_frame.copy()
+ # Remark: an image of width (initial_image.width + blurring_radius * 2) would be enough
+ for left_padding in range(0, initial_image.width * 2, initial_image.width):
+ image_frame.paste(initial_image, (left_padding, 0))
+ inpainted_image_frame.paste(inpainted_image, (left_padding, 0))
+ mask_image_frame.paste(mask_image, (left_padding, 0))
+ blurred_mask = mask_image_frame.filter(ImageFilter.BoxBlur(blurring_radius)).convert("L")
+ big_image = Image.composite(inpainted_image_frame, image_frame, blurred_mask)
+ final_composition = big_image.crop(
+ (initial_image.width, 0, initial_image.width * 2, initial_image.height)
+ )
+ else:
+ blurred_mask = mask_image.filter(ImageFilter.BoxBlur(blurring_radius))
+ final_composition = Image.composite(inpainted_image, initial_image, blurred_mask)
+ return final_composition
+
+
+def inpaint_panorama_pipeline(
+ init_image, mask_image, prompt, step_callback=None, blurring_radius=40
+):
+ """
+ Base framework for an inpainting.
+
+ :param PIL.Image.Image init_image: Initial image to inpaint
+ :param PIL.Image.Image mask_image: Mask image to use
+ :param str prompt: Prompt for inpainting
+ :param step_callback: Function to run at the end of each step f : step_number -> Any
+ :type step_callback: Callable | None
+ :param int blurring_radius: Size of the blurring radius to apply.
+
+ :return PIL.Image.Image: The new inpainted image
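+
+ Example usage (a minimal sketch; the prompt and file names are illustrative):
+
+ ```python
+ result = inpaint_panorama_pipeline(
+ Image.open("../sunny_mountain.png").convert("RGB"),
+ Image.open("mask.png").convert("L"),
+ "a wooden cabin on the hillside",
+ )
+ result.show()
+ ```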
+ """
+ left_translation, should_translate = center_on_mask(mask_image)
+ # If the mask is across the borders we need to "turn" the image
+ if should_translate:
+ translated_image = image_processing.horizontal_carrousel(init_image, left_translation)
+ translated_mask = image_processing.horizontal_carrousel(mask_image, left_translation)
+ translated_result = force_inpainting(
+ prompt,
+ translated_image,
+ translated_mask,
+ callback_on_step_end=step_callback,
+ )[0]
+ new_image = image_processing.horizontal_carrousel(translated_result, -left_translation)
+ else:
+ new_image = force_inpainting(
+ prompt, init_image, mask_image, callback_on_step_end=step_callback
+ )[0]
+ # Apply the image on the mask only to avoid quality decrease
+ composited_image = image_compositing(init_image, new_image, mask_image, blurring_radius, True)
+ return composited_image
+
+
+def inpainting_demo():
+ """
+ A demo interaction of what the model can do.
+
+ This function demonstrates the usage of the model by prompting the user for a replacement,
+ to be added in the input image.
+ If the user doesn't provide a valid input, a default prompt is used.
+ """
+ demo_prompt = "A cat, high resolution, sitting"
+ prompt = input(f"What replacement do you want? [{demo_prompt}] ")
+ if not prompt.strip():
+ prompt = demo_prompt
+ image_path = input("What is the image path? [../sunny_mountain.png] ")
+ if not image_path.strip():
+ image_path = "../sunny_mountain.png"
+ base_image = Image.open(image_path).convert("RGB")
+ mask_path = input("What is the mask path? [mask.png] ")
+ if not mask_path.strip():
+ mask_path = "mask.png"
+ mask_image = Image.open(mask_path)
+
+ print("Starting inpainting")
+ inpainted_images = inpaint_image([prompt] * 4, base_image, mask_image)
+ for im in inpainted_images:
+ im.show()
+ print("Restoring initial image quality.")
+ for im in inpainted_images:
+ image_compositing(base_image, im, mask_image, 5, True).show()
+
+
+def __regenerate_mask():
+ image = Image.open("../sunny_mountain.png")
+ # Define the size of the mask (width, height)
+ mask_size = image.size
+
+ # Create a blank mask filled with zeros
+ mask = torch.zeros(mask_size, dtype=torch.uint8)
+
+ # Set some pixels to 1 to create a binary mask
+ mask[
+ image.width // 2 : image.width // 2 + 100,
+ image.height // 2 : image.height // 2 + 100,
+ ] = 255
+
+ # Save the mask as a PNG file using Pillow
+ img = Image.fromarray(mask.numpy())
+ img.save("mask.png")
+ return img
+
+
+if __name__ == "__main__":
+ inpainting_demo()
diff --git a/skybox/legacy/diffusion_trainer.py b/skybox/legacy/diffusion_trainer.py
new file mode 100644
index 0000000..2db59a5
--- /dev/null
+++ b/skybox/legacy/diffusion_trainer.py
@@ -0,0 +1,40 @@
+"""
+Training pipeline for a diffusion network.
+"""
+
+import random
+
+from skybox.diffusion import generate_images
+from skybox.legacy.equirectangular_checker import score_image
+
+
+def random_sentence():
+ """Generate random sentences."""
+ # Sets of words
+ adjectives = ("quick", "lazy", "smart", "cute", "red")
+ nouns = ("dog", "cat", "bird", "apple", "car")
+ verbs = ("runs", "eats", "hops", "jumps", "drives")
+ adverbs = ("quickly", "slowly", "carefully", "loudly", "eagerly")
+
+ sentence = " ".join(map(random.choice, (adjectives, nouns, verbs, adverbs)))
+ return sentence
+
+
+def generate():
+ """Generate a new image."""
+ prompt = random_sentence() + " monoscopic 360 equirectangular"
+ print(prompt)
+ image = generate_images(prompt)[0]
+ image.show()
+ return image
+
+
+def evaluate(img):
+ """Evaluates a given image quality."""
+ return score_image(img)
+
+
+if __name__ == "__main__":
+ for _ in range(5):
+ score = evaluate(generate())
+ print(f"Borders variation: {score}")
diff --git a/skybox/legacy/equirectangular_checker.py b/skybox/legacy/equirectangular_checker.py
new file mode 100644
index 0000000..00aa5f9
--- /dev/null
+++ b/skybox/legacy/equirectangular_checker.py
@@ -0,0 +1,80 @@
+"""
+Evaluates how much an image deviates from an equirectangular projection.
+"""
+
+import sys
+from PIL import Image
+
+
+def check_ratio(img):
+ """Check if the image's aspect ratio is 2:1"""
+ width, height = img.size
+ return width / height >= 2
+
+
+def define_boxes(img_size, subdivisions, pixels):
+ """Define the boxes of an image divided by a given number of subdivisions."""
+ width, height = img_size
+ for i in range(subdivisions):
+ box_range = i * width // subdivisions, (i + 1) * width // subdivisions
+ box = (box_range[0], 0, box_range[1], pixels)
+ opp = (box_range[0], height - pixels, box_range[1], height)
+ yield box, opp
+ for i in range(subdivisions):
+ box_range = i * height // subdivisions, (i + 1) * height // subdivisions
+ box = (0, box_range[0], pixels, box_range[1])
+ opp = (width - pixels, box_range[0], width, box_range[1])
+ yield box, opp
+
+
+def check_boundaries(img, subdivisions=10, pixels=5):
+ """Check if the boundaries of an image can be matched."""
+ diff = 0
+ for box, partner in define_boxes(img.size, subdivisions, pixels):
+ regions = img.crop(box), img.crop(partner)
+ means = [sum(regions[i].getdata()) / 255 / pixels / pixels for i in range(2)]
+ diff += (means[1] - means[0]) ** 2
+ # Method 2: diff = sum((regions[1].mirror - regions[0]) ** 2)
+ return diff / subdivisions
+
+
+def check_frontiers(img):
+ """
+ Return how different are the pixels on the opposite borders of an image.
+
+ :param PIL.Image.Image img: Input image
+ :return: Frontiers difference. 0 for identical, 1 if they are totally different.
+ :rtype: float
+ """
+ diff = 0
+ width, height = img.size
+ # Compare the left/right columns (vertical borders), then the top/bottom rows.
+ for y in range(height):
+ pixels = img.getpixel((0, y)), img.getpixel((width - 1, y))
+ diff += ((pixels[1] - pixels[0]) / 255) ** 2
+ for x in range(width):
+ pixels = img.getpixel((x, 0)), img.getpixel((x, height - 1))
+ diff += ((pixels[1] - pixels[0]) / 255) ** 2
+ return diff / (width + height)
+
+
+def score_image(img):
+ """
+ Return the likelihood that the input image is equirectangular.
+ """
+ variation = check_frontiers(img.convert("L"))
+ return variation
+
+
+def score_file(image_path):
+ """Assigns a variation score to the image."""
+ img = Image.open(image_path)
+ variation = score_image(img)
+ print("Boundary", check_boundaries(img), "frontier", check_frontiers(img))
+ return variation
+
+
+if __name__ == "__main__":
+ if len(sys.argv) != 2:
+ print("Please provide an image.")
+ sys.exit(126)
+ print("Value :", score_file(sys.argv[1]))
diff --git a/skybox/legacy/sdxl.py b/skybox/legacy/sdxl.py
new file mode 100644
index 0000000..ed2306a
--- /dev/null
+++ b/skybox/legacy/sdxl.py
@@ -0,0 +1,44 @@
+"""
+An implementation of Stable Diffusion XL with a custom checkpoint by ByteDance.
+
+From https://huggingface.co/ByteDance/SDXL-Lightning.
+"""
+
+import torch
+from diffusers import (
+ StableDiffusionXLPipeline,
+ UNet2DConditionModel,
+ EulerDiscreteScheduler,
+)
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file
+
+
+def main():
+ """Main demo function."""
+ base = "stabilityai/stable-diffusion-xl-base-1.0"
+ repo = "ByteDance/SDXL-Lightning"
+ ckpt = "sdxl_lightning_4step_unet.safetensors" # Use the correct ckpt for your step setting!
+
+ # Load model.
+ unet = UNet2DConditionModel.from_config(base, subfolder="unet").to(
+ "cuda", torch.float16
+ )
+ unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device="cuda"))
+ pipe = StableDiffusionXLPipeline.from_pretrained(
+ base, unet=unet, torch_dtype=torch.float16, variant="fp16"
+ ).to("cuda")
+
+ # Ensure sampler uses "trailing" time steps.
+ pipe.scheduler = EulerDiscreteScheduler.from_config(
+ pipe.scheduler.config, timestep_spacing="trailing"
+ )
+
+ # Ensure using the same inference steps as the loaded model and CFG set to 0.
+ prompt = "A lazy cat jumping smiling 360 equirectangular monoscopic"
+ image = pipe(prompt, num_inference_steps=4, guidance_scale=0).images[0]
+ image.show()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/skybox/mask.png b/skybox/mask.png
new file mode 100644
index 0000000..c034e71
Binary files /dev/null and b/skybox/mask.png differ
diff --git a/skybox/mask_editor.py b/skybox/mask_editor.py
new file mode 100644
index 0000000..ec634e5
--- /dev/null
+++ b/skybox/mask_editor.py
@@ -0,0 +1,343 @@
+"""
+Create mask for an image so that it can be locally inpainted.
+"""
+
+import enum
+
+import numpy as np
+from PIL import Image, ImageDraw
+from sklearn.cluster import KMeans
+
+from skybox import image_processing
+
+RANDOM_SEED = 42
+
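+# The canvas is the 2048x1024 base image plus a 256 px horizontal extension
+# (hence the 2048 + 256 width), and the top extension radius matches the base height.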
+FRAME_CONFIG = {
+ "width": 2048 + 256,
+ "height": 1024,
+ # Initial image to map
+ "base_image": {"width": 2048, "height": 1024},
+ # Extension of the image to make a cylinder
+ "horizontal_extensions": {"width": 256, "display": "both"},
+ # Sky extension to make a hemisphere
+ "top_extension": {"radius": 1024},
+}
+
+
+class ExtensionFilling(enum.Enum):
+ """
+ What type of filling to apply to an extended image.
+
+ STRETCH: stretch the image border.
+    MEAN: use the mean value of the image border.
+ GRADIENT: apply a continuous gradient from the mean value of the image border.
+ SMART: auto-extension depending on context.
+ """
+ STRETCH = 1
+ MEAN = 2
+ GRADIENT = 3
+ SMART = 4
+
+
+# Horizontal panorama zone
+
+def central_vertical_mask(image, center_width):
+ """
+    Create a vertical mask for the editing zone.
+
+    The mask is horizontally centered.
+
+ :param PIL.Image.Image image: Input image
+ :param int center_width: Width of the vertical mask.
+ """
+ mask = Image.new("L", image.size, "black")
+
+ mask.paste(
+ Image.new("L", (center_width, image.height), "white"),
+ (image.width // 2 - center_width // 2, 0),
+ )
+
+ return mask
+
+
+def central_circular_mask(canvas_size, inner_radius=None):
+ """
+ Create a circular mask with a specified canvas size and inner radius.
+
+ :param int canvas_size: The width or height of the output square image.
+    :param inner_radius: The radius of the inner circle. If None, no circle is drawn and the
+        mask stays fully black.
+    :type inner_radius: int | None
+
+    :return PIL.Image.Image: A black square image, with a white circle in the center when
+        inner_radius is given.
+ """
+ mask = Image.new("L", (canvas_size, canvas_size), color="black")
+ # Draw a white circle on the image
+ if inner_radius is not None:
+ draw = ImageDraw.Draw(mask)
+ draw.ellipse(
+ (
+ canvas_size / 2 - inner_radius,
+ canvas_size / 2 - inner_radius,
+ canvas_size / 2 + inner_radius,
+ canvas_size / 2 + inner_radius,
+ ),
+ fill="white",
+ )
+
+ return mask
+
+
+def add_top_mask(img, radius):
+ """Add a mask on the top of the image."""
+ # Get the width and height of the two images
+ width, height = img.size
+
+ # Calculate the width and height of the new image
+ new_height = height + radius * 2
+
+ # Create a new blank image with white background
+ new_image = Image.new("RGB", (width, new_height), "white")
+ mask = Image.new("L", (width, new_height), "white")
+
+    # Paste the original image below the extended top area
+ new_image.paste(img, (0, radius * 2))
+ mask.paste(Image.new("L", (width, new_height), color="black"), (0, radius * 2))
+
+ return new_image, mask
+
+
+def add_center_image(background, box_size):
+ """Add a white square to the center of an image."""
+ # Create a white square image
+ size = (box_size, box_size)
+ white_square = Image.new("RGB", size, "white")
+ # Find the center of the second image
+ width, height = background.size
+ center = (width // 2, height // 2)
+
+ # Calculate the top-left corner of the white square image
+ top_left = (center[0] - size[0] // 2, center[1] - size[1] // 2)
+
+    # Paste the white square onto the background image
+ background.paste(white_square, top_left)
+ return white_square
+
+
+def draw_top_image(background, radius, base_height):
+ """Draw a white circle in the top image."""
+ draw = ImageDraw.Draw(background)
+
+    # Compute the canvas center, then draw a white circle just above the base image area
+ center_x, center_y = background.size[0] // 2, background.size[1] // 2
+
+ circle_center = (center_x, center_y - base_height // 2 - radius)
+ draw.ellipse(
+ [
+ circle_center[0] - radius,
+ circle_center[1] - radius,
+ circle_center[0] + radius,
+ circle_center[1] + radius,
+ ],
+ fill="white",
+ )
+
+
+# Completion functions
+
+
+def draw_masks(base_file):
+ """
+ Draw a mask for inpainting from a base image.
+
+ The image is drawn upon, instead of creating a new image.
+ """
+ frame_object = FRAME_CONFIG
+    # Create a black canvas using the FRAME_CONFIG dimensions
+ img = Image.new(
+ "RGB", (frame_object["width"], frame_object["height"]), color="black"
+ )
+
+ base_image = Image.open(base_file)
+ base_image.thumbnail(
+ (frame_object["base_image"]["width"], frame_object["base_image"]["height"])
+ )
+
+ left_img, right_img = image_processing.split_base_image(base_image)
+
+ add_center_image(img, frame_object["base_image"]["width"])
+
+ # Add borders
+ image_processing.paste_borders(img, left_img, right_img)
+
+ # Add a circle on top of the box
+ draw_top_image(
+ img,
+ frame_object["top_extension"]["radius"],
+ frame_object["base_image"]["height"],
+ )
+
+ img.show()
+
+
+def horizontal_tiling_mask(img, frame_object=None):
+ """
+ Create an image with masks so that an IA can complete it.
+
+ :param PIL.Image.Image img: Image to apply masks to
+ :param frame_object: Frame configuration object,
+ containing information about the image dimensions and extensions.
+ :type frame_object: dict
+ :return: A tuple containing the image with masks applied and the corresponding mask.
+ """
+ if frame_object is None:
+ frame_object = FRAME_CONFIG
+
+ inpaint_canvas = image_processing.flip_image_sides(img)
+ mask = central_vertical_mask(
+ img, frame_object["width"] - frame_object["base_image"]["width"]
+ )
+
+ return inpaint_canvas, mask
+
+
+def create_gradient_mask(width, height, is_horizontal=True):
+ """
+ Create a gradient mask of specified dimensions.
+
+ :param int width: Width of the mask
+ :param int height: Height of the mask
+ :param bool is_horizontal: If True, the gradient will be horizontal.
+ Otherwise, it will be vertical.
+
+ :return: A mask image with a gradient fill.
+ """
+ mask = Image.new("L", (width, height))
+ draw = ImageDraw.Draw(mask)
+
+ if is_horizontal:
+ for i in range(width):
+ draw.line([(i, 0), (i, height)], fill=int(255 * (i / width)))
+ else:
+ for i in range(height):
+ draw.line([(0, i), (width, i)], fill=int(255 * (i / height)))
+
+ return mask
+
+
+def gradient_fill(img, size):
+ """
+ Create a gradient mask of specified dimensions and apply it to the input image.
+
+    The background color is the average of two estimates: the mean of the 10% brightest
+    pixels of the input image, and the centroid of the K-means cluster with the least dispersion.
+
+ The best solution is probably to use the deepest pixels.
+
+ :param Image.Image img: Input image to apply the gradient mask to.
+ :param int size: Height of the gradient mask.
+
+ :return: A new image with the input image's content blended with a gradient mask.
+ """
+ background = Image.new("RGBA", (img.width, size), color="white")
+ blend_mask = create_gradient_mask(background.width, background.height, False)
+
+ # Take only the 10% brighter pixels
+ pixels_array = np.asarray(img.convert("L"))
+ threshold = np.quantile(pixels_array, 0.9)
+ valid_indices = np.argwhere(pixels_array > threshold)
+ selection = np.asarray(img)[valid_indices[:, 0], valid_indices[:, 1]]
+ mean_pixel_value = np.mean(selection, axis=0).astype(np.uint8)
+
+ # Alternative path: use pixels with less contrast
+ pixels_stack = np.vstack(np.asarray(img))
+ n_clusters = 5
+ kmeans = KMeans(n_clusters=n_clusters, random_state=RANDOM_SEED).fit(pixels_stack)
+ dispersions = np.empty(n_clusters)
+ for label in range(n_clusters):
+ indices = np.argwhere(kmeans.labels_ == label)
+ dispersions[label] = np.sum(
+ (kmeans.cluster_centers_[label] - pixels_stack[indices]) ** 2
+ ) / len(indices)
+ mean_pixel_value2 = kmeans.cluster_centers_[np.argmin(dispersions)]
+
+ mean_pixel_value = (mean_pixel_value + mean_pixel_value2) / 2
+
+ foreground = Image.fromarray(
+ np.full((size, img.width, 3), mean_pixel_value).astype(np.uint8)
+ ).convert("RGBA")
+ blended_image = Image.composite(foreground, background, blend_mask)
+
+ return blended_image
+
+
+def add_top_frame(img, size, border_size=10, extension_filling=ExtensionFilling.MEAN):
+ """
+ Add a new frame on top of the current image.
+
+ :param Image.Image img: Input image to apply the gradient mask to.
+ :param int size: Height of the gradient mask.
+ :param int border_size: Size of the border to be added to the top of the image.
+ :param extension_filling: Method to fill the top zone with a crop from the original image.
+ :type extension_filling: ExtensionFilling
+
+ :return: A new image with the input image's content blended with a gradient mask.
+ """
+ canvas_size = img.width, img.height + size
+ # Create a white background image
+ new_img = Image.new("RGB", canvas_size, color="white")
+ mask = Image.new("L", canvas_size, color="white")
+
+ new_img.paste(img, (0, size))
+ # Fill the top zone with a crop from the original image
+ cropped = img.crop((0, 0, img.width, border_size))
+ if extension_filling == ExtensionFilling.GRADIENT:
+ blended_image = gradient_fill(cropped, size)
+ new_img.paste(blended_image)
+ elif extension_filling == ExtensionFilling.STRETCH:
+ new_img.paste(cropped.resize((cropped.width, size)))
+ else:
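+        # MEAN filling; note that ExtensionFilling.SMART currently falls through to this
+        # branch as well, since it has no dedicated handling yet.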
+ arr = np.asarray(cropped)
+ mean_pixel_value = np.mean(arr.reshape(-1, 3), axis=0).astype(np.uint8)
+ averaged_img = Image.fromarray(
+ np.full((size, cropped.width, 3), mean_pixel_value)
+ )
+ new_img.paste(averaged_img)
+
+ mask.paste(
+ Image.new("L", (img.width, img.height - border_size), color="black"),
+ (0, size + border_size),
+ )
+ return new_img, mask
+
+
+def create_top_mask(img, sky_size=None, extension_filling=ExtensionFilling.MEAN):
+ """
+    Create a mask at the top of the image.
+
+ :param Image.Image img: Input image to apply the gradient mask to.
+ :param int sky_size: Height of the gradient mask.
+ If None, it will be set to the default value in the FRAME_CONFIG dictionary.
+ :param extension_filling: Method to fill the top zone with a crop from the original image.
+ Default is ExtensionFilling.MEAN.
+ :type extension_filling: ExtensionFilling
+
+ :return: A tuple containing the new image with the input image's
+ content blended with a gradient mask and the corresponding mask.
+ :rtype: tuple[PIL.Image.Image, PIL.Image.Image]
+ """
+ if sky_size is None:
+ sky_size = FRAME_CONFIG["top_extension"]["radius"]
+ return add_top_frame(img, sky_size, extension_filling=extension_filling)
+
+
+def display_masks(base_file):
+ """Display the image with the mask applied."""
+ base_image = Image.open(base_file)
+ img, _ = horizontal_tiling_mask(base_image, FRAME_CONFIG)
+ img.show("Central mask applied")
+ img, _ = create_top_mask(base_image)
+ img.show("Top mask applied")
+
+
+if __name__ == "__main__":
+ display_masks("../sunny_mountain.png")
diff --git a/skybox/panorama_creator.py b/skybox/panorama_creator.py
new file mode 100644
index 0000000..a4cabb7
--- /dev/null
+++ b/skybox/panorama_creator.py
@@ -0,0 +1,505 @@
+"""
+A Python script using Stable Diffusion and Inpainting to create panorama and skyboxes.
+"""
+
+from PIL import Image, ImageFilter, ImageDraw
+
+import numpy as np
+import torch
+from torchvision.transforms.functional import pil_to_tensor
+
+from skybox.diffusion import generate_images
+from skybox.inpainting import inpaint_image
+from skybox import mask_editor as me
+from skybox import image_processing
+
+
+def clamp(x, low=0, high=1):
+ """
+ Clamp a value between two extremes.
+
+ :param float x: Value to clamp
+ :param float low: Min value
+ :param float high: Max value
+ :return float: Clamped value
+ """
+ return max(min(x, high), low)
+
+
+def equirectangular_projection(img):
+ """
+ Compute an equirectangular projection from a flat image.
+
+    The formula to convert a set of coordinates (latitude, longitude) on a sphere to an
+    equirectangular projection is:
+
+        x = (longitude + 180) * (image width / 360)
+        y = (90 - latitude) * (image height / 180)
+
+    But this function does not use that formula.
+
+ :param PIL.Image.Image img: Input image
+ :return PIL.Image.Image: Projected image.
+ """
+ width, height = img.size
+ equirectangular_image = Image.new("RGB", (width, height), "white")
+
+ # Convert each pixel in the equirectangular image
+ for x in range(width):
+ for y in range(height):
+ v = y
+ # [-1, 1]
+ lon, lat = (x - width / 2) * 2 / width, (y - height / 2) * 2 / height
+ u = x + (
+ width / 2 * np.sin(lon * np.pi / 2) * 1 * (1 - np.cos(lat * np.pi / 2))
+ )
+
+ u, v = int(clamp(u, 0, width - 1)), int(clamp(v, 0, height - 1))
+
+ # Map the pixel from the input image to the equirectangular image
+ if (
+ u >= width
+ or v >= height
+ or x >= equirectangular_image.size[0]
+ or y >= equirectangular_image.size[1]
+ ):
+ continue
+ equirectangular_image.putpixel((x, y), img.getpixel((int(u), int(v))))
+
+ return equirectangular_image
+
+
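+# For reference, a minimal sketch of the textbook (latitude, longitude) -> (x, y) mapping
+# quoted in the docstring of equirectangular_projection above. It is not used by the
+# pipeline; the function name and the degrees convention are assumptions for illustration.
+def latlon_to_pixel(latitude, longitude, width, height):
+    """Map spherical coordinates in degrees to equirectangular pixel coordinates."""
+    x = (longitude + 180) * (width / 360)
+    y = (90 - latitude) * (height / 180)
+    return x, y
+
+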
+def cylindrical_projection(img):
+ """
+ Compute a cylindrical projection from a flat image.
+
+    The x-axis is preserved, but the y-axis will be changed.
+ This is the inverse operation of a Lambert projection.
+
+ :param PIL.Image.Image img: Input image
+ :return PIL.Image.Image: Output image in cylindrical projection
+ """
+ image = pil_to_tensor(img)
+ height, _width = image.shape[1:3]
+ cylindrical_image = torch.empty(image.shape)
+
+ # Convert each pixel in the equirectangular image, from [0, height] to [0, height]
+ # As the view is essentially from a cylinder to a sphere, a cosine transformation is applied
+ # We then apply a reverse cosine
+ lines = height * (1 - torch.arccos(torch.linspace(-1, 1, height)) / torch.pi)
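+    # Example: the output row 25% down samples the source row at
+    # height * (1 - arccos(-0.5) / pi) = height / 3, so source rows near the poles are
+    # compressed while rows near the horizon are stretched.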
+ ratios = lines - torch.round(lines)
+ for y in range(height):
+ v = int(lines[y].item())
+ ratio = ratios[y]
+ if v + 1 < height:
+ interpolates = image[:, v + 1] * ratio + (1 - ratio) * image[:, v]
+ else:
+ interpolates = image[:, height - 1]
+
+ cylindrical_image[:, y] = interpolates
+
+ # Convert as a pillow image
+ cylindrical_image = Image.fromarray(
+ np.transpose(cylindrical_image.numpy(), (1, 2, 0)).astype("uint8")
+ )
+ return cylindrical_image
+
+
+def horizontal_tiling(img):
+ """
+    Simple tiling function to check whether an image can be tiled with itself.
+
+ :param PIL.Image.Image img: Base image to tile.
+ :return PIL.Image.Image: Horizontal concatenation of the base image.
+ """
+ width, height = img.size
+ # Create a new image with twice the width
+ new_image = Image.new("RGB", (width * 2, height))
+
+ # Paste the original image twice
+ new_image.paste(img, (0, 0))
+ new_image.paste(img, (width, 0))
+
+ return new_image
+
+
+def blend_borders(img, size=10):
+ """
+    Blend the borders of an image to make them match. The returned image is shifted so that the original borders meet at its center.
+
+ :param PIL.Image.Image img: Input image.
+ :param int size: Number of pixels to use
+
+ :return PIL.Image.Image img: Auto-blended image.
+ """
+
+ width, height = img.size
+ position = width // 2
+
+ right_crop = img.crop((position, 0, width, height))
+
+ translated = img.transform(
+ img.size, Image.Transform.AFFINE, (1, 0, -position, 0, 1, 0)
+ )
+ translated.paste(right_crop, (0, 0))
+
+ box = (width // 2 - size // 2, 0, width // 2 + size // 2, height)
+ central_crop = translated.crop(box)
+
+ central_crop = central_crop.filter(ImageFilter.SMOOTH)
+
+ translated.paste(central_crop, box)
+
+ return translated
+
+
+def rewrite_image_borders(image, steps=20):
+ """
+ Inpaint the borders of an image to remove a seam line.
+
+ :param PIL.Image.Image image: Initial image.
+ :param int steps: Number of steps for inpainting.
+ :return PIL.Image.Image: The inpainted image."""
+ img, mask = me.horizontal_tiling_mask(image)
+ inv_panorama = inpaint_image(
+ "", img, mask, negative_prompt="a logo, a text", num_inference_steps=steps
+ )[0]
+ panorama = image_processing.flip_image_sides(inv_panorama)
+ return panorama
+
+
+def add_ground(base_image, steps, step_callback=None):
+ """
+ Add a ground to an image.
+
+ The process is the following:
+ 1. The bottom part of the base image is selected, copied and stretched.
+ 2. The image is then distorted into a circle, centered on the lower part of the new image.
+    3. An inpainting process is run to redraw the ground.
+    4. The image is unrolled back to the initial dimensions.
+
+ :param PIL.Image.Image base_image: The input image to be extended as a ground.
+ :param int steps: The number of inference steps for each inpainting process.
+ :param step_callback: Optional callback function to be called after each inference step.
+ :type step_callback: Callable | None
+ :return PIL.Image.Image: The new ground of the image.
+ """
+ # Reverse the image, add a frame to the top part
+ # 2048x256 image
+ half_image = base_image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).crop(
+ (0, 0, base_image.width, base_image.height // 2)
+ )
+ # 2048x512
+ img, _ = me.add_top_frame(
+ half_image,
+ half_image.height,
+ half_image.height // 8,
+ extension_filling=me.ExtensionFilling.STRETCH,
+ )
+ # Distort on the ground
+ img = image_processing.distort_image(img)
+
+ mask = Image.new("L", img.size, color="black")
+ draw = ImageDraw.Draw(mask)
+ draw.ellipse(
+ (
+ half_image.height,
+ half_image.height,
+ img.height - half_image.height,
+ img.height - half_image.height,
+ ),
+ fill="white",
+ )
+ img_with_ground = inpaint_image(
+ "the ground seen from above, uniform color",
+ img,
+ mask,
+ negative_prompt="a logo, a text, clouds, birds",
+ num_inference_steps=steps,
+ callback_on_step_end=step_callback,
+ )[0]
+ extended_ground = (
+ # Unroll from (1024x1024) to (1024x512)
+ image_processing.unroll_top_image(
+ img_with_ground.transpose(Image.Transpose.ROTATE_270), base_image.width
+ )
+ .transpose(Image.Transpose.FLIP_TOP_BOTTOM)
+ .transpose(Image.Transpose.FLIP_LEFT_RIGHT)
+ )
+
+ # Stitch the new ground to the upper part without seam
+ bottom_mask = linear_gradient_mask(
+ (base_image.width, base_image.height // 2), extended_ground.height // 10
+ ).transpose(Image.Transpose.FLIP_TOP_BOTTOM)
+ blend_mask = Image.new("L", extended_ground.size, "white")
+ blend_mask.paste(bottom_mask)
+ initial_ground_frame = Image.new(base_image.mode, extended_ground.size)
+ initial_ground_frame.paste(
+ base_image.crop((0, base_image.height // 2, base_image.width, base_image.height))
+ )
+ new_ground_frame = Image.new(base_image.mode, extended_ground.size)
+ new_ground_frame.paste(extended_ground)
+ ground_blend = Image.composite(new_ground_frame, initial_ground_frame, blend_mask)
+
+ return ground_blend
+
+
+def linear_gradient_mask(size, margin_height=10):
+ """
+    Create a linear gradient mask over the top margin of an image.
+
+    The mask is a grayscale image that fades from white at the top to black over the first
+    margin_height rows; the rest of the mask is black. This is useful for blending the top
+    of one image into another without a visible seam.
+
+    :param tuple[int, int] size: The size of the output mask.
+    :param int margin_height: The height of the gradient margin at the top of the mask.
+ :return: A grayscale image representing the gradient mask.
+ """
+ mask = Image.new("L", size)
+
+ gradient = (
+ Image
+ .linear_gradient("L")
+ .transpose(Image.Transpose.FLIP_TOP_BOTTOM)
+ .resize((size[0], margin_height))
+ )
+ mask.paste(gradient)
+ return mask
+
+
+def sigmoid_gradient_mask(width, height, decay=50):
+ """
+ Create a gradient mask for an image that has a logistic curve shape.
+
+ The mask is a grayscale image where the top half is darker and the bottom half is lighter.
+ This is useful for creating a seamless transition between the top and bottom halves of an image.
+
+ :param int width: The width of the output mask.
+ :param int height: The height of the output mask.
+ :param float decay: The speed at which the blending changes.
+ :return: A grayscale image representing the gradient mask.
+ """
+ mask = Image.new("L", (width, height))
+ draw = ImageDraw.Draw(mask)
+
+ indices = np.linspace(0, 1, height)
+
+ # Logistic curve shape
+ shades = 255 / (1 + np.exp(-decay * (indices - 0.5)))
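+    # With the default decay of 50, the shade is roughly 1.7 at 40% of the height, 127.5 at
+    # 50% and 253.3 at 60%, so nearly the whole black-to-white transition happens in the
+    # central fifth of the mask.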
+
+ for i, shade in enumerate(shades):
+ draw.line([(0, i), (width, i)], fill=int(shade))
+
+ return mask
+
+
+def add_sky(input_image, steps, step_callback=None):
+ """
+    Create a sky from the top half of the base image.
+
+ The sky has the same dimensions as the base image.
+
+ :param PIL.Image.Image input_image: The input image to be extended as a sky.
+ :param int steps: The number of inference steps for each inpainting process.
+ :param step_callback: Optional callback function to be called after each inference step.
+ :type step_callback: Callable | None
+ :return PIL.Image.Image: The final image with more sky.
+ """
+    # The base image is 2504x416, which uses too much VRAM, so reduce the working size a bit
+ context_height = input_image.height // 2
+ half_sky = input_image.crop((0, 0, input_image.width, context_height))
+
+ # Prepare the image that will receive an inpainting
+ gradient_extended, mask = me.create_top_mask(
+ half_sky, input_image.height, extension_filling=me.ExtensionFilling.GRADIENT
+ )
+
+ # Distort on a circle
+ img = image_processing.distort_image(gradient_extended)
+
+ mask = Image.new("L", img.size, color="black")
+ draw = ImageDraw.Draw(mask)
+ draw.ellipse(
+ (
+ half_sky.height,
+ half_sky.height,
+ img.height - half_sky.height,
+ img.height - half_sky.height,
+ ),
+ fill="white",
+ )
+
+ img_with_sky = inpaint_image(
+ "the sky seen from below",
+ img,
+ mask,
+ negative_prompt="a logo, a text, birds",
+ num_inference_steps=steps,
+ callback_on_step_end=step_callback,
+ )[0]
+ extended_sky = (
+ # Unroll from (4:4) to (4:~1)
+ image_processing.unroll_top_image(
+ img_with_sky.transpose(Image.Transpose.ROTATE_270), input_image.width
+ )
+ .transpose(Image.Transpose.FLIP_LEFT_RIGHT)
+ )
+
+ # Merge the original image and the extended version to get a seamless blend
+ bottom_mask = linear_gradient_mask(input_image.size, input_image.height // 5)
+ blend_mask = Image.new("L", extended_sky.size, "white")
+ blend_mask.paste(bottom_mask, (0, input_image.height))
+ sky_blend = Image.composite(extended_sky, gradient_extended, blend_mask)
+
+ # Return only the new part
+ return sky_blend
+
+
+def concatenate_images_seamless(top_image, bottom_image):
+ """Vertically concatenate two images together without leaving a seam mark."""
+ blend_mask = sigmoid_gradient_mask(top_image.width, top_image.height * 2)
+
+ foreground = Image.new("RGB", (bottom_image.width, bottom_image.height * 2))
+ foreground.paste(top_image)
+ foreground.paste(
+ top_image.transpose(Image.Transpose.FLIP_TOP_BOTTOM), (0, top_image.height)
+ )
+
+ background = Image.new("RGB", (bottom_image.width, bottom_image.height * 2))
+ background.paste(bottom_image.transpose(Image.Transpose.FLIP_TOP_BOTTOM))
+ background.paste(bottom_image, (0, bottom_image.height))
+
+ return Image.composite(background, foreground, blend_mask)
+
+
+def extend_image(base_image, steps_per_inference=50, step_callback=None):
+ """
+ Triple the height of an image with more sky and ground.
+
+ The optimal dimensions for the base image are 2508x418 (1024*sqrt(6)).
+ The closest dimensions divisible by 8 are 2504x416, but 2048x512 yields better image quality.
+
+ :param PIL.Image.Image base_image: Initial image to work on.
+ :param int steps_per_inference: Number of inference steps for each inpainting process.
+ :param step_callback: Optional callback function to be called after each inference step.
+ :type step_callback: server.task_tracker.TaskTracker | None
+ :return PIL.Image.Image: The final image with more sky and ground.
+ """
+ img_with_sky = add_sky(
+ base_image,
+ min(steps_per_inference, 30),
+ step_callback.incomplete_callback(30) if step_callback else None,
+ )
+ img_with_sky.show()
+
+ # Add the ground
+ extended_ground = add_ground(
+ base_image,
+ steps_per_inference,
+ step_callback.incomplete_callback(30) if step_callback else None,
+ )
+
+ # Add the three pieces to the final canvas
+ final_image = Image.new(base_image.mode, (base_image.width, base_image.height * 5 // 2))
+ final_image.paste(base_image, (0, final_image.height - base_image.height // 2))
+ final_image.paste(img_with_sky)
+ final_image.paste(extended_ground, (0, img_with_sky.height))
+ final_image.show()
+
+ return final_image
+
+
+def legacy_extension(base_image, prompt, num_inference_steps=50):
+ """
+ Extend the base image with the legacy pipeline v0.3.
+
+    The main trade-off of this pipeline was that, while it produced seamless matching,
+    it used up to 16 GB of VRAM and sometimes did not comply with sky and ground requests.
+
+ :param PIL.Image.Image base_image: Initial image to extend.
+    :param str prompt: Prompt used to tile the image into a panorama.
+ :param int num_inference_steps: Number of inference steps for generation.
+
+ :return PIL.Image.Image: The extended image with a cylindrical projection.
+ """
+ base_image.show()
+ print("Closing the sky...")
+ img, mask = me.create_top_mask(base_image)
+ img_with_sky = inpaint_image(
+ "a sky, uniform color",
+ img,
+ mask,
+ negative_prompt="a logo, a text, clouds, birds",
+ num_inference_steps=num_inference_steps,
+ )[0]
+ img_with_sky.show("Image with more sky")
+ img, mask = me.horizontal_tiling_mask(img_with_sky)
+ print("Fixing the panorama...")
+ panorama = inpaint_image(
+ prompt,
+ img,
+ mask,
+ negative_prompt="a logo, a text",
+ num_inference_steps=num_inference_steps,
+ )[0]
+ panorama.show("panorama")
+ cylindrical = cylindrical_projection(panorama)
+ blended = blend_borders(cylindrical, 10)
+ # horizontal_tiling(blended).show("manually tiling")
+
+ return blended
+
+
+def generate_panorama_legacy(prompt, num_inference_steps=50):
+ """
+ Create a panorama from a prompt.
+
+ A panorama is an image with a deformation on the vertical axis.
+
+ :param str prompt: The initial user prompt.
+    :param int num_inference_steps: Number of inference steps for each generation stage.
+ :return PIL.Image.Image: The computed panorama.
+ """
+ print("Generating image...")
+ base_image = generate_images(
+ prompt, num_inference_steps=num_inference_steps, width=2048, height=512
+ )[0]
+ extended_image = legacy_extension(base_image, prompt, num_inference_steps)
+ return extended_image
+
+
+def generate_panorama(prompt, num_inference_steps=50, progress_tracker=None):
+ """
+ Create a panorama from a prompt, more complete than the legacy version.
+
+ :param str prompt: The initial user prompt.
+    :param int num_inference_steps: Number of inference steps for each generation stage.
+ :param progress_tracker: A TaskTracker to be called when the step is finished.
+ :type progress_tracker: server.task_tracker.TaskTracker | None
+ :return PIL.Image.Image: The computed panorama.
+ """
+ base_image = generate_images(
+ prompt, num_inference_steps=num_inference_steps, width=2504, height=416,
+ callback_on_step_end=progress_tracker.incomplete_callback(30) if progress_tracker else None
+ )[0]
+ base_image.show()
+ # Inpaint to blend the borders
+ panorama = rewrite_image_borders(base_image)
+ extended_image = extend_image(panorama, num_inference_steps, progress_tracker)
+ return extended_image
+
+
+def __user_interaction(num_inference_steps=50, use_legacy=False):
+ """A demonstration function that asks an image prompt to the user and shows the result."""
+ prompt = input("What panorama do you want? ")
+    if not prompt.strip():
+ prompt = "a peaceful valley"
+ print("Using prompt: " + prompt)
+ if use_legacy:
+ img = generate_panorama_legacy(prompt, num_inference_steps)
+ else:
+ img = generate_panorama(prompt, num_inference_steps)
+ img.show("Final panorama")
+
+
+if __name__ == "__main__":
+ __user_interaction(20)
diff --git a/sound/ambient_generation.py b/sound/ambient_generation.py
new file mode 100644
index 0000000..eae0560
--- /dev/null
+++ b/sound/ambient_generation.py
@@ -0,0 +1,85 @@
+"""
+Generates ambient audio from a text prompt.
+
+* For ambient audio:
+ * https://huggingface.co/declare-lab/tango2 : apparently a good model but difficult to integrate
+  * https://huggingface.co/facebook/audiogen-medium : a weaker but sufficient model
+* Music : https://huggingface.co/facebook/musicgen-small
+* Text-to-speech : https://huggingface.co/suno/bark
+"""
+
+from audiocraft.models import AudioGen, MusicGen
+from audiocraft.data.audio import audio_write
+
+
+def ambient_audio(descriptions, duration=10):
+ """
+ Generate audio samples based on descriptions provided.
+
+ :param list[str] descriptions: Description of the audio.
+ :param int duration: The duration of the audio.
+ :return tuple[torch.Tensor, int]: WAVE audio samples and sample rate.
+ """
+ model = AudioGen.get_pretrained("facebook/audiogen-medium")
+ model.set_generation_params(duration=duration)
+ wav = model.generate(descriptions)
+
+ return wav, model.sample_rate
+
+
+def ambient_music(descriptions, duration=30):
+ """
+    Generate music based on the descriptions provided.
+
+ :param list[str] descriptions: Description of the audio.
+ :param int duration: The duration of the audio.
+ :return tuple[torch.Tensor, int]: WAVE audio samples and sample rate.
+ """
+ model = AudioGen.get_pretrained("facebook/musicgen-medium")
+ model.set_generation_params(duration=duration)
+ wav = model.generate(descriptions)
+
+ return wav, model.sample_rate
+
+
+def generate_audio(descriptions, duration=10):
+ """
+ Generates audio samples based on descriptions provided and saves them as .wav files.
+
+ :param list[str] descriptions: Description of the audio.
+ :param int duration: The duration of the audio in seconds. Default is 10 seconds.
+ """
+ wav_data, sample_rate = ambient_audio(descriptions, duration)
+ for idx, one_wav in enumerate(wav_data):
+        # Saved as outputs/audio_{idx}.wav (audio_write adds the suffix), with loudness
+        # normalization at -14 dB LUFS.
+        audio_write(
+            f"outputs/audio_{idx}",
+ one_wav.cpu(),
+ sample_rate,
+ strategy="loudness",
+ loudness_compressor=True,
+ )
+
+
+def generate_music(descriptions, duration=30):
+ """
+ Generate music based on the descriptions provided.
+
+ :param list[str] descriptions: Description of the audio.
+ :param int duration: The duration of the audio. Default is 30 seconds.
+ """
+ wav_data, sample_rate = ambient_music(descriptions, duration)
+ for idx, one_wav in enumerate(wav_data):
+        # Saved as outputs/music_{idx}.wav (audio_write adds the suffix), with loudness
+        # normalization at -14 dB LUFS.
+        audio_write(
+            f"outputs/music_{idx}",
+ one_wav.cpu(),
+ sample_rate,
+ strategy="loudness",
+ loudness_compressor=True,
+ )
+
+
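+# Hedged sketch: a text-to-speech helper for the Bark model referenced in the module
+# docstring. It follows Bark's published quickstart; the function name, output path and the
+# extra bark/scipy dependencies are assumptions, and nothing else in the project calls it.
+def generate_speech(text, output_path="outputs/speech_0.wav"):
+    """Generate a spoken audio file from a text prompt (illustrative sketch)."""
+    # Imported lazily so the module still loads when bark / scipy are not installed.
+    from bark import SAMPLE_RATE, generate_audio as bark_generate_audio, preload_models
+    from scipy.io.wavfile import write as write_wav
+
+    preload_models()  # Download / load the Bark checkpoints
+    audio_array = bark_generate_audio(text)
+    write_wav(output_path, SAMPLE_RATE, audio_array)
+
+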
+if __name__ == "__main__":
+ generate_audio(["Seagulls crying", "Waves crashing", "Water lapping at the shore"])
+ # generate_music(["Calm and relaxing music"])
diff --git a/sunny_mountain.png b/sunny_mountain.png
new file mode 100644
index 0000000..23dcc3e
Binary files /dev/null and b/sunny_mountain.png differ
diff --git a/utils/download_models.py b/utils/download_models.py
new file mode 100644
index 0000000..6f594b0
--- /dev/null
+++ b/utils/download_models.py
@@ -0,0 +1,27 @@
+"""
+Simple utility script that forces the download of all models.
+
+Just run the script, and the models will be downloaded.
+"""
+import asr.speech_to_text
+import skybox.diffusion
+import skybox.inpainting
+
+
+def load_production_pipelines():
+ """Load all pipelines used in the server in order to download the associated models."""
+ print("Starting loading models")
+ print("Loading speech recognition...")
+ asr.speech_to_text.get_asr_model()
+ print("Loading image generation...")
+ skybox.diffusion.get_image_generation_pipeline()
+ print("Loading image refinement...")
+ skybox.diffusion.get_image_refinement_pipeline()
+ print("Loading inpainting...")
+ skybox.inpainting.get_inpainting_pipeline()
+
+ print("Finished loading models with success!")
+
+
+if __name__ == "__main__":
+ load_production_pipelines()