diff --git a/nexa/__init__.py b/nexa/__init__.py
index aa1cdece..230e693b 100644
--- a/nexa/__init__.py
+++ b/nexa/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.8.2"
+__version__ = "0.0.8.3"
diff --git a/nexa/gguf/nexa_inference_image.py b/nexa/gguf/nexa_inference_image.py
index d57bcbed..9bb4b2bb 100644
--- a/nexa/gguf/nexa_inference_image.py
+++ b/nexa/gguf/nexa_inference_image.py
@@ -91,16 +91,14 @@ def __init__(self, model_path, local_path=None, **kwargs):
             self.ae_downloaded_path, _ = pull_model(self.ae_path)
         if self.clip_l_path:
             self.clip_l_downloaded_path, _ = pull_model(self.clip_l_path)
-
-        if "lcm-dreamshaper" in self.model_path:
-            self.params = DEFAULT_IMG_GEN_PARAMS_LCM
+        if "lcm-dreamshaper" in self.model_path or "flux" in self.model_path:
+            self.params = DEFAULT_IMG_GEN_PARAMS_LCM.copy()  # both lcm-dreamshaper and flux use the same params
         elif "sdxl-turbo" in self.model_path:
-            self.params = DEFAULT_IMG_GEN_PARAMS_TURBO
+            self.params = DEFAULT_IMG_GEN_PARAMS_TURBO.copy()
         else:
-            self.params = DEFAULT_IMG_GEN_PARAMS
-
-        self.params.update(kwargs)
+            self.params = DEFAULT_IMG_GEN_PARAMS.copy()
+        self.params.update({k: v for k, v in kwargs.items() if v is not None})
         if not kwargs.get("streamlit", False):
             self._load_model(model_path)
             if self.model is None:
@@ -111,17 +109,29 @@ def __init__(self, model_path, local_path=None, **kwargs):
     def _load_model(self, model_path: str):
         with suppress_stdout_stderr():
             from nexa.gguf.sd.stable_diffusion import StableDiffusion
-
-            self.model = StableDiffusion(
-                model_path=self.downloaded_path,
-                lora_model_dir=self.params.get("lora_dir", ""),
-                n_threads=self.params.get("n_threads", multiprocessing.cpu_count()),
-                wtype=self.params.get(
-                    "wtype", NEXA_RUN_MODEL_PRECISION_MAP.get(model_path, "f32")
-                ),  # Weight type (options: default, f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
-                control_net_path=self.params.get("control_net_path", ""),
-                verbose=False,
-            )
+            if self.t5xxl_downloaded_path and self.ae_downloaded_path and self.clip_l_downloaded_path:
+                self.model = StableDiffusion(
+                    diffusion_model_path=self.downloaded_path,
+                    clip_l_path=self.clip_l_downloaded_path,
+                    t5xxl_path=self.t5xxl_downloaded_path,
+                    vae_path=self.ae_downloaded_path,
+                    n_threads=self.params.get("n_threads", multiprocessing.cpu_count()),
+                    wtype=self.params.get(
+                        "wtype", NEXA_RUN_MODEL_PRECISION_MAP.get(model_path, "default")
+                    ),  # Weight type (options: default, f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
+                    verbose=False,
+                )
+            else:
+                self.model = StableDiffusion(
+                    model_path=self.downloaded_path,
+                    lora_model_dir=self.params.get("lora_dir", ""),
+                    n_threads=self.params.get("n_threads", multiprocessing.cpu_count()),
+                    wtype=self.params.get(
+                        "wtype", NEXA_RUN_MODEL_PRECISION_MAP.get(model_path, "default")
+                    ),  # Weight type (options: default, f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
+                    control_net_path=self.params.get("control_net_path", ""),
+                    verbose=False,
+                )

     def _save_images(self, images):
         """
diff --git a/nexa/gguf/sd/_internals_diffusion.py b/nexa/gguf/sd/_internals_diffusion.py
index fe918458..55ecf0a5 100644
--- a/nexa/gguf/sd/_internals_diffusion.py
+++ b/nexa/gguf/sd/_internals_diffusion.py
@@ -1,4 +1,7 @@
 import os
+from contextlib import ExitStack
+
+from nexa.gguf.sd._utils_diffusion import suppress_stdout_stderr

 import nexa.gguf.sd.stable_diffusion_cpp as sd_cpp

@@ -59,6 +62,7 @@ def __init__(
         self.keep_control_net_cpu = keep_control_net_cpu
         self.keep_vae_on_cpu = keep_vae_on_cpu
         self.verbose = verbose
+        self._exit_stack = ExitStack()

         self.model = None

@@ -75,39 +79,50 @@ def __init__(
             raise ValueError(f"Diffusion model path does not exist: {diffusion_model_path}")

         if model_path or diffusion_model_path:
-            # Load the Stable Diffusion model ctx
-            self.model = sd_cpp.new_sd_ctx(
-                self.model_path.encode("utf-8"),
-                self.clip_l_path.encode("utf-8"),
-                self.t5xxl_path.encode("utf-8"),
-                self.diffusion_model_path.encode("utf-8"),
-                self.vae_path.encode("utf-8"),
-                self.taesd_path.encode("utf-8"),
-                self.control_net_path.encode("utf-8"),
-                self.lora_model_dir.encode("utf-8"),
-                self.embed_dir.encode("utf-8"),
-                self.stacked_id_embed_dir.encode("utf-8"),
-                self.vae_decode_only,
-                self.vae_tiling,
-                self.free_params_immediately,
-                self.n_threads,
-                self.wtype,
-                self.rng_type,
-                self.schedule,
-                self.keep_clip_on_cpu,
-                self.keep_control_net_cpu,
-                self.keep_vae_on_cpu,
-            )
+            with suppress_stdout_stderr(disable=verbose):
+                # Load the Stable Diffusion model ctx
+                self.model = sd_cpp.new_sd_ctx(
+                    self.model_path.encode("utf-8"),
+                    self.clip_l_path.encode("utf-8"),
+                    self.t5xxl_path.encode("utf-8"),
+                    self.diffusion_model_path.encode("utf-8"),
+                    self.vae_path.encode("utf-8"),
+                    self.taesd_path.encode("utf-8"),
+                    self.control_net_path.encode("utf-8"),
+                    self.lora_model_dir.encode("utf-8"),
+                    self.embed_dir.encode("utf-8"),
+                    self.stacked_id_embed_dir.encode("utf-8"),
+                    self.vae_decode_only,
+                    self.vae_tiling,
+                    self.free_params_immediately,
+                    self.n_threads,
+                    self.wtype,
+                    self.rng_type,
+                    self.schedule,
+                    self.keep_clip_on_cpu,
+                    self.keep_control_net_cpu,
+                    self.keep_vae_on_cpu,
+                )

             # Check if the model was loaded successfully
             if self.model is None:
                 raise ValueError(f"Failed to load model from file: {model_path}")

+        def free_ctx():
+            """Free the model from memory."""
+            if self.model is not None and self._free_sd_ctx is not None:
+                self._free_sd_ctx(self.model)
+                self.model = None
+
+        self._exit_stack.callback(free_ctx)
+
+    def close(self):
+        """Closes the exit stack, ensuring all context managers are exited."""
+        self._exit_stack.close()
+
     def __del__(self):
-        """Free the model when the object is deleted."""
-        if self.model is not None and self._free_sd_ctx is not None:
-            self._free_sd_ctx(self.model)
-            self.model = None
+        """Free memory when the object is deleted."""
+        self.close()


 # ============================================
@@ -132,6 +147,7 @@ def __init__(
         self.n_threads = n_threads
         self.wtype = wtype
         self.verbose = verbose
+        self._exit_stack = ExitStack()

         self.upscaler = None

@@ -151,8 +167,18 @@ def __init__(
         if self.upscaler is None:
             raise ValueError(f"Failed to load upscaler model from file: {upscaler_path}")

+        def free_ctx():
+            """Free the model from memory."""
+            if self.upscaler is not None and self._free_upscaler_ctx is not None:
+                self._free_upscaler_ctx(self.upscaler)
+                self.upscaler = None
+
+        self._exit_stack.callback(free_ctx)
+
+    def close(self):
+        """Closes the exit stack, ensuring all context managers are exited."""
+        self._exit_stack.close()
+
     def __del__(self):
-        """Free the upscaler model when the object is deleted."""
-        if self.upscaler is not None and self._free_upscaler_ctx is not None:
-            self._free_upscaler_ctx(self.upscaler)
-            self.upscaler = None
\ No newline at end of file
+        """Free memory when the object is deleted."""
+        self.close()
\ No newline at end of file
diff --git a/nexa/gguf/sd/_logger_diffusion.py b/nexa/gguf/sd/_logger_diffusion.py
index 1326a7d8..25a960a6 100644
--- a/nexa/gguf/sd/_logger_diffusion.py
+++ b/nexa/gguf/sd/_logger_diffusion.py
@@ -2,8 +2,6 @@
 import ctypes
 import logging

-import nexa.gguf.sd.stable_diffusion_cpp as stable_diffusion_cpp
-
 # enum sd_log_level_t {
 #     SD_LOG_DEBUG = 0,
 #     SD_LOG_INFO = 1,
diff --git a/nexa/gguf/sd/_utils_diffusion.py b/nexa/gguf/sd/_utils_diffusion.py
new file mode 100644
index 00000000..4b071c68
--- /dev/null
+++ b/nexa/gguf/sd/_utils_diffusion.py
@@ -0,0 +1,59 @@
+import os
+import sys
+
+# Avoid "LookupError: unknown encoding: ascii" when open() called in a destructor
+outnull_file = open(os.devnull, "w")
+errnull_file = open(os.devnull, "w")
+
+STDOUT_FILENO = 1
+STDERR_FILENO = 2
+
+
+class suppress_stdout_stderr(object):
+    """
+    Stops all output to stdout and stderr when used as a context manager (GGML will otherwise still print logs).
+    Source: https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/_utils.py
+    """
+
+    # NOTE: these must be "saved" here to avoid exceptions when using
+    # this context manager inside of a __del__ method
+    sys = sys
+    os = os
+
+    def __init__(self, disable: bool = True):
+        self.disable = disable
+
+    # Oddly enough this works better than the contextlib version
+    def __enter__(self):
+        if self.disable:
+            return self
+
+        self.old_stdout_fileno_undup = STDOUT_FILENO
+        self.old_stderr_fileno_undup = STDERR_FILENO
+
+        self.old_stdout_fileno = self.os.dup(self.old_stdout_fileno_undup)
+        self.old_stderr_fileno = self.os.dup(self.old_stderr_fileno_undup)
+
+        self.old_stdout = self.sys.stdout
+        self.old_stderr = self.sys.stderr
+
+        self.os.dup2(outnull_file.fileno(), self.old_stdout_fileno_undup)
+        self.os.dup2(errnull_file.fileno(), self.old_stderr_fileno_undup)
+
+        self.sys.stdout = outnull_file
+        self.sys.stderr = errnull_file
+        return self
+
+    def __exit__(self, *_):
+        if self.disable:
+            return
+
+        # Check if sys.stdout and sys.stderr have fileno method
+        self.sys.stdout = self.old_stdout
+        self.sys.stderr = self.old_stderr
+
+        self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
+        self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
+
+        self.os.close(self.old_stdout_fileno)
+        self.os.close(self.old_stderr_fileno)
\ No newline at end of file
diff --git a/nexa/gguf/sd/stable_diffusion.py b/nexa/gguf/sd/stable_diffusion.py
index e286f988..cf0e4cb1 100644
--- a/nexa/gguf/sd/stable_diffusion.py
+++ b/nexa/gguf/sd/stable_diffusion.py
@@ -2,15 +2,16 @@
 import random
 import ctypes
 import multiprocessing
-from PIL import Image
+import contextlib

+from PIL import Image

 import nexa.gguf.sd.stable_diffusion_cpp as sd_cpp
 from nexa.gguf.sd.stable_diffusion_cpp import GGMLType, RNGType, Schedule, SampleMethod
 from nexa.gguf.sd._internals_diffusion import _StableDiffusionModel, _UpscalerModel

-# from nexa._logger_diffusion import set_verbose
+from nexa.gguf.sd._utils_diffusion import suppress_stdout_stderr


 class StableDiffusion:
@@ -104,6 +105,7 @@ def __init__(
         self.keep_clip_on_cpu = keep_clip_on_cpu
         self.keep_control_net_cpu = keep_control_net_cpu
         self.keep_vae_on_cpu = keep_vae_on_cpu
+        self._stack = contextlib.ExitStack()

         # =========== Logging ===========

@@ -118,34 +120,46 @@ def __init__(

         # =========== SD Model loading ===========

-        # Load the Stable Diffusion model
-        self._model = _StableDiffusionModel(
-            self.model_path,
-            self.clip_l_path,
-            self.t5xxl_path,
-            self.diffusion_model_path,
-            self.vae_path,
-            self.taesd_path,
-            self.control_net_path,
-            self.lora_model_dir,
-            self.embed_dir,
-            self.stacked_id_embed_dir,
-            self.vae_decode_only,
-            self.vae_tiling,
-            self.free_params_immediately,
-            self.n_threads,
-            self.wtype,
-            self.rng_type,
-            self.schedule,
-            self.keep_clip_on_cpu,
-            self.keep_control_net_cpu,
-            self.keep_vae_on_cpu,
-            self.verbose,
+        self._model = self._stack.enter_context(
+            contextlib.closing(
+                _StableDiffusionModel(
+                    self.model_path,
+                    self.clip_l_path,
+                    self.t5xxl_path,
+                    self.diffusion_model_path,
+                    self.vae_path,
+                    self.taesd_path,
+                    self.control_net_path,
+                    self.lora_model_dir,
+                    self.embed_dir,
+                    self.stacked_id_embed_dir,
+                    self.vae_decode_only,
+                    self.vae_tiling,
+                    self.free_params_immediately,
+                    self.n_threads,
+                    self.wtype,
+                    self.rng_type,
+                    self.schedule,
+                    self.keep_clip_on_cpu,
+                    self.keep_control_net_cpu,
+                    self.keep_vae_on_cpu,
+                    self.verbose,
+                )
+            )
         )

         # =========== Upscaling Model loading ===========

-        self._upscaler = _UpscalerModel(upscaler_path, self.n_threads, self.wtype, self.verbose)
+        self._upscaler = self._stack.enter_context(
+            contextlib.closing(
+                _UpscalerModel(
+                    upscaler_path,
+                    self.n_threads,
+                    self.wtype,
+                    self.verbose,
+                )
+            )
+        )

     @property
     def model(self) -> sd_cpp.sd_ctx_t_p:
@@ -216,6 +230,10 @@ def txt_to_img(

         sample_method = validate_and_set_input(sample_method, SAMPLE_METHOD_MAP, "sample_method")

+        # Ensure dimensions are multiples of 64
+        width = validate_dimensions(width, "width")
+        height = validate_dimensions(height, "height")
+
         # =========== Set seed ===========

         # Set a random seed if seed is negative
@@ -241,26 +259,27 @@ def sd_progress_callback(

         control_cond = self._format_control_cond(control_cond, canny, self.control_net_path)

-        # Run the txt2img to generate images
-        c_images = sd_cpp.txt2img(
-            self.model,
-            prompt.encode("utf-8"),
-            negative_prompt.encode("utf-8"),
-            clip_skip,
-            cfg_scale,
-            guidance,
-            width,
-            height,
-            sample_method,
-            sample_steps,
-            seed,
-            batch_count,
-            control_cond,
-            control_strength,
-            style_strength,
-            normalize_input,
-            input_id_images_path.encode("utf-8"),
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            # Generate images
+            c_images = sd_cpp.txt2img(
+                self.model,
+                prompt.encode("utf-8"),
+                negative_prompt.encode("utf-8"),
+                clip_skip,
+                cfg_scale,
+                guidance,
+                width,
+                height,
+                sample_method,
+                sample_steps,
+                seed,
+                batch_count,
+                control_cond,
+                control_strength,
+                style_strength,
+                normalize_input,
+                input_id_images_path.encode("utf-8"),
+            )

         # Convert the C array of images to a Python list of images
         return self._sd_image_t_p_to_images(c_images, batch_count, upscale_factor)
@@ -328,6 +347,10 @@ def img_to_img(

         sample_method = validate_and_set_input(sample_method, SAMPLE_METHOD_MAP, "sample_method")

+        # Ensure dimensions are multiples of 64
+        width = validate_dimensions(width, "width")
+        height = validate_dimensions(height, "height")
+
         # =========== Set seed ===========

         # Set a random seed if seed is negative
@@ -361,27 +384,29 @@ def sd_progress_callback(

         image_pointer = self._image_to_sd_image_t_p(image)

-        c_images = sd_cpp.img2img(
-            self.model,
-            image_pointer,
-            prompt.encode("utf-8"),
-            negative_prompt.encode("utf-8"),
-            clip_skip,
-            cfg_scale,
-            guidance,
-            width,
-            height,
-            sample_method,
-            sample_steps,
-            strength,
-            seed,
-            batch_count,
-            control_cond,
-            control_strength,
-            style_strength,
-            normalize_input,
-            input_id_images_path.encode("utf-8"),
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            # Generate images
+            c_images = sd_cpp.img2img(
+                self.model,
+                image_pointer,
+                prompt.encode("utf-8"),
+                negative_prompt.encode("utf-8"),
+                clip_skip,
+                cfg_scale,
+                guidance,
+                width,
+                height,
+                sample_method,
+                sample_steps,
+                strength,
+                seed,
+                batch_count,
+                control_cond,
+                control_strength,
+                style_strength,
+                normalize_input,
+                input_id_images_path.encode("utf-8"),
+            )

         return self._sd_image_t_p_to_images(c_images, batch_count, upscale_factor)

     # ============================================
@@ -426,14 +451,20 @@ def img_to_vid(

         Returns:
             A list of Pillow Images."""
-        if self.model is None:
-            raise Exception("Stable diffusion model not loaded.")
+        raise NotImplementedError("Not yet implemented.")

-        # =========== Validate string and int inputs ===========
+        # if self.model is None:
+        #     raise Exception("Stable diffusion model not loaded.")

-        sample_method = validate_and_set_input(sample_method, SAMPLE_METHOD_MAP, "sample_method")
+        # # =========== Validate string and int inputs ===========

-        # =========== Set seed ===========
+        # sample_method = validate_and_set_input(sample_method, SAMPLE_METHOD_MAP, "sample_method")
+
+        # # Ensure dimensions are multiples of 64
+        # width = validate_dimensions(width, "width")
+        # height = validate_dimensions(height, "height")
+
+        # # =========== Set seed ===========

         # # Set a random seed if seed is negative
         # if seed < 0:
@@ -464,25 +495,26 @@ def img_to_vid(

         # image_pointer = self._image_to_sd_image_t_p(image)

-        # c_video = sd_cpp.img2vid(
-        #     self.model,
-        #     image_pointer,
-        #     width,
-        #     height,
-        #     video_frames,
-        #     motion_bucket_id,
-        #     fps,
-        #     augmentation_level,
-        #     min_cfg,
-        #     cfg_scale,
-        #     sample_method,
-        #     sample_steps,
-        #     strength,
-        #     seed,
-        # )
+        # with suppress_stdout_stderr(disable=self.verbose):
+        #     # Generate the video
+        #     c_video = sd_cpp.img2vid(
+        #         self.model,
+        #         image_pointer,
+        #         width,
+        #         height,
+        #         video_frames,
+        #         motion_bucket_id,
+        #         fps,
+        #         augmentation_level,
+        #         min_cfg,
+        #         cfg_scale,
+        #         sample_method,
+        #         sample_steps,
+        #         strength,
+        #         seed,
+        #     )

         # return self._sd_image_t_p_to_images(c_video, video_frames, 1)
-        raise NotImplementedError("Not yet implemented.")

     # ============================================
     # Preprocess Canny
     # ============================================
@@ -491,8 +523,6 @@ def img_to_vid(
     def preprocess_canny(
         self,
         image: Union[Image.Image, str],
-        width: int = 512,
-        height: int = 512,
         high_threshold: float = 0.08,
         low_threshold: float = 0.08,
         weak: float = 0.8,
@@ -500,12 +530,10 @@ def preprocess_canny(
         inverse: bool = False,
         output_as_c_uint8: bool = False,
     ) -> Image.Image:
-        """Apply canny edge detection to an input image.
+        """Apply canny edge detection to an input image. Width and height determined automatically.

         Args:
             image: The input image path or Pillow Image.
-            width: Output image height, in pixel space.
-            height: Output image width, in pixel space.
             high_threshold: High edge detection threshold.
             low_threshold: Low edge detection threshold.
             weak: Weak edge thickness.
@@ -519,17 +547,18 @@ def preprocess_canny(
         # Convert the image to a C uint8 pointer
         data, width, height = self._cast_image(image)

-        # Run the preprocess canny
-        c_image = sd_cpp.preprocess_canny(
-            data,
-            width,
-            height,
-            high_threshold,
-            low_threshold,
-            weak,
-            strong,
-            inverse,
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            # Run the preprocess canny
+            c_image = sd_cpp.preprocess_canny(
+                data,
+                int(width),
+                int(height),
+                high_threshold,
+                low_threshold,
+                weak,
+                strong,
+                inverse,
+            )

         # Return the c_image if output_as_c_uint8 (for running inside txt2img/img2img pipeline)
         if output_as_c_uint8:
@@ -592,12 +621,13 @@ def sd_progress_callback(
         # Convert the image to a byte array
         image_bytes = self._image_to_sd_image_t_p(image)

-        # Upscale the image
-        image = sd_cpp.upscale(
-            self.upscaler,
-            image_bytes,
-            upscale_factor,
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            # Upscale the image
+            image = sd_cpp.upscale(
+                self.upscaler,
+                image_bytes,
+                upscale_factor,
+            )

         # Load the image from the C sd_image_t and convert it to a PIL Image
         image = self._dereference_sd_image_t_p(image)
@@ -649,9 +679,9 @@ def _format_control_cond(
         if not control_cond:
             return None

-        if not control_net_path:
-            log_event(1, "'control_net_path' not set. Skipping control condition.")
-            return None
+        # if not control_net_path:
+        #     log_event(1, "'control_net_path' not set. Skipping control condition.")
+        #     return None

         if canny:
             # Convert Pillow Image to canny edge detection image then format into C sd_image_t
@@ -773,6 +803,29 @@ def _bytes_to_image(self, byte_data: bytes, width: int, height: int):
         )
         return image

+    def __setstate__(self, state):
+        self.__init__(**state)
+
+    def close(self) -> None:
+        """Explicitly free the model from memory."""
+        self._stack.close()
+
+    def __del__(self) -> None:
+        self.close()
+
+
+# ============================================
+# Validate dimension parameters
+# ============================================
+
+
+def validate_dimensions(dimension: int | float, attribute_name: str) -> int:
+    """Dimensions must be a multiple of 64 otherwise a GGML_ASSERT error is encountered."""
+    dimension = int(dimension)
+    if dimension <= 0 or dimension % 64 != 0:
+        raise ValueError(f"The '{attribute_name}' must be a multiple of 64.")
+    return dimension
+

 # ============================================
 # Mapping from strings to constants