diff --git a/nexa/__init__.py b/nexa/__init__.py
index aa1cdece..230e693b 100644
--- a/nexa/__init__.py
+++ b/nexa/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.8.2"
+__version__ = "0.0.8.3"
diff --git a/nexa/gguf/nexa_inference_image.py b/nexa/gguf/nexa_inference_image.py
index d57bcbed..9bb4b2bb 100644
--- a/nexa/gguf/nexa_inference_image.py
+++ b/nexa/gguf/nexa_inference_image.py
@@ -91,16 +91,14 @@ def __init__(self, model_path, local_path=None, **kwargs):
             self.ae_downloaded_path, _ = pull_model(self.ae_path)
         if self.clip_l_path:
             self.clip_l_downloaded_path, _ = pull_model(self.clip_l_path)
-
-        if "lcm-dreamshaper" in self.model_path:
-            self.params = DEFAULT_IMG_GEN_PARAMS_LCM
+        if "lcm-dreamshaper" in self.model_path or "flux" in self.model_path:
+            self.params = DEFAULT_IMG_GEN_PARAMS_LCM.copy()  # both lcm-dreamshaper and flux use the same params
         elif "sdxl-turbo" in self.model_path:
-            self.params = DEFAULT_IMG_GEN_PARAMS_TURBO
+            self.params = DEFAULT_IMG_GEN_PARAMS_TURBO.copy()
         else:
-            self.params = DEFAULT_IMG_GEN_PARAMS
-
-        self.params.update(kwargs)
+            self.params = DEFAULT_IMG_GEN_PARAMS.copy()
+        self.params.update({k: v for k, v in kwargs.items() if v is not None})
         if not kwargs.get("streamlit", False):
             self._load_model(model_path)
             if self.model is None:
@@ -111,17 +109,29 @@ def __init__(self, model_path, local_path=None, **kwargs):
     def _load_model(self, model_path: str):
         with suppress_stdout_stderr():
             from nexa.gguf.sd.stable_diffusion import StableDiffusion
-
-            self.model = StableDiffusion(
-                model_path=self.downloaded_path,
-                lora_model_dir=self.params.get("lora_dir", ""),
-                n_threads=self.params.get("n_threads", multiprocessing.cpu_count()),
-                wtype=self.params.get(
-                    "wtype", NEXA_RUN_MODEL_PRECISION_MAP.get(model_path, "f32")
-                ),  # Weight type (options: default, f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
-                control_net_path=self.params.get("control_net_path", ""),
-                verbose=False,
-            )
+            if self.t5xxl_downloaded_path and self.ae_downloaded_path and self.clip_l_downloaded_path:
+                self.model = StableDiffusion(
+                    diffusion_model_path=self.downloaded_path,
+                    clip_l_path=self.clip_l_downloaded_path,
+                    t5xxl_path=self.t5xxl_downloaded_path,
+                    vae_path=self.ae_downloaded_path,
+                    n_threads=self.params.get("n_threads", multiprocessing.cpu_count()),
+                    wtype=self.params.get(
+                        "wtype", NEXA_RUN_MODEL_PRECISION_MAP.get(model_path, "default")
+                    ),  # Weight type (options: default, f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
+                    verbose=False,
+                )
+            else:
+                self.model = StableDiffusion(
+                    model_path=self.downloaded_path,
+                    lora_model_dir=self.params.get("lora_dir", ""),
+                    n_threads=self.params.get("n_threads", multiprocessing.cpu_count()),
+                    wtype=self.params.get(
+                        "wtype", NEXA_RUN_MODEL_PRECISION_MAP.get(model_path, "default")
+                    ),  # Weight type (options: default, f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
+                    control_net_path=self.params.get("control_net_path", ""),
+                    verbose=False,
+                )

     def _save_images(self, images):
         """
diff --git a/nexa/gguf/sd/_internals_diffusion.py b/nexa/gguf/sd/_internals_diffusion.py
index fe918458..55ecf0a5 100644
--- a/nexa/gguf/sd/_internals_diffusion.py
+++ b/nexa/gguf/sd/_internals_diffusion.py
@@ -1,4 +1,7 @@
 import os
+from contextlib import ExitStack
+
+from nexa.gguf.sd._utils_diffusion import suppress_stdout_stderr

 import nexa.gguf.sd.stable_diffusion_cpp as sd_cpp

@@ -59,6 +62,7 @@ def __init__(
         self.keep_control_net_cpu = keep_control_net_cpu
         self.keep_vae_on_cpu = keep_vae_on_cpu
         self.verbose = verbose
+        self._exit_stack = ExitStack()

         self.model = None

@@ -75,39 +79,50 @@ def __init__(
             raise ValueError(f"Diffusion model path does not exist: {diffusion_model_path}")

         if model_path or diffusion_model_path:
-            # Load the Stable Diffusion model ctx
-            self.model = sd_cpp.new_sd_ctx(
-                self.model_path.encode("utf-8"),
-                self.clip_l_path.encode("utf-8"),
-                self.t5xxl_path.encode("utf-8"),
-                self.diffusion_model_path.encode("utf-8"),
-                self.vae_path.encode("utf-8"),
-                self.taesd_path.encode("utf-8"),
-                self.control_net_path.encode("utf-8"),
-                self.lora_model_dir.encode("utf-8"),
-                self.embed_dir.encode("utf-8"),
-                self.stacked_id_embed_dir.encode("utf-8"),
-                self.vae_decode_only,
-                self.vae_tiling,
-                self.free_params_immediately,
-                self.n_threads,
-                self.wtype,
-                self.rng_type,
-                self.schedule,
-                self.keep_clip_on_cpu,
-                self.keep_control_net_cpu,
-                self.keep_vae_on_cpu,
-            )
+            with suppress_stdout_stderr(disable=verbose):
+                # Load the Stable Diffusion model ctx
+                self.model = sd_cpp.new_sd_ctx(
+                    self.model_path.encode("utf-8"),
+                    self.clip_l_path.encode("utf-8"),
+                    self.t5xxl_path.encode("utf-8"),
+                    self.diffusion_model_path.encode("utf-8"),
+                    self.vae_path.encode("utf-8"),
+                    self.taesd_path.encode("utf-8"),
+                    self.control_net_path.encode("utf-8"),
+                    self.lora_model_dir.encode("utf-8"),
+                    self.embed_dir.encode("utf-8"),
+                    self.stacked_id_embed_dir.encode("utf-8"),
+                    self.vae_decode_only,
+                    self.vae_tiling,
+                    self.free_params_immediately,
+                    self.n_threads,
+                    self.wtype,
+                    self.rng_type,
+                    self.schedule,
+                    self.keep_clip_on_cpu,
+                    self.keep_control_net_cpu,
+                    self.keep_vae_on_cpu,
+                )

             # Check if the model was loaded successfully
             if self.model is None:
                 raise ValueError(f"Failed to load model from file: {model_path}")

+        def free_ctx():
+            """Free the model from memory."""
+            if self.model is not None and self._free_sd_ctx is not None:
+                self._free_sd_ctx(self.model)
+                self.model = None
+
+        self._exit_stack.callback(free_ctx)
+
+    def close(self):
+        """Closes the exit stack, ensuring all context managers are exited."""
+        self._exit_stack.close()
+
     def __del__(self):
-        """Free the model when the object is deleted."""
-        if self.model is not None and self._free_sd_ctx is not None:
-            self._free_sd_ctx(self.model)
-            self.model = None
+        """Free memory when the object is deleted."""
+        self.close()


 # ============================================
@@ -132,6 +147,7 @@ def __init__(
         self.n_threads = n_threads
         self.wtype = wtype
         self.verbose = verbose
+        self._exit_stack = ExitStack()

         self.upscaler = None

@@ -151,8 +167,18 @@ def __init__(
         if self.upscaler is None:
             raise ValueError(f"Failed to load upscaler model from file: {upscaler_path}")

+        def free_ctx():
+            """Free the model from memory."""
+            if self.upscaler is not None and self._free_upscaler_ctx is not None:
+                self._free_upscaler_ctx(self.upscaler)
+                self.upscaler = None
+
+        self._exit_stack.callback(free_ctx)
+
+    def close(self):
+        """Closes the exit stack, ensuring all context managers are exited."""
+        self._exit_stack.close()
+
     def __del__(self):
-        """Free the upscaler model when the object is deleted."""
-        if self.upscaler is not None and self._free_upscaler_ctx is not None:
-            self._free_upscaler_ctx(self.upscaler)
-            self.upscaler = None
\ No newline at end of file
+        """Free memory when the object is deleted."""
+        self.close()
\ No newline at end of file
diff --git a/nexa/gguf/sd/_logger_diffusion.py b/nexa/gguf/sd/_logger_diffusion.py
index 1326a7d8..25a960a6 100644
--- a/nexa/gguf/sd/_logger_diffusion.py
+++ b/nexa/gguf/sd/_logger_diffusion.py
@@ -2,8 +2,6 @@
 import ctypes
 import logging

-import nexa.gguf.sd.stable_diffusion_cpp as stable_diffusion_cpp
-
 # enum sd_log_level_t {
 #     SD_LOG_DEBUG = 0,
 #     SD_LOG_INFO = 1,
diff --git a/nexa/gguf/sd/_utils_diffusion.py b/nexa/gguf/sd/_utils_diffusion.py
new file mode 100644
index 00000000..4b071c68
--- /dev/null
+++ b/nexa/gguf/sd/_utils_diffusion.py
@@ -0,0 +1,59 @@
+import os
+import sys
+
+# Avoid "LookupError: unknown encoding: ascii" when open() called in a destructor
+outnull_file = open(os.devnull, "w")
+errnull_file = open(os.devnull, "w")
+
+STDOUT_FILENO = 1
+STDERR_FILENO = 2
+
+
+class suppress_stdout_stderr(object):
+    """
+    Stops all output to stdout and stderr when used as a context manager (GGML will otherwise still print logs).
+    Source: https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/_utils.py
+    """
+
+    # NOTE: these must be "saved" here to avoid exceptions when using
+    # this context manager inside of a __del__ method
+    sys = sys
+    os = os
+
+    def __init__(self, disable: bool = True):
+        self.disable = disable
+
+    # Oddly enough this works better than the contextlib version
+    def __enter__(self):
+        if self.disable:
+            return self
+
+        self.old_stdout_fileno_undup = STDOUT_FILENO
+        self.old_stderr_fileno_undup = STDERR_FILENO
+
+        self.old_stdout_fileno = self.os.dup(self.old_stdout_fileno_undup)
+        self.old_stderr_fileno = self.os.dup(self.old_stderr_fileno_undup)
+
+        self.old_stdout = self.sys.stdout
+        self.old_stderr = self.sys.stderr
+
+        self.os.dup2(outnull_file.fileno(), self.old_stdout_fileno_undup)
+        self.os.dup2(errnull_file.fileno(), self.old_stderr_fileno_undup)
+
+        self.sys.stdout = outnull_file
+        self.sys.stderr = errnull_file
+        return self
+
+    def __exit__(self, *_):
+        if self.disable:
+            return
+
+        # Check if sys.stdout and sys.stderr have fileno method
+        self.sys.stdout = self.old_stdout
+        self.sys.stderr = self.old_stderr
+
+        self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
+        self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
+
+        self.os.close(self.old_stdout_fileno)
+        self.os.close(self.old_stderr_fileno)
\ No newline at end of file
diff --git a/nexa/gguf/sd/stable_diffusion.py b/nexa/gguf/sd/stable_diffusion.py
index e286f988..cf0e4cb1 100644
--- a/nexa/gguf/sd/stable_diffusion.py
+++ b/nexa/gguf/sd/stable_diffusion.py
@@ -2,15 +2,16 @@
 import random
 import ctypes
 import multiprocessing
-from PIL import Image
+import contextlib

+from PIL import Image

 import nexa.gguf.sd.stable_diffusion_cpp as sd_cpp
 from nexa.gguf.sd.stable_diffusion_cpp import GGMLType, RNGType, Schedule, SampleMethod
 from nexa.gguf.sd._internals_diffusion import _StableDiffusionModel, _UpscalerModel

-# from nexa._logger_diffusion import set_verbose
+from nexa.gguf.sd._utils_diffusion import suppress_stdout_stderr


 class StableDiffusion:
@@ -104,6 +105,7 @@ def __init__(
         self.keep_clip_on_cpu = keep_clip_on_cpu
         self.keep_control_net_cpu = keep_control_net_cpu
         self.keep_vae_on_cpu = keep_vae_on_cpu
+        self._stack = contextlib.ExitStack()

         # =========== Logging ===========

@@ -118,34 +120,46 @@ def __init__(

         # =========== SD Model loading ===========

-        # Load the Stable Diffusion model
-        self._model = _StableDiffusionModel(
-            self.model_path,
-            self.clip_l_path,
-            self.t5xxl_path,
-            self.diffusion_model_path,
-            self.vae_path,
-            self.taesd_path,
-            self.control_net_path,
-            self.lora_model_dir,
-            self.embed_dir,
-            self.stacked_id_embed_dir,
-            self.vae_decode_only,
-            self.vae_tiling,
-            self.free_params_immediately,
-            self.n_threads,
-            self.wtype,
-            self.rng_type,
-            self.schedule,
-            self.keep_clip_on_cpu,
-            self.keep_control_net_cpu,
-            self.keep_vae_on_cpu,
-            self.verbose,
+        self._model = self._stack.enter_context(
+            contextlib.closing(
+                _StableDiffusionModel(
+                    self.model_path,
+                    self.clip_l_path,
+                    self.t5xxl_path,
+                    self.diffusion_model_path,
+                    self.vae_path,
+                    self.taesd_path,
+                    self.control_net_path,
+                    self.lora_model_dir,
+                    self.embed_dir,
+                    self.stacked_id_embed_dir,
+                    self.vae_decode_only,
+                    self.vae_tiling,
+                    self.free_params_immediately,
+                    self.n_threads,
+                    self.wtype,
+                    self.rng_type,
+                    self.schedule,
+                    self.keep_clip_on_cpu,
+                    self.keep_control_net_cpu,
+                    self.keep_vae_on_cpu,
+                    self.verbose,
+                )
+            )
         )

         # =========== Upscaling Model loading ===========

-        self._upscaler = _UpscalerModel(upscaler_path, self.n_threads, self.wtype, self.verbose)
+        self._upscaler = self._stack.enter_context(
+            contextlib.closing(
+                _UpscalerModel(
+                    upscaler_path,
+                    self.n_threads,
+                    self.wtype,
+                    self.verbose,
+                )
+            )
+        )

     @property
     def model(self) -> sd_cpp.sd_ctx_t_p:
@@ -216,6 +230,10 @@ def txt_to_img(

         sample_method = validate_and_set_input(sample_method, SAMPLE_METHOD_MAP, "sample_method")

+        # Ensure dimensions are multiples of 64
+        width = validate_dimensions(width, "width")
+        height = validate_dimensions(height, "height")
+
         # =========== Set seed ===========

         # Set a random seed if seed is negative
@@ -241,26 +259,27 @@ def sd_progress_callback(

         control_cond = self._format_control_cond(control_cond, canny, self.control_net_path)

-        # Run the txt2img to generate images
-        c_images = sd_cpp.txt2img(
-            self.model,
-            prompt.encode("utf-8"),
-            negative_prompt.encode("utf-8"),
-            clip_skip,
-            cfg_scale,
-            guidance,
-            width,
-            height,
-            sample_method,
-            sample_steps,
-            seed,
-            batch_count,
-            control_cond,
-            control_strength,
-            style_strength,
-            normalize_input,
-            input_id_images_path.encode("utf-8"),
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            # Generate images
+            c_images = sd_cpp.txt2img(
+                self.model,
+                prompt.encode("utf-8"),
+                negative_prompt.encode("utf-8"),
+                clip_skip,
+                cfg_scale,
+                guidance,
+                width,
+                height,
+                sample_method,
+                sample_steps,
+                seed,
+                batch_count,
+                control_cond,
+                control_strength,
+                style_strength,
+                normalize_input,
+                input_id_images_path.encode("utf-8"),
+            )

         # Convert the C array of images to a Python list of images
         return self._sd_image_t_p_to_images(c_images, batch_count, upscale_factor)
@@ -328,6 +347,10 @@ def img_to_img(

         sample_method = validate_and_set_input(sample_method, SAMPLE_METHOD_MAP, "sample_method")

+        # Ensure dimensions are multiples of 64
+        width = validate_dimensions(width, "width")
+        height = validate_dimensions(height, "height")
+
         # =========== Set seed ===========

         # Set a random seed if seed is negative
@@ -361,27 +384,29 @@ def sd_progress_callback(

         image_pointer = self._image_to_sd_image_t_p(image)

-        c_images = sd_cpp.img2img(
-            self.model,
-            image_pointer,
-            prompt.encode("utf-8"),
-            negative_prompt.encode("utf-8"),
-            clip_skip,
-            cfg_scale,
-            guidance,
-            width,
-            height,
-            sample_method,
-            sample_steps,
-            strength,
-            seed,
-            batch_count,
-            control_cond,
-            control_strength,
-            style_strength,
-            normalize_input,
-            input_id_images_path.encode("utf-8"),
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            # Generate images
+            c_images = sd_cpp.img2img(
+                self.model,
+                image_pointer,
+                prompt.encode("utf-8"),
+                negative_prompt.encode("utf-8"),
+                clip_skip,
+                cfg_scale,
+                guidance,
+                width,
+                height,
+                sample_method,
+                sample_steps,
+                strength,
+                seed,
+                batch_count,
+                control_cond,
+                control_strength,
+                style_strength,
+                normalize_input,
+                input_id_images_path.encode("utf-8"),
+            )

         return self._sd_image_t_p_to_images(c_images, batch_count, upscale_factor)

     # ============================================
@@ -426,14 +451,20 @@ def img_to_vid(

         Returns:
             A list of Pillow Images."""
-        if self.model is None:
-            raise Exception("Stable diffusion model not loaded.")
+        raise NotImplementedError("Not yet implemented.")

-        # =========== Validate string and int inputs ===========
+        # if self.model is None:
+        #     raise Exception("Stable diffusion model not loaded.")

-        sample_method = validate_and_set_input(sample_method, SAMPLE_METHOD_MAP, "sample_method")
+        # # =========== Validate string and int inputs ===========

-        # =========== Set seed ===========
+        # sample_method = validate_and_set_input(sample_method, SAMPLE_METHOD_MAP, "sample_method")
+
+        # # Ensure dimensions are multiples of 64
+        # width = validate_dimensions(width, "width")
+        # height = validate_dimensions(height, "height")
+
+        # # =========== Set seed ===========

         # # Set a random seed if seed is negative
         # if seed < 0:
@@ -464,25 +495,26 @@ def img_to_vid(

         # image_pointer = self._image_to_sd_image_t_p(image)

-        # c_video = sd_cpp.img2vid(
-        #     self.model,
-        #     image_pointer,
-        #     width,
-        #     height,
-        #     video_frames,
-        #     motion_bucket_id,
-        #     fps,
-        #     augmentation_level,
-        #     min_cfg,
-        #     cfg_scale,
-        #     sample_method,
-        #     sample_steps,
-        #     strength,
-        #     seed,
-        # )
+        # with suppress_stdout_stderr(disable=self.verbose):
+        #     # Generate the video
+        #     c_video = sd_cpp.img2vid(
+        #         self.model,
+        #         image_pointer,
+        #         width,
+        #         height,
+        #         video_frames,
+        #         motion_bucket_id,
+        #         fps,
+        #         augmentation_level,
+        #         min_cfg,
+        #         cfg_scale,
+        #         sample_method,
+        #         sample_steps,
+        #         strength,
+        #         seed,
+        #     )

         # return self._sd_image_t_p_to_images(c_video, video_frames, 1)
-        raise NotImplementedError("Not yet implemented.")

     # ============================================
     # Preprocess Canny
     # ============================================
@@ -491,8 +523,6 @@ def img_to_vid(
     def preprocess_canny(
         self,
         image: Union[Image.Image, str],
-        width: int = 512,
-        height: int = 512,
         high_threshold: float = 0.08,
         low_threshold: float = 0.08,
         weak: float = 0.8,
@@ -500,12 +530,10 @@ def preprocess_canny(
         inverse: bool = False,
         output_as_c_uint8: bool = False,
     ) -> Image.Image:
-        """Apply canny edge detection to an input image.
+        """Apply canny edge detection to an input image. Width and height determined automatically.

         Args:
             image: The input image path or Pillow Image.
-            width: Output image height, in pixel space.
-            height: Output image width, in pixel space.
             high_threshold: High edge detection threshold.
             low_threshold: Low edge detection threshold.
             weak: Weak edge thickness.
@@ -519,17 +547,18 @@ def preprocess_canny(
         # Convert the image to a C uint8 pointer
         data, width, height = self._cast_image(image)

-        # Run the preprocess canny
-        c_image = sd_cpp.preprocess_canny(
-            data,
-            width,
-            height,
-            high_threshold,
-            low_threshold,
-            weak,
-            strong,
-            inverse,
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            # Run the preprocess canny
+            c_image = sd_cpp.preprocess_canny(
+                data,
+                int(width),
+                int(height),
+                high_threshold,
+                low_threshold,
+                weak,
+                strong,
+                inverse,
+            )

         # Return the c_image if output_as_c_uint8 (for running inside txt2img/img2img pipeline)
         if output_as_c_uint8:
@@ -592,12 +621,13 @@ def sd_progress_callback(
         # Convert the image to a byte array
         image_bytes = self._image_to_sd_image_t_p(image)

-        # Upscale the image
-        image = sd_cpp.upscale(
-            self.upscaler,
-            image_bytes,
-            upscale_factor,
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            # Upscale the image
+            image = sd_cpp.upscale(
+                self.upscaler,
+                image_bytes,
+                upscale_factor,
+            )

         # Load the image from the C sd_image_t and convert it to a PIL Image
         image = self._dereference_sd_image_t_p(image)
@@ -649,9 +679,9 @@ def _format_control_cond(
         if not control_cond:
             return None

-        if not control_net_path:
-            log_event(1, "'control_net_path' not set. Skipping control condition.")
-            return None
+        # if not control_net_path:
+        #     log_event(1, "'control_net_path' not set. Skipping control condition.")
+        #     return None

         if canny:
             # Convert Pillow Image to canny edge detection image then format into C sd_image_t
@@ -773,6 +803,29 @@ def _bytes_to_image(self, byte_data: bytes, width: int, height: int):
         )
         return image

+    def __setstate__(self, state):
+        self.__init__(**state)
+
+    def close(self) -> None:
+        """Explicitly free the model from memory."""
+        self._stack.close()
+
+    def __del__(self) -> None:
+        self.close()
+
+
+# ============================================
+# Validate dimension parameters
+# ============================================
+
+
+def validate_dimensions(dimension: int | float, attribute_name: str) -> int:
+    """Dimensions must be a multiple of 64 otherwise a GGML_ASSERT error is encountered."""
+    dimension = int(dimension)
+    if dimension <= 0 or dimension % 64 != 0:
+        raise ValueError(f"The '{attribute_name}' must be a multiple of 64.")
+    return dimension
+

 # ============================================
 # Mapping from strings to constants