diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5de11a78..8e2be6d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,57 @@
 cmake_minimum_required(VERSION 3.16)
 
+# Project: stable_diffusion_cpp
+project(stable_diffusion_cpp)
+
+option(STABLE_DIFFUSION_BUILD "Build stable-diffusion.cpp shared library and install alongside python package" ON)
+
+if (STABLE_DIFFUSION_BUILD)
+    set(BUILD_SHARED_LIBS "ON")
+    option(SD_BUILD_SHARED_LIBS "" "ON")
+
+    # Building stable-diffusion
+    if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
+        # Need to disable these llama.cpp flags on Apple x86_64,
+        # otherwise users may encounter invalid instruction errors
+        set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
+        set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
+        set(GGML_FMA "Off" CACHE BOOL "ggml: enable FMA" FORCE)
+        set(GGML_F16C "Off" CACHE BOOL "ggml: enable F16C" FORCE)
+    endif()
+
+    add_subdirectory(dependency/stable-diffusion.cpp)
+    install(
+        TARGETS stable-diffusion
+        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
+        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
+        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
+        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
+        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
+    )
+
+    message(STATUS "SKBUILD_PLATLIB_DIR: ${SKBUILD_PLATLIB_DIR}")
+    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
+    install(
+        TARGETS stable-diffusion
+        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
+        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
+        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
+        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
+        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
+    )
+    # Workaround for Windows + CUDA
+    if (WIN32)
+        install(
+            FILES $<TARGET_RUNTIME_DLLS:stable-diffusion>
+            DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
+        )
+        install(
+            FILES $<TARGET_RUNTIME_DLLS:stable-diffusion>
+            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
+        )
+    endif()
+endif()
+
 # Project: llama_cpp
 project(llama_cpp)
 
@@ -122,55 +174,3 @@ if (LLAMA_BUILD)
         endif()
     endif()
 endif()
-
-# Project: stable_diffusion_cpp
-project(stable_diffusion_cpp)
-
-option(STABLE_DIFFUSION_BUILD "Build stable-diffusion.cpp shared library and install alongside python package" ON)
-
-if (STABLE_DIFFUSION_BUILD)
-    set(BUILD_SHARED_LIBS "ON")
-    option(SD_BUILD_SHARED_LIBS "" "ON")
-
-    # Building llama
-    if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
-        # Need to disable these llama.cpp flags on Apple x86_64,
-        # otherwise users may encounter invalid instruction errors
-        set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
-        set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
-        set(GGML_FMA "Off" CACHE BOOL "ggml: enable FMA" FORCE)
-        set(GGML_F16C "Off" CACHE BOOL "ggml: enable F16C" FORCE)
-    endif()
-
-    add_subdirectory(dependency/stable-diffusion.cpp)
-    install(
-        TARGETS stable-diffusion
-        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
-        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
-        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
-        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
-        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
-    )
-
-    message(STATUS "SKBUILD_PLATLIB_DIR: ${SKBUILD_PLATLIB_DIR}")
-    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
-    install(
-        TARGETS stable-diffusion
-        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
-        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
-        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
-        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
-        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
-    )
-    # Workaround for Windows + CUDA
-    if (WIN32)
-        install(
-            FILES $<TARGET_RUNTIME_DLLS:stable-diffusion>
-            DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
-        )
-        install(
-            FILES $<TARGET_RUNTIME_DLLS:stable-diffusion>
-            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
-        )
-    endif()
-endif()
\ No newline at end of file
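The duplicated install() destinations are deliberate: per the issue #374 comment, files staged into ${SKBUILD_PLATLIB_DIR} are not picked up in every build mode, so the library is also copied into the source tree under nexa/gguf/lib. A minimal sketch of how a wrapper module could resolve the bundled library at runtime; the helper name, search pattern, and layout below are assumptions for illustration, not the package's actual loader:

```python
# Illustrative only: locate the shared library that the install() rules
# above place in nexa/gguf/lib. All names here are assumptions.
import ctypes
import pathlib
import sys

def _load_stable_diffusion():  # hypothetical helper
    lib_dir = pathlib.Path(__file__).parent / "lib"
    suffix = {"win32": ".dll", "darwin": ".dylib"}.get(sys.platform, ".so")
    candidates = sorted(lib_dir.glob(f"*stable-diffusion*{suffix}"))
    if not candidates:
        raise FileNotFoundError(f"no stable-diffusion library in {lib_dir}")
    return ctypes.CDLL(str(candidates[0]))
```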
diff --git a/README.md b/README.md
index b1716169..939d93db 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Detailed API documentation is available [here](docs/index.html).
 
 ## Installation
 
-**GPU version(optional)**
+**GPU version (optional)**
 
 check if you have GPU acceleration (torch required)
 
@@ -40,16 +40,24 @@ check if you have GPU acceleration (torch required)
 ```
 CMAKE_ARGS="-DGGML_CUDA=on -DSD_CUBLAS=ON" pip install nexaai-gpu
 ```
+Or if you prefer, install our pre-built wheel:
+```bash
+pip install nexaai-cuda --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple
+```
 
 Apple M Chip:
 Apple icon -> about this mac -> Graphics
-
+
 if True:
 ```
 CMAKE_ARGS="-DGGML_METAL=on -DSD_METAL=ON" pip install nexaai-gpu
 ```
+Or if you prefer, install our pre-built wheel:
+```bash
+pip install nexaai-metal --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple
+```
 
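The README's "check if you have GPU acceleration (torch required)" step can also be scripted when choosing between these wheels. A small check along these lines (illustrative; not part of the SDK):

```python
# Illustrative helper for the "check if you have GPU acceleration" step.
import torch

if torch.cuda.is_available():
    print("CUDA GPU detected:", torch.cuda.get_device_name(0))  # cu124 wheel
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
    print("Apple Metal (MPS) detected")  # metal wheel
else:
    print("No GPU acceleration detected")  # CPU wheel
```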
@@ -77,7 +85,12 @@ check if you have GPU acceleration (torch required)
 ```
 pip install nexaai
 ```
-
+
+
+Or if you prefer, install the pre-built wheel:
+```bash
+pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple
+```
 
 ## Nexa CLI commands
 
diff --git a/nexa/gguf/nexa_inference_image.py b/nexa/gguf/nexa_inference_image.py
index 494374d7..8c85645a 100644
--- a/nexa/gguf/nexa_inference_image.py
+++ b/nexa/gguf/nexa_inference_image.py
@@ -43,7 +43,7 @@ class NexaImageInference:
     streamlit (bool): Run the inference in Streamlit UI.
 
     """
-    
+
     def __init__(self, model_path, **kwargs):
         self.model_path = None
 
@@ -81,7 +81,7 @@ def __init__(self, model_path, **kwargs):
             logging.error("Failed to load the model or pipeline.")
             exit(1)
 
-    # @SpinningCursorAnimation()
+    @SpinningCursorAnimation()
    def _load_model(self, model_path: str):
        with suppress_stdout_stderr():
            from nexa.gguf.sd.stable_diffusion import StableDiffusion
@@ -108,9 +108,9 @@ def _save_images(self, images):
             file_path = os.path.join(output_dir, file_name)
             image.save(file_path)
             logging.info(f"\nImage {i+1} saved to: {file_path}")
-        
-    def txt2img(self, 
-                prompt, 
+
+    def txt2img(self,
+                prompt,
                 negative_prompt="",
                 cfg_scale=7.5,
                 width=512,
@@ -151,7 +151,7 @@ def run_txt2img(self):
             )
             try:
                 images = self.txt2img(
-                    prompt, 
+                    prompt,
                     negative_prompt,
                     cfg_scale=self.params["guidance_scale"],
                     width=self.params["width"],
@@ -169,9 +169,9 @@
             except Exception as e:
                 logging.error(f"Error during generation: {e}", exc_info=True)
 
-    def img2img(self, 
-                image_path, 
-                prompt, 
+    def img2img(self,
+                image_path,
+                prompt,
                 negative_prompt="",
                 cfg_scale=7.5,
                 width=512,
@@ -213,8 +213,8 @@ def run_img2img(self):
                 negative_prompt = nexa_prompt(
                     "Enter your negative prompt (press Enter to skip): "
                 )
-                images = self.img2img(image_path, 
-                                      prompt, 
+                images = self.img2img(image_path,
+                                      prompt,
                                       negative_prompt,
                                       cfg_scale=self.params["guidance_scale"],
                                       width=self.params["width"],
@@ -224,7 +224,7 @@ def run_img2img(self):
                                       control_cond=self.params.get("control_image_path", ""),
                                       control_strength=self.params.get("control_strength", 0.9),
                                       )
-                
+
                 self._save_images(images)
             except KeyboardInterrupt:
                 print(EXIT_REMINDER)
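For context on the signatures touched above, a short usage sketch of the image API; the model identifier is a placeholder and keyword defaults mirror the txt2img signature in the diff:

```python
from nexa.gguf.nexa_inference_image import NexaImageInference

inference = NexaImageInference(model_path="sd1-4")  # placeholder model identifier
images = inference.txt2img(
    "a lighthouse at dawn, oil painting",  # prompt
    "blurry, low quality",                 # negative_prompt
    cfg_scale=7.5,                         # defaults per the signature above
    width=512,
)
for i, image in enumerate(images):
    image.save(f"output_{i}.png")  # txt2img returns PIL-style images (see _save_images)
```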
""" return self.model.create_chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens, top_k=top_k, top_p=top_p, stream=stream, stop=stop) - + def create_completion(self, prompt, temperature=0.7, max_tokens=2048, top_k=50, top_p=1.0, echo=False, stream=False, stop=None): """ Used for SDK. Generate completion for a given prompt. diff --git a/nexa/gguf/nexa_inference_vlm.py b/nexa/gguf/nexa_inference_vlm.py index 27c057be..e5627ffc 100644 --- a/nexa/gguf/nexa_inference_vlm.py +++ b/nexa/gguf/nexa_inference_vlm.py @@ -86,7 +86,7 @@ class NexaVLMInference: top_k (int): Top-k sampling parameter. top_p (float): Top-p sampling parameter """ - + def __init__(self, model_path, stop_words=None, **kwargs): self.params = DEFAULT_TEXT_GEN_PARAMS @@ -146,7 +146,7 @@ def __init__(self, model_path, stop_words=None, **kwargs): ) exit(1) - # @SpinningCursorAnimation() + @SpinningCursorAnimation() def _load_model(self): logging.debug(f"Loading model from {self.downloaded_path}") start_time = time.time() @@ -240,18 +240,18 @@ def run(self): except Exception as e: logging.error(f"Error during generation: {e}", exc_info=True) print("\n") - - def create_chat_completion(self, - messages, - max_tokens:int = 2048, + + def create_chat_completion(self, + messages, + max_tokens:int = 2048, temperature: float = 0.2, top_p: float = 0.95, top_k: int = 40, - stream=False, + stream=False, stop=[]): """ Generate text completion for a given chat prompt. - + Args: messages (list): List of messages in the chat prompt. temperature (float): Temperature for sampling. @@ -260,7 +260,7 @@ def create_chat_completion(self, top_p (float): Top-p sampling parameter. stream (bool): Stream the output. stop (list): List of stop words for early stopping. - + Returns: Iterator: An iterator of the generated text completion return format: @@ -285,9 +285,9 @@ def create_chat_completion(self, "prompt_tokens": 57, "total_tokens": 74 } - } - usage: message = completion.choices[0].message.content - + } + usage: message = completion.choices[0].message.content + """ return self.model.create_chat_completion( messages=messages, diff --git a/nexa/gguf/nexa_inference_voice.py b/nexa/gguf/nexa_inference_voice.py index fc8034e3..f61f872c 100644 --- a/nexa/gguf/nexa_inference_voice.py +++ b/nexa/gguf/nexa_inference_voice.py @@ -69,7 +69,7 @@ def __init__(self, model_path, **kwargs): exit(1) - # @SpinningCursorAnimation() + @SpinningCursorAnimation() def _load_model(self): from faster_whisper import WhisperModel @@ -91,7 +91,7 @@ def run(self): print(EXIT_REMINDER) except Exception as e: logging.error(f"Error during text generation: {e}", exc_info=True) - + def transcribe(self, audio, **kwargs): """ Transcribe the audio file. 
diff --git a/nexa/gguf/nexa_inference_voice.py b/nexa/gguf/nexa_inference_voice.py
index fc8034e3..f61f872c 100644
--- a/nexa/gguf/nexa_inference_voice.py
+++ b/nexa/gguf/nexa_inference_voice.py
@@ -69,7 +69,7 @@ def __init__(self, model_path, **kwargs):
             exit(1)
 
 
-    # @SpinningCursorAnimation()
+    @SpinningCursorAnimation()
     def _load_model(self):
         from faster_whisper import WhisperModel
 
@@ -91,7 +91,7 @@ def run(self):
                 print(EXIT_REMINDER)
             except Exception as e:
                 logging.error(f"Error during text generation: {e}", exc_info=True)
-    
+
     def transcribe(self, audio, **kwargs):
         """
         Transcribe the audio file.
@@ -171,7 +171,7 @@ def transcribe(self, audio, **kwargs):
             audio,
             **kwargs,
         )
-    
+
     def _transcribe_audio(self, audio_path):
         logging.debug(f"Transcribing audio from: {audio_path}")
 
diff --git a/nexa/utils.py b/nexa/utils.py
index 33499a27..2483582f 100644
--- a/nexa/utils.py
+++ b/nexa/utils.py
@@ -132,7 +132,13 @@ def _spin(self):
 
     def __enter__(self):
         if self._use_alternate_stream:
-            self.stream = open("/dev/tty", "w")
+            if sys.platform == "win32":  # Windows
+                self.stream = open('CONOUT$', "w")
+            else:
+                try:
+                    self.stream = open('/dev/tty', "w")
+                except (FileNotFoundError, OSError):
+                    self.stream = open('/dev/stdout', "w")
         self.thread = threading.Thread(target=self._spin)
         self.thread.start()
         return self
diff --git a/tomls/pyproject_cuda.toml b/tomls/pyproject_cuda.toml
index 4233cd49..69c93f58 100644
--- a/tomls/pyproject_cuda.toml
+++ b/tomls/pyproject_cuda.toml
@@ -81,8 +81,8 @@ build.verbose = true
 cmake.build-type = "Release"
 cmake.version = ">=3.16"
 cmake.args = [
-    "-DGGML_CUDA=ON",
     "-DSD_CUBLAS=ON",
+    "-DGGML_CUDA=ON",
     "-DCMAKE_CUDA_ARCHITECTURES=all",
     "-DGGML_CUDA_FORCE_MMQ=ON",
     "-DGGML_AVX2=OFF",
diff --git a/tomls/pyproject_metal.toml b/tomls/pyproject_metal.toml
index 584ebf07..a14b1155 100644
--- a/tomls/pyproject_metal.toml
+++ b/tomls/pyproject_metal.toml
@@ -81,8 +81,8 @@ build.verbose = true
 cmake.build-type = "Release"
 cmake.version = ">=3.16"
 cmake.args = [
-    "-DGGML_METAL=ON",
     "-DSD_METAL=ON",
+    "-DGGML_METAL=ON",
     "-DCMAKE_OSX_ARCHITECTURES=arm64",
     "-DCMAKE_APPLE_SILICON_PROCESSOR=arm64"
 ]
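The nexa/utils.py hunk above makes the spinner's alternate output stream platform-aware: the Windows console device is CONOUT$, and /dev/tty only exists when a controlling terminal is attached. The same logic as a standalone sketch:

```python
import sys

def open_console_stream():
    """Open a console stream for the spinner, per the utils.py change above."""
    if sys.platform == "win32":
        return open("CONOUT$", "w")      # Windows console device
    try:
        return open("/dev/tty", "w")     # controlling terminal on POSIX
    except (FileNotFoundError, OSError):
        return open("/dev/stdout", "w")  # fallback when no TTY is attached
```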