Ethan/bugfix #27

Merged: 5 commits, Aug 22, 2024

104 changes: 52 additions & 52 deletions CMakeLists.txt
@@ -1,5 +1,57 @@
cmake_minimum_required(VERSION 3.16)

# Project: stable_diffusion_cpp
project(stable_diffusion_cpp)

option(STABLE_DIFFUSION_BUILD "Build stable-diffusion.cpp shared library and install alongside python package" ON)

if (STABLE_DIFFUSION_BUILD)
set(BUILD_SHARED_LIBS "ON")
option(SD_BUILD_SHARED_LIBS "" "ON")

# Building stable-diffusion
if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
# Need to disable these ggml CPU flags on Apple x86_64,
# otherwise users may encounter invalid-instruction errors
set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
set(GGML_FMA "Off" CACHE BOOL "ggml: enable FMA" FORCE)
set(GGML_F16C "Off" CACHE BOOL "ggml: enable F16C" FORCE)
endif()

add_subdirectory(dependency/stable-diffusion.cpp)
install(
TARGETS stable-diffusion
LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
)

message(STATUS "SKBUILD_PLATLIB_DIR: ${SKBUILD_PLATLIB_DIR}")
# Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
install(
TARGETS stable-diffusion
LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
)
# Workaround for Windows + CUDA
if (WIN32)
install(
FILES $<TARGET_RUNTIME_DLLS:stable-diffusion>
DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
)
install(
FILES $<TARGET_RUNTIME_DLLS:stable-diffusion>
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
)
endif()
endif()

# Project: llama_cpp
project(llama_cpp)

@@ -122,55 +174,3 @@ if (LLAMA_BUILD)
endif()
endif()
endif()

# Project: stable_diffusion_cpp
project(stable_diffusion_cpp)

option(STABLE_DIFFUSION_BUILD "Build stable-diffusion.cpp shared library and install alongside python package" ON)

if (STABLE_DIFFUSION_BUILD)
set(BUILD_SHARED_LIBS "ON")
option(SD_BUILD_SHARED_LIBS "" "ON")

# Building stable-diffusion
if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
# Need to disable these ggml CPU flags on Apple x86_64,
# otherwise users may encounter invalid-instruction errors
set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
set(GGML_FMA "Off" CACHE BOOL "ggml: enable FMA" FORCE)
set(GGML_F16C "Off" CACHE BOOL "ggml: enable F16C" FORCE)
endif()

add_subdirectory(dependency/stable-diffusion.cpp)
install(
TARGETS stable-diffusion
LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
)

message(STATUS "SKBUILD_PLATLIB_DIR: ${SKBUILD_PLATLIB_DIR}")
# Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
install(
TARGETS stable-diffusion
LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
)
# Workaround for Windows + CUDA
if (WIN32)
install(
FILES $<TARGET_RUNTIME_DLLS:stable-diffusion>
DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
)
install(
FILES $<TARGET_RUNTIME_DLLS:stable-diffusion>
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
)
endif()
endif()
19 changes: 16 additions & 3 deletions README.md
@@ -24,7 +24,7 @@ Detailed API documentation is available [here](docs/index.html).

## Installation

**GPU version (optional)**

Check if you have GPU acceleration (torch required):
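For example, a minimal probe (assumes torch is already installed; these are standard torch APIs, not part of this SDK):

```python
# Quick GPU-availability check with torch.
import torch

print(torch.cuda.is_available())          # True if a CUDA device is usable
print(torch.backends.mps.is_available())  # True on Apple Silicon (Metal)
```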
<details>
@@ -40,16 +40,24 @@ check if you have GPU acceleration (torch required)
```
CMAKE_ARGS="-DGGML_CUDA=on -DSD_CUBLAS=ON" pip install nexaai-gpu
```
Or, if you prefer, install our pre-built wheel:
```bash
pip install nexaai-cuda --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple
```
</details>
<details>
<summary>Apple M Chip:</summary>
Apple icon -> About This Mac -> Graphics

If so:

```
CMAKE_ARGS="-DGGML_METAL=on -DSD_METAL=ON" pip install nexaai-gpu
```
Or, if you prefer, install our pre-built wheel:
```bash
pip install nexaai-metal --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple
```
</details>

<details>
@@ -77,7 +85,12 @@ check if you have GPU acceleration (torch required)
```
pip install nexaai
```
<details>
</details>

Or, if you prefer, install the pre-built wheel:
```bash
pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple
```

## Nexa CLI commands

24 changes: 12 additions & 12 deletions nexa/gguf/nexa_inference_image.py
@@ -43,7 +43,7 @@ class NexaImageInference:
streamlit (bool): Run the inference in Streamlit UI.

"""


def __init__(self, model_path, **kwargs):
self.model_path = None
@@ -81,7 +81,7 @@ def __init__(self, model_path, **kwargs):
logging.error("Failed to load the model or pipeline.")
exit(1)

# @SpinningCursorAnimation()
@SpinningCursorAnimation()
def _load_model(self, model_path: str):
with suppress_stdout_stderr():
from nexa.gguf.sd.stable_diffusion import StableDiffusion
@@ -108,9 +108,9 @@ def _save_images(self, images):
file_path = os.path.join(output_dir, file_name)
image.save(file_path)
logging.info(f"\nImage {i+1} saved to: {file_path}")

def txt2img(self,
prompt,
negative_prompt="",
cfg_scale=7.5,
width=512,
@@ -151,7 +151,7 @@ def run_txt2img(self):
)
try:
images = self.txt2img(
prompt,
negative_prompt,
cfg_scale=self.params["guidance_scale"],
width=self.params["width"],
@@ -169,9 +169,9 @@ def run_txt2img(self):
except Exception as e:
logging.error(f"Error during generation: {e}", exc_info=True)

def img2img(self,
image_path,
prompt,
negative_prompt="",
cfg_scale=7.5,
width=512,
@@ -213,8 +213,8 @@ def run_img2img(self):
negative_prompt = nexa_prompt(
"Enter your negative prompt (press Enter to skip): "
)
images = self.img2img(image_path,
prompt,
negative_prompt,
cfg_scale=self.params["guidance_scale"],
width=self.params["width"],
@@ -224,7 +224,7 @@ def run_img2img(self):
control_cond=self.params.get("control_image_path", ""),
control_strength=self.params.get("control_strength", 0.9),
)

self._save_images(images)
except KeyboardInterrupt:
print(EXIT_REMINDER)
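For context, a hypothetical usage sketch of the `txt2img` API touched above — the import path and method names come from the diff, while the model identifier and prompts are made-up placeholders:

```python
# Hypothetical sketch, not part of this PR: generate an image via the SDK API
# shown in the diff. "sd1-4" is an assumed model identifier.
from nexa.gguf.nexa_inference_image import NexaImageInference

sd = NexaImageInference(model_path="sd1-4")
images = sd.txt2img(
    "a lighthouse at dusk",
    negative_prompt="blurry",
    cfg_scale=7.5,
    width=512,
)
sd._save_images(images)  # helper from the diff; writes numbered files to an output dir
```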
10 changes: 5 additions & 5 deletions nexa/gguf/nexa_inference_text.py
@@ -40,7 +40,7 @@ class NexaTextInference:
top_k (int): Top-k sampling parameter.
top_p (float): Top-p sampling parameter
"""

def __init__(self, model_path, stop_words=None, **kwargs):
self.params = DEFAULT_TEXT_GEN_PARAMS
self.params.update(kwargs)
@@ -98,7 +98,7 @@ def create_embedding(
"""
return self.model.create_embedding(input)

# @SpinningCursorAnimation()
@SpinningCursorAnimation()
def _load_model(self):
logging.debug(f"Loading model from {self.downloaded_path}")
start_time = time.time()
@@ -140,7 +140,7 @@ def _load_model(self):

def run(self):
"""
CLI interactive session. Not for SDK.
"""
while True:
generated_text = ""
@@ -189,7 +189,7 @@ def run(self):
except Exception as e:
logging.error(f"Error during generation: {e}", exc_info=True)
print("\n")

def create_chat_completion(self, messages, temperature=0.7, max_tokens=2048, top_k=50, top_p=1.0, stream=False, stop=None):
"""
Used for SDK. Generate completion for a chat conversation.
@@ -207,7 +207,7 @@ def create_chat_completion(self, messages, temperature=0.7, max_tokens=2048, top
Iterator: Iterator for the completion.
"""
return self.model.create_chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens, top_k=top_k, top_p=top_p, stream=stream, stop=stop)

def create_completion(self, prompt, temperature=0.7, max_tokens=2048, top_k=50, top_p=1.0, echo=False, stream=False, stop=None):
"""
Used for SDK. Generate completion for a given prompt.
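A hypothetical sketch of the two SDK entry points above, `create_chat_completion` and `create_completion` — the signatures mirror the diff, while the model identifier and prompts are assumptions:

```python
# Hypothetical sketch, not part of this PR.
from nexa.gguf.nexa_inference_text import NexaTextInference

llm = NexaTextInference(model_path="gemma")  # assumed model identifier

# Chat-style completion; parameters mirror the signature in the diff.
chat = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Write a haiku about GPUs."}],
    temperature=0.7,
    max_tokens=256,
)

# Plain prompt completion.
text = llm.create_completion("The capital of France is", max_tokens=16)
```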
24 changes: 12 additions & 12 deletions nexa/gguf/nexa_inference_vlm.py
@@ -86,7 +86,7 @@ class NexaVLMInference:
top_k (int): Top-k sampling parameter.
top_p (float): Top-p sampling parameter
"""


def __init__(self, model_path, stop_words=None, **kwargs):
self.params = DEFAULT_TEXT_GEN_PARAMS
@@ -146,7 +146,7 @@ def __init__(self, model_path, stop_words=None, **kwargs):
)
exit(1)

# @SpinningCursorAnimation()
@SpinningCursorAnimation()
def _load_model(self):
logging.debug(f"Loading model from {self.downloaded_path}")
start_time = time.time()
@@ -240,18 +240,18 @@ def run(self):
except Exception as e:
logging.error(f"Error during generation: {e}", exc_info=True)
print("\n")

def create_chat_completion(self,
messages,
max_tokens:int = 2048,
temperature: float = 0.2,
top_p: float = 0.95,
top_k: int = 40,
stream=False,
stop=[]):
"""
Generate text completion for a given chat prompt.

Args:
messages (list): List of messages in the chat prompt.
temperature (float): Temperature for sampling.
@@ -260,7 +260,7 @@ def create_chat_completion(self,
top_p (float): Top-p sampling parameter.
stream (bool): Stream the output.
stop (list): List of stop words for early stopping.

Returns:
Iterator: An iterator of the generated text completion
return format:
@@ -285,9 +285,9 @@ def create_chat_completion(self,
"prompt_tokens": 57,
"total_tokens": 74
}
}
usage: message = completion.choices[0].message.content

"""
return self.model.create_chat_completion(
messages=messages,
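A hypothetical usage sketch of the VLM chat API above; the model identifier and message payload are assumptions, and the result access follows the return format given in the docstring:

```python
# Hypothetical sketch, not part of this PR.
from nexa.gguf.nexa_inference_vlm import NexaVLMInference

vlm = NexaVLMInference(model_path="nanollava")  # assumed model identifier
completion = vlm.create_chat_completion(
    messages=[{"role": "user", "content": "Describe this image."}],
    max_tokens=2048,
    temperature=0.2,
    top_p=0.95,
    top_k=40,
)
# Dict-style access matching the JSON return format in the docstring
# (the docstring also shows attribute-style access).
message = completion["choices"][0]["message"]["content"]
```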
6 changes: 3 additions & 3 deletions nexa/gguf/nexa_inference_voice.py
@@ -69,7 +69,7 @@ def __init__(self, model_path, **kwargs):
exit(1)


# @SpinningCursorAnimation()
@SpinningCursorAnimation()
def _load_model(self):
from faster_whisper import WhisperModel

@@ -91,7 +91,7 @@ def run(self):
print(EXIT_REMINDER)
except Exception as e:
logging.error(f"Error during text generation: {e}", exc_info=True)

def transcribe(self, audio, **kwargs):
"""
Transcribe the audio file.
@@ -171,7 +171,7 @@ def transcribe(self, audio, **kwargs):
audio,
**kwargs,
)


def _transcribe_audio(self, audio_path):
logging.debug(f"Transcribing audio from: {audio_path}")
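A hypothetical sketch of the transcription API above — only `_load_model`, `run`, and `transcribe` appear in the diff, so the class name and model identifier here are assumptions:

```python
# Hypothetical sketch, not part of this PR. kwargs pass through to
# faster_whisper.WhisperModel.transcribe (e.g. beam_size).
from nexa.gguf.nexa_inference_voice import NexaVoiceInference  # assumed class name

stt = NexaVoiceInference(model_path="faster-whisper-tiny")  # assumed identifier
result = stt.transcribe("sample.wav", beam_size=5)
```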
8 changes: 7 additions & 1 deletion nexa/utils.py
@@ -132,7 +132,13 @@ def _spin(self):

def __enter__(self):
if self._use_alternate_stream:
self.stream = open("/dev/tty", "w")
if sys.platform == "win32": # Windows
self.stream = open('CONOUT$', "w")
else:
try:
self.stream = open('/dev/tty', "w")
except (FileNotFoundError, OSError):
self.stream = open('/dev/stdout', "w")
self.thread = threading.Thread(target=self._spin)
self.thread.start()
return self
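This stream fix is what lets the `@SpinningCursorAnimation()` decorator be re-enabled across the inference modules above: the spinner now writes to `CONOUT$` on Windows and `/dev/tty` elsewhere, falling back to `/dev/stdout`. A minimal usage sketch (the import path is an assumption):

```python
# Hypothetical sketch, not part of this PR.
from nexa.utils import SpinningCursorAnimation  # assumed import path
import time

@SpinningCursorAnimation()  # shows a spinner while the wrapped call runs
def load_model():
    time.sleep(3)  # stand-in for a slow model load

load_model()
```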
2 changes: 1 addition & 1 deletion tomls/pyproject_cuda.toml
@@ -81,8 +81,8 @@ build.verbose = true
cmake.build-type = "Release"
cmake.version = ">=3.16"
cmake.args = [
"-DGGML_CUDA=ON",
"-DSD_CUBLAS=ON",
"-DGGML_CUDA=ON",
"-DCMAKE_CUDA_ARCHITECTURES=all",
"-DGGML_CUDA_FORCE_MMQ=ON",
"-DGGML_AVX2=OFF",