0.3.5 - add more logging and increase default memory limit
Benjoyo committed on May 6, 2024
1 parent 9cb513e · commit 082c77b
Showing 3 changed files with 5 additions and 2 deletions.
2 changes: 1 addition & 1 deletion bpm_ai_inference/daemon.py
@@ -38,7 +38,7 @@
     host=os.getenv('DAEMON_HOST', '0.0.0.0'),
     port=int(os.getenv('DAEMON_PORT', 6666)),
     instance_strategy=os.getenv('INSTANCE_STRATEGY', 'memory_limit'),
-    max_memory=int(os.getenv('SOFT_MEMORY_LIMIT', 8_589_934_592))
+    max_memory=int(os.getenv('SOFT_MEMORY_LIMIT', 16_000_000_000))
 )

 for c in remote_classes:
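The new default roughly doubles the soft limit: 8_589_934_592 bytes is exactly 8 GiB, while 16_000_000_000 decimal bytes is about 14.9 GiB. A minimal sketch of how the override resolves, assuming the daemon reads the variable as a plain byte count (the surrounding serve() call is not shown in the diff):

    import os

    # Override the soft memory limit back to 8 GiB before starting the daemon.
    # Left unset, the new default of 16_000_000_000 bytes (~14.9 GiB) applies.
    os.environ["SOFT_MEMORY_LIMIT"] = "8589934592"

    max_memory = int(os.getenv("SOFT_MEMORY_LIMIT", 16_000_000_000))
    print(max_memory)  # 8589934592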
2 changes: 1 addition & 1 deletion bpm_ai_inference/llm/llama_cpp/llama_chat.py
@@ -46,7 +46,7 @@ def __init__(
     filename: str = DEFAULT_QUANT_BALANCED,
     temperature: float = DEFAULT_TEMPERATURE,
     max_retries: int = DEFAULT_MAX_RETRIES,
-    force_offline: bool = os.getenv(FORCE_OFFLINE_FLAG, False)
+    force_offline: bool = (os.getenv(FORCE_OFFLINE_FLAG, "false").lower() == "true")
 ):
     if not has_llama_cpp_python:
         raise ImportError('llama-cpp-python is not installed')
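This change fixes a classic env-flag bug: os.getenv returns strings, so any non-empty value, including the string "false", was truthy when bound to force_offline. A minimal demo of the old vs. new behavior ("FORCE_OFFLINE" stands in for whatever name the FORCE_OFFLINE_FLAG constant holds):

    import os

    os.environ["FORCE_OFFLINE"] = "false"

    buggy = os.getenv("FORCE_OFFLINE", False)                      # returns the string "false"
    fixed = os.getenv("FORCE_OFFLINE", "false").lower() == "true"  # parses it properly

    print(bool(buggy), fixed)  # True False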
3 changes: 3 additions & 0 deletions bpm_ai_inference/util/optimum.py
@@ -39,6 +39,8 @@ def _holisticon_onnx_repository_id(model_name: str) -> str:


 def get_optimized_model(model: str, task: str, optimization_level: int = None, push_to_hub: bool = False):
+    logger.info(f"Loading model {model}...")
+
     model_name = model
     model_dir = hf_home() + "/onnx/" + model.replace("/", "--")
     tokenizer = AutoTokenizer.from_pretrained(model)
@@ -123,6 +125,7 @@ def _export_to_onnx(repository_id: str, model_dir, task):

 @timer
 def _optimize(repository_id: str, model_dir, task, push_to_hub=False):
+    logger.info(f"Optimizing model {repository_id}...")
     model_class = _task_to_model(task)

     # try to load from hub or cache
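Both new messages go through the module-level logger. Assuming it is a standard library logging.Logger (its definition is outside the diff), enabling INFO output is enough to see them:

    import logging

    # INFO level surfaces the new "Loading model ..." and
    # "Optimizing model ..." messages added in this commit.
    logging.basicConfig(level=logging.INFO)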
