diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py
index 52611a1c..ece27ca7 100644
--- a/nexa/cli/entry.py
+++ b/nexa/cli/entry.py
@@ -332,7 +332,7 @@ def _select_quantization_type():
         except ValueError:
             print("Please enter a valid number.")
 
-def _store_in_nexa_list(converted_path, model_type):
+def _store_in_nexa_list(converted_path, model_type, input_name, output_ftype):
     """Helper function to store converted model in nexa list."""
     import shutil
     from nexa.general import add_model_to_list
@@ -346,7 +346,8 @@ def _store_in_nexa_list(converted_path, model_type):
     shutil.copy2(converted_path, nexa_list_path)
 
     # Add the new path to the model list
-    add_model_to_list(os.path.basename(converted_path), nexa_list_path, "gguf", model_type)
+    nexa_model_name = f"{input_name}:{output_ftype}"
+    add_model_to_list(nexa_model_name, nexa_list_path, "gguf", model_type)
 
 def _run_converted_model(converted_path, model_type):
     """Helper function to run the converted model."""
@@ -393,7 +394,7 @@ def run_convert(args):
     try:
         from nexa.gguf.converter.nexa_convert import convert_hf_to_quantized_gguf
 
-        converted_path = convert_hf_to_quantized_gguf(
+        converted_path, input_name, output_ftype = convert_hf_to_quantized_gguf(
             input_path,
             output_file=args.output_file,
             ftype=ftype,
@@ -406,7 +407,7 @@ def run_convert(args):
             # Ask if user wants to store in nexa list
             store_choice = input("\nWould you like to store this model in nexa list so you can run it with `nexa run ` anywhere and anytime? (y/N): ").strip().lower()
             if store_choice == 'y':
-                _store_in_nexa_list(converted_path, model_type)
+                _store_in_nexa_list(converted_path, model_type, input_name, output_ftype)
 
             # Ask if user wants to run the model
             run_choice = input("\nWould you like to run the converted model? (y/N): ").strip().lower()
@@ -416,7 +417,8 @@ def run_convert(args):
                 print("Exiting without running the model.")
 
             print(f"\nConverted model stored at {converted_path}")
-            running_command = f"nexa run {converted_path.split('/')[-1]}"\
+            nexa_model_name = f"{input_name}:{output_ftype}"
+            running_command = f"nexa run {nexa_model_name}"\
                 if store_choice == 'y' else f"nexa run {converted_path} -lp -mt {model_type}"
             print(f"\nYou can run the converted model with command: {running_command}")
         else:
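Note on the `entry.py` hunks above: `run_convert` now registers converted models under `<input_name>:<ftype>` instead of the output file's basename, so `nexa run` can address them like pulled models. A minimal sketch of the naming convention; the helper name and model name below are illustrative, not part of the diff:

```python
# Hypothetical helper showing the registry key format used above.
def nexa_model_name(input_name: str, output_ftype: str) -> str:
    """Key under which a converted model is stored in the nexa list."""
    return f"{input_name}:{output_ftype}"

# e.g. converting ./Llama-3.2-1B with ftype "q4_0" is registered as:
assert nexa_model_name("Llama-3.2-1B", "q4_0") == "Llama-3.2-1B:q4_0"
```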
diff --git a/nexa/general.py b/nexa/general.py
index 3190c49d..aa88e710 100644
--- a/nexa/general.py
+++ b/nexa/general.py
@@ -594,6 +594,15 @@ def is_model_exists(model_name):
 
     with open(NEXA_MODEL_LIST_PATH, "r") as f:
         model_list = json.load(f)
+
+    # For AudioLM and Multimodal models, check the file location instead of the model name
+    if ":" in model_name:
+        model_path_with_slash = model_name.replace(":", "/")
+
+        # Check if model_prefix/model_suffix exists in any location path
+        for model_key, model_info in model_list.items():
+            if model_path_with_slash in model_info["location"]:
+                return model_key
 
     return model_name in model_list
 
@@ -606,6 +615,13 @@ def add_model_to_list(model_name, model_location, model_type, run_type):
             model_list = json.load(f)
     else:
         model_list = {}
+
+    # For AudioLM and Multimodal models, remove the "model-" prefix from the tag name
+    if run_type == "AudioLM" or run_type == "Multimodal":
+        tag_name = model_name.split(":")[1]
+        if tag_name.startswith("model-"):
+            tag_name = tag_name[6:]
+        model_name = f"{model_name.split(':')[0]}:{tag_name}"
 
     model_list[model_name] = {
         "type": model_type,
@@ -624,11 +640,21 @@ def get_model_info(model_name):
     with open(NEXA_MODEL_LIST_PATH, "r") as f:
         model_list = json.load(f)
 
+    # First try direct lookup
     model_data = model_list.get(model_name, {})
-    location = model_data.get("location")
-    run_type = model_data.get("run_type")
+    if model_data:
+        return model_data.get("location"), model_data.get("run_type")
+
+    # If not found and model_name contains ":", try path-based lookup
+    if ":" in model_name:
+        model_path_with_slash = model_name.replace(":", "/")
+
+        # Check if model_prefix/model_suffix exists in any location path
+        for model_key, model_info in model_list.items():
+            if model_path_with_slash in model_info["location"]:
+                return model_info["location"], model_info["run_type"]
 
-    return location, run_type
+    return None, None
 
 
 def list_models():
@@ -642,7 +668,7 @@ def list_models():
     filtered_list = {
         model_name: model_info
        for model_name, model_info in model_list.items()
-        if not model_name.split(':')[1].startswith('projector')
+        if ':' not in model_name or not model_name.split(':')[1].startswith('projector')
     }
 
     table = [
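The `general.py` hunks make lookups tolerant of `repo:tag` names whose on-disk layout is `repo/tag/...` (or `repo/model-tag.gguf` after the "model-" prefix is stripped from the key). A self-contained sketch of the fallback, assuming entries shaped like the diff's model list; the sample key and path are hypothetical:

```python
def lookup_by_location(model_name: str, model_list: dict):
    """If "repo:tag" is not a direct key, match "repo/tag" in stored locations."""
    if ":" not in model_name:
        return None, None
    needle = model_name.replace(":", "/")  # "repo:tag" -> "repo/tag"
    for info in model_list.values():
        if needle in info["location"]:
            return info["location"], info["run_type"]
    return None, None

model_list = {
    # add_model_to_list stripped "model-" from the tag, but the file path kept it
    "Qwen2-Audio:fp16": {
        "location": "/models/Qwen2-Audio/model-fp16.gguf",
        "run_type": "AudioLM",
    },
}
# Direct lookup of "Qwen2-Audio:model-fp16" misses; the path match still resolves it.
print(lookup_by_location("Qwen2-Audio:model-fp16", model_list))
```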
diff --git a/nexa/gguf/converter/nexa_convert.py b/nexa/gguf/converter/nexa_convert.py
index 5e13c16a..7c24771b 100644
--- a/nexa/gguf/converter/nexa_convert.py
+++ b/nexa/gguf/converter/nexa_convert.py
@@ -110,7 +110,7 @@ def convert_hf_to_quantized_gguf(
     ftype: str = "q4_0",
     convert_type: str = "f16",
     **kwargs
-) -> Optional[str]:
+) -> Optional[tuple[str, str, str]]:
     """
     Convert a model in safetensors format to a quantized GGUF file.
 
@@ -118,14 +118,14 @@ def convert_hf_to_quantized_gguf(
     It can process both directories containing .safetensors files and existing .gguf files.
 
     Args:
-        input_path (str): Path to the input Hugging Face model directory or GGUF file.
+        input_path (str): Path in the local file system to the input Hugging Face model directory or GGUF file.
         output_file (str, optional): Path to the output quantized GGUF file. If None, a default path will be used.
         ftype (str, optional): Quantization type (default: "q4_0").
         convert_type (str, optional): Conversion type for safetensors to GGUF (default: "f16").
         **kwargs: Additional keyword arguments for the conversion and quantization process.
 
     Returns:
-        Optional[str]: Path to the output quantized GGUF file if successful, None otherwise.
+        Optional[tuple[str, str, str]]: Tuple of (output_file_path, input_name, ftype) if successful, None otherwise.
 
     Raises:
         FileNotFoundError: If the input directory or file does not exist.
@@ -139,11 +139,13 @@ def convert_hf_to_quantized_gguf(
     # Convert input path to absolute path
     input_path = os.path.abspath(input_path)
 
+    # Get input name early
+    input_name = os.path.basename(input_path)
+    if input_path.endswith('.gguf'):
+        input_name = os.path.splitext(input_name)[0]  # Remove .gguf extension
+
     # Set default output file if not provided
     if not output_file:
-        input_name = os.path.basename(input_path)
-        if input_path.endswith('.gguf'):
-            input_name = os.path.splitext(input_name)[0]  # Remove .gguf extension
         output_file = os.path.abspath(f"./{input_name}-{ftype}.gguf")
     else:
         output_file = os.path.abspath(output_file)
@@ -168,7 +170,7 @@ def convert_hf_to_quantized_gguf(
             # Quantize GGUF model
             quantize_model(str(tmp_file_path.absolute()), output_file, ftype, **kwargs)
 
-            return output_file
+            return output_file, input_name, ftype
         finally:
             # Delete the temporary file
             if tmp_file_path.exists():
@@ -179,7 +181,7 @@ def convert_hf_to_quantized_gguf(
     elif input_path.endswith('.gguf'):
         # Directly call quantize_model with input_path
         quantize_model(input_file=input_path, output_file=output_file, ftype=ftype, **kwargs)
-        return output_file
+        return output_file, input_name, ftype
     else:
         logger.error(f"Invalid input path: {input_path}. Must be a directory with .safetensors files or a .gguf file.")
        return None
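Since `convert_hf_to_quantized_gguf` now returns a 3-tuple on success but still `None` on an invalid input path, callers that unpack the result need a `None` guard first (the unpacking in `run_convert` above assumes success). A hedged usage sketch; the input path is illustrative:

```python
from nexa.gguf.converter.nexa_convert import convert_hf_to_quantized_gguf

# "./my-model" stands in for a real safetensors directory or .gguf file.
result = convert_hf_to_quantized_gguf("./my-model", ftype="q4_0")
if result is None:
    raise SystemExit("conversion failed")
converted_path, input_name, output_ftype = result   # new 3-tuple shape
print(f"nexa run {input_name}:{output_ftype}")       # e.g. "nexa run my-model:q4_0"
```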