
Commit

fix
plusbang committed Nov 28, 2024
1 parent 929cb3a commit e275f27
Showing 5 changed files with 18 additions and 7 deletions.
@@ -54,7 +54,13 @@
parser.add_argument("--disable-transpose-value-cache", action="store_true", default=False)
parser.add_argument("--intra-pp", type=int, default=None)
parser.add_argument("--inter-pp", type=int, default=None)
parser.add_argument("--mixed-precision", action='store_true')
parser.add_argument("--mixed-precision", action='store_false')
parser.add_argument("--save-directory", type=str,
required=True,
help="The path of folder to save converted model, "
"If path not exists, lowbit model will be saved there. "
"Else, program will raise error.",
)

args = parser.parse_args()
model_path = args.repo_id_or_model_path
@@ -74,6 +80,7 @@
        transpose_value_cache=not args.disable_transpose_value_cache,
        mixed_precision=args.mixed_precision,
        quantization_group_size=args.quantization_group_size,
+       save_directory=args.save_directory
    )
else:
    model = AutoModelForCausalLM.load_low_bit(
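For context, a minimal sketch of the two-phase workflow the new flag enables: a first run converts the model and saves the low-bit artifacts under `--save-directory`, and later runs reload them with `load_low_bit` (the branch shown above). The script name, model id, and extra kwargs are assumptions for illustration:

```python
# Hypothetical first run (script name and model id are examples):
#   python generate.py --repo-id-or-model-path Qwen/Qwen2-7B-Instruct \
#       --save-directory ./qwen2-npu-lowbit
#
# Subsequent runs can skip conversion and load the saved artifacts.
# load_low_bit appears in this diff; the exact kwargs are assumptions.
from ipex_llm.transformers.npu_model import AutoModelForCausalLM

model = AutoModelForCausalLM.load_low_bit(
    "./qwen2-npu-lowbit",  # the folder passed earlier as --save-directory
    trust_remote_code=True,
)
```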
3 changes: 2 additions & 1 deletion python/llm/src/ipex_llm/transformers/npu_model.py
@@ -263,14 +263,15 @@ def optimize_npu_model(cls, *args, **kwargs):
        model.share_memory()

        if not pipeline:
-           if model.config.model_type in ["qwen2", "llama", "minicpm"]:
+           if model.config.model_type in ["qwen2"]:
                from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
                optimize_llm_single_process(
                    llm,
                    kv_len=max_context_len,
                    max_prompt_len=max_prompt_len,
                    transpose_value_cache=transpose_value_cache,
                    group_size=quantization_group_size,
+                   qtype=qtype,
                    save_directory=save_directory
                )
            else:
8 changes: 5 additions & 3 deletions python/llm/src/ipex_llm/transformers/npu_models/convert.py
@@ -322,7 +322,7 @@ def generate(
        output = torch.stack(output_tokens, dim=1)
        output = torch.cat((inputs, output), dim=1)
        time_t3 = time.perf_counter()
-
+       reset(self.model_ptr)
        self.first_cost = time_t2 - time_t1 # seconds
        self.rest_cost_mean = (time_t3 - time_t2) / (idx - 1) # seconds
@@ -345,16 +345,18 @@ def optimize_llm_single_process(
    max_prompt_len: int,
    transpose_value_cache: bool,
    group_size: int,
+   qtype: str,
    save_directory: str
):
-   from ipex_llm.transformers.npu_pipeline_model.convert_pipeline import convert_llm_for_deploy, convert_llm
+   from ipex_llm.transformers.npu_pipeline_model.convert_pipeline import convert_llm
    from .npu_llm_cpp import load_model_from_file

    convert_llm(model,
                kv_len=kv_len,
                max_prompt_len=max_prompt_len,
                transpose_value_cache=transpose_value_cache,
                group_size=group_size,
+               qtype=qtype,
                convert_model=True,
                save_directory=save_directory)
    try:
@@ -364,7 +366,7 @@
        model.vocab_size = model.config.vocab_size
    except:
        invalidInputError(False,
-                         "False to InitLLMPipeline.")
+                          "False to InitLLMPipeline.")
    # patch generate function
    import types
    model.generate = types.MethodType(generate, model)
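The last context line above patches a plain function onto the model instance. `types.MethodType` binds a free function so the instance is passed as `self`; a self-contained sketch of the pattern (the names here are illustrative, not from the diff):

```python
import types

class Model:
    pass

def generate(self, prompt):
    # 'self' is the instance the function gets bound to below.
    return f"generated from {prompt!r}"

m = Model()
# Bind the free function as an instance method, mirroring
# model.generate = types.MethodType(generate, model) in the diff.
m.generate = types.MethodType(generate, m)
print(m.generate("hello"))  # -> generated from 'hello'
```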
2 changes: 1 addition & 1 deletion python/llm/src/ipex_llm/transformers/npu_models/npu_llm_cpp.py
@@ -41,7 +41,6 @@ def get_shared_lib_info(lib_base_name: str):


_, _lib_path = get_shared_lib_info("npu_llm")
-print(f'_lib_path is {_lib_path}......')

# Load the library
_lib = ctypes.cdll.LoadLibrary(_lib_path)
@@ -61,6 +60,7 @@ def get_shared_lib_info(lib_base_name: str):
_lib.reset.argtypes = [ctypes.c_void_p]
_lib.reset.restype = None

+
def load_model_from_file(model_dir: str):
return _lib.load_model_from_file(model_dir.encode('utf-8'))

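The `argtypes`/`restype` assignments above tell `ctypes` how to marshal values across the C boundary; without them, arguments and results default to int-like handling. A minimal, self-contained sketch of the same pattern against libc on Linux/macOS (libc stands in for the `npu_llm` library, which is not available outside the package):

```python
import ctypes
import ctypes.util

# Load a shared library; libc is only a stand-in for npu_llm here.
libc = ctypes.CDLL(ctypes.util.find_library("c"))

# Declare the C signature before calling, as the diff does for _lib.reset:
#   double atof(const char *nptr);
libc.atof.argtypes = [ctypes.c_char_p]
libc.atof.restype = ctypes.c_double

print(libc.atof(b"2.5"))  # -> 2.5 (a float, not a truncated int)
```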
3 changes: 2 additions & 1 deletion python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
@@ -426,7 +426,8 @@ def convert_llm_for_deploy(model: torch.nn.Module,
    if not os.path.exists(save_directory):
        os.mkdir(save_directory)
    weight_dir = os.path.join(save_directory, "model_weights")
-   os.mkdir(weight_dir)
+   if not os.path.exists(weight_dir):
+       os.mkdir(weight_dir)
layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "1") == "1"

if model.config.model_type == "qwen2":
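This fix guards the second `os.mkdir` so a pre-existing `model_weights` folder no longer raises `FileExistsError` on re-conversion. An equivalent, more compact alternative (a sketch, not what the commit uses) is `os.makedirs` with `exist_ok=True`, which creates the parent and the leaf in one call:

```python
import os

save_directory = "./converted_model"  # example path
weight_dir = os.path.join(save_directory, "model_weights")

# Creates both directories if needed and is a no-op when they
# already exist -- same effect as the guarded mkdir calls above.
os.makedirs(weight_dir, exist_ok=True)
```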
