You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
curl 127.0.0.1:8080/generate \
-X POST \
-d '{
"inputs": "[INST] Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? [/INST]",
"parameters": {
"max_new_tokens": 64,
"adapter_id": "/root/model/adapters/shiji",
"adapter_source": "local"
}
}' \
-H 'Content-Type: application/json'
Got error:
2024-11-27T22:44:42.636400Z ERROR lorax_launcher: interceptor.py:41 Method Prefill encountered an error.
Traceback (most recent call last):
File "/opt/conda/bin/lorax-server", line 8, in <module>
sys.exit(app())
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 311, in __call__
return get_command(self)(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 778, in main
return _main(
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 216, in _main
rv = self.invoke(ctx)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 683, in wrapper
return callback(**use_params) # type: ignore
File "/opt/conda/lib/python3.10/site-packages/lorax_server/cli.py", line 92, in serve
server.serve(
File "/opt/conda/lib/python3.10/site-packages/lorax_server/server.py", line 449, in serve
asyncio.run(
File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete
self.run_forever()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
File "/opt/conda/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/opt/conda/lib/python3.10/site-packages/grpc_interceptor/server.py", line 165, in invoke_intercept_method
return await self.intercept(
> File "/opt/conda/lib/python3.10/site-packages/lorax_server/interceptor.py", line 38, in intercept
return await response
File "/opt/conda/lib/python3.10/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 82, in _unary_interceptor
raise error
File "/opt/conda/lib/python3.10/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 73, in _unary_interceptor
return await behavior(request_or_iterator, context)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/server.py", line 111, in Prefill
generations, next_batch = self.model.generate_token(batch)
File "/opt/conda/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 1608, in generate_token
out, speculative_logits = self.forward(batch, adapter_data)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 1529, in forward
out = model.forward(
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_llama_modeling.py", line 604, in forward
hidden_states = self.model(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_llama_modeling.py", line 541, in forward
hidden_states, residual = layer(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_llama_modeling.py", line 466, in forward
attn_output = self.self_attn(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_llama_modeling.py", line 317, in forward
qkv = self.query_key_value(hidden_states, adapter_data)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/layers.py", line 250, in forward
result = self.forward_layer_type(result, input, adapter_data, layer_name, start_idx, end_idx)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/layers.py", line 96, in forward_layer_type
adapter_data.punica_wrapper.add_lora(
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/punica.py", line 730, in add_lora
self.add_shrink(buffer, x, wa_t_all, scale)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/punica.py", line 652, in add_shrink
shrink_fun(y, x, w_t_all, scale)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/punica.py", line 559, in shrink_prefill
sgmv_shrink(
File "/opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/ops/sgmv_shrink.py", line 142, in sgmv_shrink
assert inputs.size(1) == lora_a_weights.size(-1)
AssertionError
Expected behavior
Should be able to preload adapters.
The text was updated successfully, but these errors were encountered:
System Info
lorax 0.12.1 container
Information
Tasks
Reproduction
I followed the document https://github.com/predibase/lorax/blob/main/docs/models/adapters/index.md#local
Download in python
Got error:
Expected behavior
Should be able to preload adapters.
The text was updated successfully, but these errors were encountered: