[Pipelines] Remove end_pos output from Llama graph
This value is not used in either the naive or continuous batching path, and it adds unnecessary handling to the code, so it is being removed.

MODULAR_ORIG_COMMIT_REV_ID: 43904ccf482234e116ef45fc251e24b7734113ae
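For orientation before reading the diff: a minimal sketch of the output handling that remains once end_pos is gone. `ModelOutputs` here is a simplified stand-in dataclass and `wrap_outputs` is a hypothetical helper mirroring the new execute() body shown below; neither is code from the repository.

```python
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class ModelOutputs:
    # Simplified stand-in for the pipeline's ModelOutputs type.
    next_token_logits: Any
    logits: Optional[Any] = None


def wrap_outputs(model_outputs: tuple, enable_echo: bool) -> ModelOutputs:
    # With end_pos removed, the graph's outputs no longer depend on the KV
    # cache strategy: slot 0 is always the next-token logits, and slot 1 is
    # the full logits tensor only when echo is enabled.
    if enable_echo:
        return ModelOutputs(
            next_token_logits=model_outputs[0], logits=model_outputs[1]
        )
    return ModelOutputs(next_token_logits=model_outputs[0])
```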
KCaverly authored and modularbot committed Dec 17, 2024
1 parent ddc384e commit 7995339
Showing 2 changed files with 12 additions and 23 deletions.
33 changes: 12 additions & 21 deletions pipelines/python/llama3/model.py
@@ -48,24 +48,13 @@ def execute(self, *model_inputs: Tensor) -> ModelOutputs:
             *model_inputs, copy_inputs_to_device=False
         )
 
-        if self.pipeline_config.cache_strategy == KVCacheStrategy.CONTINUOUS:
-            if self.pipeline_config.enable_echo:
-                assert len(model_outputs) == 2
-                return ModelOutputs(
-                    next_token_logits=model_outputs[0], logits=model_outputs[1]
-                )
-            else:
-                assert len(model_outputs) == 1
-                return ModelOutputs(next_token_logits=model_outputs[0])
+        if self.pipeline_config.enable_echo:
+            return ModelOutputs(
+                next_token_logits=model_outputs[0],
+                logits=model_outputs[1],
+            )
         else:
-            if self.pipeline_config.enable_echo:
-                assert len(model_outputs) == 3
-                return ModelOutputs(
-                    next_token_logits=model_outputs[0], logits=model_outputs[2]
-                )
-            else:
-                assert len(model_outputs) == 2
-                return ModelOutputs(next_token_logits=model_outputs[0])
+            return ModelOutputs(next_token_logits=model_outputs[0])
 
     def _prepare_continuous_initial_token_inputs(
         self, context_batch: list[TextContext]
@@ -283,17 +272,19 @@ def _build_graph(self, weights: GGUFWeights) -> Graph:
                 ]
                 else DType.float32
             )
-            logits, end_pos = model(
+            logits = model(
                 tokens,
                 attention_mask.cast(mask_dtype),
                 k_cache,
                 v_cache,
                 start_pos,
-            )
+            )[0]
 
             if self.pipeline_config.enable_echo:
-                graph.output(logits[:, -1], end_pos, logits)
+                graph.output(logits[:, -1], logits)
             else:
-                graph.output(logits[:, -1], end_pos)
+                graph.output(logits[:, -1])
 
             return graph
 
     def compute_log_probabilities(
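As a note on what the removed asserts encoded: the compiled graph's output arity previously depended on both the KV cache strategy and enable_echo, and after this change it depends on enable_echo alone. The helpers below are purely illustrative (not repository code) and simply restate the counts from the asserts deleted in the execute() hunk above.

```python
def old_num_graph_outputs(enable_echo: bool, continuous_batching: bool) -> int:
    # Counts taken from the asserts removed in execute().
    if continuous_batching:
        return 2 if enable_echo else 1  # continuous graph never emitted end_pos
    return 3 if enable_echo else 2      # naive graph emitted end_pos as well


def new_num_graph_outputs(enable_echo: bool) -> int:
    return 2 if enable_echo else 1      # end_pos gone on every path


# Both paths now share the continuous-batching arity.
assert new_num_graph_outputs(True) == old_num_graph_outputs(True, True)
assert new_num_graph_outputs(False) == old_num_graph_outputs(False, True)
```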
2 changes: 0 additions & 2 deletions pipelines/python/nn/transformer/naive_transformer.py
@@ -88,8 +88,6 @@ def __call__(
                 i,
             )
 
-        seq_len = TensorValue(tokens.shape[1])  # type: ignore
         return (
             ops.cast(self.output(self.norm(h)), k_cache.dtype),  # type: ignore
-            start_pos + seq_len,
         )
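To make the naive transformer's new return contract concrete, here is a hedged NumPy sketch; `output`, `norm`, and the cache dtype are stand-ins for the names in the diff above, and the helper itself is illustrative rather than the real NaiveTransformer.__call__.

```python
import numpy as np


def naive_transformer_call(h: np.ndarray, output_w: np.ndarray,
                           cache_dtype=np.float32) -> tuple:
    """Illustrative stand-in for the naive transformer's __call__ after this change."""
    # The old (logits, start_pos + seq_len) pair becomes a 1-tuple of logits
    # cast to the KV cache dtype; position bookkeeping stays in the pipeline,
    # which is why model.py above now indexes the call result with [0].
    logits = h @ output_w  # stands in for ops.cast(self.output(self.norm(h)), ...)
    return (logits.astype(cache_dtype),)


# Caller side, mirroring `logits = model(...)[0]` in the Llama graph builder:
hidden = np.random.rand(2, 8, 16).astype(np.float32)
weights = np.random.rand(16, 32).astype(np.float32)
logits = naive_transformer_call(hidden, weights)[0]
assert logits.shape == (2, 8, 32)
```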