Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(debug): self-referencing checkpoints when resuming streaming mid-thread #2070

Merged
merged 4 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions libs/langgraph/langgraph/pregel/loop.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import asyncio

Check notice on line 1 in libs/langgraph/langgraph/pregel/loop.py

View workflow job for this annotation

GitHub Actions / benchmark

Benchmark results

......................................... fanout_to_subgraph_10x: Mean +- std dev: 45.8 ms +- 1.2 ms ......................................... fanout_to_subgraph_10x_sync: Mean +- std dev: 40.6 ms +- 1.0 ms ......................................... fanout_to_subgraph_10x_checkpoint: Mean +- std dev: 66.1 ms +- 1.8 ms ......................................... fanout_to_subgraph_10x_checkpoint_sync: Mean +- std dev: 67.8 ms +- 1.5 ms ......................................... fanout_to_subgraph_100x: Mean +- std dev: 442 ms +- 13 ms ......................................... fanout_to_subgraph_100x_sync: Mean +- std dev: 396 ms +- 7 ms ......................................... fanout_to_subgraph_100x_checkpoint: Mean +- std dev: 642 ms +- 22 ms ......................................... fanout_to_subgraph_100x_checkpoint_sync: Mean +- std dev: 646 ms +- 9 ms ......................................... react_agent_10x: Mean +- std dev: 26.7 ms +- 0.9 ms ......................................... react_agent_10x_sync: Mean +- std dev: 18.9 ms +- 0.3 ms ......................................... react_agent_10x_checkpoint: Mean +- std dev: 40.9 ms +- 3.0 ms ......................................... react_agent_10x_checkpoint_sync: Mean +- std dev: 30.9 ms +- 2.8 ms ......................................... react_agent_100x: Mean +- std dev: 294 ms +- 8 ms ......................................... react_agent_100x_sync: Mean +- std dev: 231 ms +- 5 ms ......................................... react_agent_100x_checkpoint: Mean +- std dev: 825 ms +- 15 ms ......................................... react_agent_100x_checkpoint_sync: Mean +- std dev: 743 ms +- 8 ms ......................................... wide_state_25x300: Mean +- std dev: 17.6 ms +- 0.4 ms ......................................... wide_state_25x300_sync: Mean +- std dev: 10.3 ms +- 0.2 ms ......................................... 
wide_state_25x300_checkpoint: Mean +- std dev: 230 ms +- 8 ms ......................................... wide_state_25x300_checkpoint_sync: Mean +- std dev: 230 ms +- 14 ms ......................................... wide_state_15x600: Mean +- std dev: 20.2 ms +- 0.5 ms ......................................... wide_state_15x600_sync: Mean +- std dev: 11.8 ms +- 0.2 ms ......................................... wide_state_15x600_checkpoint: Mean +- std dev: 396 ms +- 15 ms ......................................... wide_state_15x600_checkpoint_sync: Mean +- std dev: 393 ms +- 16 ms ......................................... wide_state_9x1200: Mean +- std dev: 20.1 ms +- 0.5 ms ......................................... wide_state_9x1200_sync: Mean +- std dev: 11.8 ms +- 0.3 ms ......................................... wide_state_9x1200_checkpoint: Mean +- std dev: 254 ms +- 8 ms ......................................... wide_state_9x1200_checkpoint_sync: Mean +- std dev: 254 ms +- 13 ms

Check notice on line 1 in libs/langgraph/langgraph/pregel/loop.py

View workflow job for this annotation

GitHub Actions / benchmark

Comparison against main

+-----------------------------------------+---------+-----------------------+ | Benchmark | main | changes | +=========================================+=========+=======================+ | fanout_to_subgraph_100x_checkpoint | 730 ms | 642 ms: 1.14x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_10x_checkpoint | 45.4 ms | 40.9 ms: 1.11x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_10x_checkpoint_sync | 34.2 ms | 30.9 ms: 1.11x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_100x_checkpoint_sync | 711 ms | 646 ms: 1.10x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_100x | 485 ms | 442 ms: 1.10x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_10x_checkpoint_sync | 73.9 ms | 67.8 ms: 1.09x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_10x_sync | 20.5 ms | 18.9 ms: 1.08x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_10x_sync | 44.0 ms | 40.6 ms: 1.08x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_15x600 | 21.8 ms | 20.2 ms: 1.08x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_9x1200 | 21.7 ms | 20.1 ms: 1.08x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_10x | 28.7 ms | 26.7 ms: 1.08x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_25x300 | 18.9 ms | 17.6 ms: 1.08x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_100x_sync | 248 ms | 231 ms: 1.08x faster | 
+-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_100x_sync | 425 ms | 396 ms: 1.07x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_100x | 315 ms | 294 ms: 1.07x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_10x_checkpoint | 70.8 ms | 66.1 ms: 1.07x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_15x600_sync | 12.6 ms | 11.8 ms: 1.07x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_10x | 49.0 ms | 45.8 ms: 1.07x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_100x_checkpoint | 881 ms | 825 ms: 1.07x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_9x1200_sync | 12.6 ms | 11.8 ms: 1.07x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_9x1200_checkpoint_sync | 270 ms | 254 ms: 1.06x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_25x300_sync | 10.9 ms | 10.3 ms: 1.06x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_100x_checkpoint_sync | 789 ms | 743 ms: 1.06x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_9x1200_checkpoint | 268 ms | 254 ms: 1.06x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_15x600_checkpoint_sync | 414 ms | 393 ms: 1.05x faster | +---------------------------------------
import concurrent.futures
from collections import deque
from contextlib import AsyncExitStack, ExitStack
Expand Down Expand Up @@ -250,13 +250,7 @@
if self.config[CONF].get(CONFIG_KEY_CHECKPOINT_NS)
else ()
)
self.prev_checkpoint_config = (
self.checkpoint_config
if self.checkpoint_config
and CONF in self.checkpoint_config
and CONFIG_KEY_CHECKPOINT_ID in self.checkpoint_config[CONF]
else None
)
self.prev_checkpoint_config = None

def put_writes(self, task_id: str, writes: Sequence[tuple[str, Any]]) -> None:
"""Put writes for a task, to be read by the next tick."""
Expand Down Expand Up @@ -740,6 +734,7 @@
**saved.config.get(CONF, {}),
},
}
self.prev_checkpoint_config = saved.parent_config
self.checkpoint = saved.checkpoint
self.checkpoint_metadata = saved.metadata
self.checkpoint_pending_writes = (
Expand Down Expand Up @@ -867,6 +862,7 @@
**saved.config.get(CONF, {}),
},
}
self.prev_checkpoint_config = saved.parent_config
self.checkpoint = saved.checkpoint
self.checkpoint_metadata = saved.metadata
self.checkpoint_pending_writes = (
Expand Down
68 changes: 65 additions & 3 deletions libs/langgraph/tests/test_pregel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11584,6 +11584,66 @@ def baz(state: State):
assert graph.invoke({"foo": "hello"}) == {"foo": "hello", "bar": "hello!"}


def test_debug_retry():
    """Check debug checkpoint events when streaming resumes mid-thread.

    Runs a two-node graph once, takes the parent config of the step-1
    checkpoint, passes it through ``update_state(..., values=None)`` and
    streams again in ``"debug"`` mode from the returned config. Every
    streamed checkpoint event must not reference itself as its own parent,
    and its config / parent config must match the entry with the same
    checkpoint id in the state history.
    """

    class State(TypedDict):
        messages: Annotated[list[str], operator.add]

    def node(name):
        def _node(state: State):
            return {"messages": [f"entered {name} node"]}

        return _node

    # Linear graph: START -> one -> two -> END
    builder = StateGraph(State)
    for label in ("one", "two"):
        builder.add_node(label, node(label))
    builder.add_edge(START, "one")
    builder.add_edge("one", "two")
    builder.add_edge("two", END)

    saver = MemorySaver()
    graph = builder.compile(checkpointer=saver)

    thread_config = {"configurable": {"thread_id": "1"}}
    graph.invoke({"messages": []}, config=thread_config)

    # re-run step: 1
    target_config = next(
        saved.parent_config
        for saved in saver.list(thread_config)
        if saved.metadata["step"] == 1
    )
    update_config = graph.update_state(target_config, values=None)

    events = list(graph.stream(None, config=update_config, stream_mode="debug"))

    # Payloads of the checkpoint events, in reverse emission order.
    checkpoint_events = [
        event["payload"] for event in events if event["type"] == "checkpoint"
    ][::-1]

    # Index the thread's state history by checkpoint id for direct lookup.
    checkpoint_history = {}
    for snapshot in graph.get_state_history(thread_config):
        checkpoint_history[snapshot.config["configurable"]["checkpoint_id"]] = snapshot

    def lax_normalize_config(config: Optional[dict]) -> Optional[dict]:
        # Compare only the "configurable" section; a missing config stays None.
        return None if config is None else config["configurable"]

    for stream in checkpoint_events:
        stream_conf = lax_normalize_config(stream["config"])
        stream_parent_conf = lax_normalize_config(stream["parent_config"])
        # A checkpoint must never be reported as its own parent.
        assert stream_conf != stream_parent_conf

        # ensure the streamed checkpoint == checkpoint from checkpointer.list()
        checkpoint_id = stream["config"]["configurable"]["checkpoint_id"]
        history = checkpoint_history[checkpoint_id]
        history_conf = lax_normalize_config(history.config)
        assert stream_conf == history_conf

        history_parent_conf = lax_normalize_config(history.parent_config)
        assert stream_parent_conf == history_parent_conf


def test_debug_subgraphs():
class State(TypedDict):
messages: Annotated[list[str], operator.add]
Expand Down Expand Up @@ -11627,16 +11687,18 @@ def _node(state: State):

assert len(checkpoint_events) == len(checkpoint_history)

def normalize_config(config: Optional[dict]) -> Optional[dict]:
def lax_normalize_config(config: Optional[dict]) -> Optional[dict]:
if config is None:
return None
return config["configurable"]

for stream, history in zip(checkpoint_events, checkpoint_history):
assert stream["values"] == history.values
assert stream["next"] == list(history.next)
assert normalize_config(stream["config"]) == normalize_config(history.config)
assert normalize_config(stream["parent_config"]) == normalize_config(
assert lax_normalize_config(stream["config"]) == lax_normalize_config(
history.config
)
assert lax_normalize_config(stream["parent_config"]) == lax_normalize_config(
history.parent_config
)

Expand Down
65 changes: 65 additions & 0 deletions libs/langgraph/tests/test_pregel_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -9820,6 +9820,71 @@ async def node(input: State, config: RunnableConfig, store: BaseStore):
) # still overwriting the same one


async def test_debug_retry():
    """Check debug checkpoint events when async streaming resumes mid-thread.

    Runs a two-node graph once, takes the parent config of the step-1
    checkpoint, passes it through ``aupdate_state(..., values=None)`` and
    streams again in ``"debug"`` mode from the returned config. Every
    streamed checkpoint event must not reference itself as its own parent,
    and its config / parent config must match the entry with the same
    checkpoint id in the state history.
    """

    class State(TypedDict):
        messages: Annotated[list[str], operator.add]

    def node(name):
        async def _node(state: State):
            return {"messages": [f"entered {name} node"]}

        return _node

    # Linear graph: START -> one -> two -> END
    builder = StateGraph(State)
    builder.add_node("one", node("one"))
    builder.add_node("two", node("two"))
    builder.add_edge(START, "one")
    builder.add_edge("one", "two")
    builder.add_edge("two", END)

    saver = MemorySaver()

    graph = builder.compile(checkpointer=saver)

    config = {"configurable": {"thread_id": "1"}}
    await graph.ainvoke({"messages": []}, config=config)

    # re-run step: 1
    # Pre-bind the name so that a missing step-1 checkpoint fails the
    # assertion below instead of raising NameError on an unbound local.
    target_config = None
    async for c in saver.alist(config):
        if c.metadata["step"] == 1:
            target_config = c.parent_config
            break
    assert target_config is not None

    update_config = await graph.aupdate_state(target_config, values=None)

    events = [
        c async for c in graph.astream(None, config=update_config, stream_mode="debug")
    ]

    # Payloads of the checkpoint events, in reverse emission order.
    checkpoint_events = list(
        reversed([e["payload"] for e in events if e["type"] == "checkpoint"])
    )

    # Index the thread's state history by checkpoint id for direct lookup.
    checkpoint_history = {
        c.config["configurable"]["checkpoint_id"]: c
        async for c in graph.aget_state_history(config)
    }

    def lax_normalize_config(config: Optional[dict]) -> Optional[dict]:
        # Compare only the "configurable" section; a missing config stays None.
        if config is None:
            return None
        return config["configurable"]

    for stream in checkpoint_events:
        stream_conf = lax_normalize_config(stream["config"])
        stream_parent_conf = lax_normalize_config(stream["parent_config"])
        # A checkpoint must never be reported as its own parent.
        assert stream_conf != stream_parent_conf

        # ensure the streamed checkpoint == checkpoint from checkpointer.list()
        history = checkpoint_history[stream["config"]["configurable"]["checkpoint_id"]]
        history_conf = lax_normalize_config(history.config)
        assert stream_conf == history_conf

        history_parent_conf = lax_normalize_config(history.parent_config)
        assert stream_parent_conf == history_parent_conf


async def test_debug_subgraphs():
class State(TypedDict):
messages: Annotated[list[str], operator.add]
Expand Down
Loading