From fe538d4bcbecf2eecb8a0cc2ace548908d24e608 Mon Sep 17 00:00:00 2001 From: Mingqi Hu Date: Tue, 3 Dec 2024 09:36:13 +0800 Subject: [PATCH 1/2] docs: Use edit mode as default to install template (#2590) Small change to install the dependencies in editable mode (`pip install -e .`) so that users, including newcomers, can see the effect immediately when they change the template code. This makes it easy to evaluate how the agent works and to test and re-develop it! --------- Signed-off-by: Mingqi Hu Co-authored-by: William FH <13333726+hinthornw@users.noreply.github.com> --- docs/docs/tutorials/langgraph-platform/local-server.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/tutorials/langgraph-platform/local-server.md b/docs/docs/tutorials/langgraph-platform/local-server.md index 9c8e3ea9f..db7f53fea 100644 --- a/docs/docs/tutorials/langgraph-platform/local-server.md +++ b/docs/docs/tutorials/langgraph-platform/local-server.md @@ -35,10 +35,10 @@ Create a new app from the `react-agent` template. This template is a simple agen ## Install Dependencies -In the root of your new LangGraph app, install the dependencies: +In the root of your new LangGraph app, install the dependencies in editable mode (`-e`) so your local changes are used by the server: ```shell -pip install . +pip install -e . ``` ## Create a `.env` file From 15f0765d60f18a9c3a2015af3c6506232750643c Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Mon, 2 Dec 2024 17:42:29 -0800 Subject: [PATCH 2/2] Add IVFFlat and HNSW support (#2598) It seems that once I moved the operators and other things out, the query planner actually does reasonable things: it does a sequential scan if the filtered N is below some size and uses the index otherwise, even with namespace filtering. 
--- .../langgraph/store/postgres/base.py | 70 ++++++++++++++++++- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/libs/checkpoint-postgres/langgraph/store/postgres/base.py b/libs/checkpoint-postgres/langgraph/store/postgres/base.py index 94c7d3e4e..28edd8998 100644 --- a/libs/checkpoint-postgres/langgraph/store/postgres/base.py +++ b/libs/checkpoint-postgres/langgraph/store/postgres/base.py @@ -56,6 +56,7 @@ class Migration(NamedTuple): sql: str params: Optional[dict[str, Any]] = None + condition: Optional[Callable[["BasePostgresStore"], bool]] = None MIGRATIONS: Sequence[str] = [ @@ -104,11 +105,29 @@ class Migration(NamedTuple): ), }, ), - # TODO: Add an HNSW or IVFFlat index depending on config - # First must improve the search query when filtering by - # namespace + Migration( + """ +CREATE INDEX IF NOT EXISTS store_vectors_embedding_idx ON store_vectors + USING %(index_type)s (embedding %(ops)s)%(index_params)s; +""", + condition=lambda store: bool( + store.index_config and _get_index_params(store)[0] != "flat" + ), + params={ + "index_type": lambda store: _get_index_params(store)[0], + "ops": lambda store: _get_vector_type_ops(store), + "index_params": lambda store: ( + " WITH (" + + ", ".join(f"{k}={v}" for k, v in _get_index_params(store)[1].items()) + + ")" + if _get_index_params(store)[1] + else "" + ), + }, + ), ] + C = TypeVar("C", bound=Union[_pg_internal.Conn, _ainternal.Conn]) @@ -140,6 +159,8 @@ class PoolConfig(TypedDict, total=False): class ANNIndexConfig(TypedDict, total=False): """Configuration for vector index in PostgreSQL store.""" + kind: Literal["hnsw", "ivfflat", "flat"] + """Type of index to use: 'hnsw' for Hierarchical Navigable Small World, 'ivfflat' for Inverted File Flat, or 'flat' to skip creating an ANN index.""" vector_type: Literal["vector", "halfvec"] """Type of vector storage to use. 
Options: @@ -148,6 +169,35 @@ class ANNIndexConfig(TypedDict, total=False): """ +class HNSWConfig(ANNIndexConfig, total=False): + """Configuration for HNSW (Hierarchical Navigable Small World) index.""" + + kind: Literal["hnsw"] # type: ignore[misc] + m: int + """Maximum number of connections per layer. Default is 16.""" + ef_construction: int + """Size of dynamic candidate list for index construction. Default is 64.""" + + +class IVFFlatConfig(ANNIndexConfig, total=False): + """IVFFlat index divides vectors into lists, and then searches a subset of those lists that are closest to the query vector. It has faster build times and uses less memory than HNSW, but has lower query performance (in terms of speed-recall tradeoff). + + Three keys to achieving good recall are: + 1. Create the index after the table has some data + 2. Choose an appropriate number of lists - a good place to start is rows / 1000 for up to 1M rows and sqrt(rows) for over 1M rows + 3. When querying, specify an appropriate number of probes (higher is better for recall, lower is better for speed) - a good place to start is sqrt(lists) + """ + + kind: Literal["ivfflat"] # type: ignore[misc] + nlist: int + """Number of inverted lists (clusters) for IVF index. + + Determines the number of clusters used in the index structure. + Higher values can improve search speed but increase index size and build time. + Typically set to the square root of the number of vectors in the index. + """ + + class PostgresIndexConfig(IndexConfig, total=False): """Configuration for vector embeddings in PostgreSQL store with pgvector-specific options. 
@@ -774,6 +824,8 @@ def _get_version(cur: Cursor[dict[str, Any]], table: str) -> int: for v, migration in enumerate( self.VECTOR_MIGRATIONS[version + 1 :], start=version + 1 ): + if migration.condition and not migration.condition(self): + continue sql = migration.sql if migration.params: params = { @@ -832,6 +884,18 @@ def _get_vector_type_ops(store: BasePostgresStore) -> str: return f"{type_prefix}_{distance_suffix}" +def _get_index_params(store: Any) -> tuple[str, dict[str, Any]]: + """Get the index type and configuration based on config.""" + if not store.index_config: + return "hnsw", {} + + config = cast(PostgresIndexConfig, store.index_config) + index_config = config.get("ann_index_config", _DEFAULT_ANN_CONFIG).copy() + kind = index_config.pop("kind", "hnsw") + index_config.pop("vector_type", None) + return kind, index_config + + def _namespace_to_text( namespace: tuple[str, ...], handle_wildcards: bool = False ) -> str: