Skip to content

Commit

Permalink
cr
Browse files Browse the repository at this point in the history
  • Loading branch information
hwchase17 committed Aug 9, 2023
1 parent ef9348b commit 570d4e2
Showing 1 changed file with 37 additions and 4 deletions.
41 changes: 37 additions & 4 deletions libs/langchain/langchain/retrievers/parent_document_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,51 @@ def get_relevant_documents(
return_docs = cast(List[Document], docs)
return return_docs

def add_documents(
    self,
    documents: List[Document],
    ids: Optional[List[str]] = None,
    add_to_docstore: bool = True,
) -> None:
    """Adds documents to the docstore and vectorstores.

    Each (optionally parent-split) document is split into child chunks with
    `child_splitter`; every chunk is tagged with its parent's id under
    `self.id_key` and added to the vectorstore, while the parent documents
    themselves are written to the docstore keyed by that id.

    Args:
        documents: List of documents to add
        ids: Optional list of ids for documents. If provided should be the same
            length as the list of documents. Can be provided if parent documents
            are already in the document store and you don't want to re-add
            to the docstore. If not provided, random UUIDs will be used as
            ids. Note that when `parent_splitter` is set, the length is
            compared against the documents produced by that splitter.
        add_to_docstore: Boolean of whether to add documents to docstore.
            This can be false if and only if `ids` are provided. You may want
            to set this to False if the documents are already in the docstore
            and you don't want to re-add them.

    Raises:
        ValueError: If `ids` is omitted while `add_to_docstore` is False, or
            if `ids` and the documents differ in length.
    """
    if self.parent_splitter is not None:
        documents = self.parent_splitter.split_documents(documents)
    if ids is None:
        # Freshly generated ids are only discoverable through the docstore, so
        # skipping the docstore write would orphan the child chunks. Check this
        # before generating any ids.
        if not add_to_docstore:
            raise ValueError(
                "If ids are not passed in, `add_to_docstore` MUST be True"
            )
        doc_ids = [str(uuid.uuid4()) for _ in documents]
    else:
        if len(documents) != len(ids):
            raise ValueError(
                "Got uneven list of documents and ids. "
                "If `ids` is provided, should be same length as `documents`."
            )
        doc_ids = ids

    docs = []
    full_docs = []
    for _id, doc in zip(doc_ids, documents):
        sub_docs = self.child_splitter.split_documents([doc])
        # Tag every child chunk with its parent's id so the parent can be
        # looked up from a vectorstore hit.
        for _doc in sub_docs:
            _doc.metadata[self.id_key] = _id
        docs.extend(sub_docs)
        full_docs.append((_id, doc))
    self.vectorstore.add_documents(docs)
    if add_to_docstore:
        self.docstore.mset(full_docs)

0 comments on commit 570d4e2

Please sign in to comment.