From fd4a2e6b2da51fb6b3c50f636f795c0599341ff8 Mon Sep 17 00:00:00 2001 From: Rui Barbosa Date: Thu, 5 Sep 2024 00:56:35 -0400 Subject: [PATCH] Box bug fix 202409 (#15836) * fix document metadata for box file * fix BoxSearchOptions class initialization * bump versions to 0.2.1 --- .../readers/box/BoxAPI/box_llama_adaptors.py | 12 ++++----- .../llama-index-readers-box/pyproject.toml | 2 +- .../tests/test_readers_box_reader.py | 12 +++++++++ .../llama_index/tools/box/search/base.py | 26 +++++++++++++++++++ .../llama-index-tools-box/pyproject.toml | 2 +- .../tests/test_tools_box_search.py | 11 ++++++++ 6 files changed, 57 insertions(+), 8 deletions(-) diff --git a/llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxAPI/box_llama_adaptors.py b/llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxAPI/box_llama_adaptors.py index 1b42f6a101de7..1df59ba23c0ed 100644 --- a/llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxAPI/box_llama_adaptors.py +++ b/llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxAPI/box_llama_adaptors.py @@ -23,12 +23,12 @@ def box_file_to_llama_document_metadata(box_file: File) -> dict: "description": box_file.description, "size": box_file.size, "path_collection": path_collection, - "created_at": box_file.created_at, - "modified_at": box_file.modified_at, - "trashed_at": box_file.trashed_at, - "purged_at": box_file.purged_at, - "content_created_at": box_file.content_created_at, - "content_modified_at": box_file.content_modified_at, + "created_at": box_file.created_at.isoformat(), + "modified_at": box_file.modified_at.isoformat(), + "trashed_at": box_file.trashed_at.isoformat() if box_file.trashed_at else None, + "purged_at": box_file.purged_at.isoformat() if box_file.purged_at else None, + "content_created_at": box_file.content_created_at.isoformat(), + "content_modified_at": box_file.content_modified_at.isoformat(), 
"created_by": f"{box_file.created_by.id},{box_file.created_by.name},{box_file.created_by.login}", "modified_by": f"{box_file.modified_by.id},{box_file.modified_by.name},{box_file.modified_by.login}", "owned_by": f"{box_file.owned_by.id},{box_file.owned_by.name},{box_file.owned_by.login}", diff --git a/llama-index-integrations/readers/llama-index-readers-box/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-box/pyproject.toml index b9180d32cc7c5..d6562fb3bcadd 100644 --- a/llama-index-integrations/readers/llama-index-readers-box/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-box/pyproject.toml @@ -37,7 +37,7 @@ maintainers = [ name = "llama-index-readers-box" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/readers/llama-index-readers-box/tests/test_readers_box_reader.py b/llama-index-integrations/readers/llama-index-readers-box/tests/test_readers_box_reader.py index 7cd1681d7f988..3bc2a0557737d 100644 --- a/llama-index-integrations/readers/llama-index-readers-box/tests/test_readers_box_reader.py +++ b/llama-index-integrations/readers/llama-index-readers-box/tests/test_readers_box_reader.py @@ -1,3 +1,4 @@ +import datetime import pytest from pathlib import Path from llama_index.core.readers.base import BaseReader @@ -47,6 +48,17 @@ def test_box_reader_csv(box_client_ccg_integration_testing: BoxClient): assert len(docs) == 1 +def test_box_reader_metadata(box_client_ccg_integration_testing: BoxClient): + test_data = get_testing_data() + reader = BoxReader(box_client=box_client_ccg_integration_testing) + docs = reader.load_data(file_ids=[test_data["test_csv_id"]]) + assert len(docs) == 1 + doc = docs[0] + # check if metadata dictionary does not contain any datetime objects + for v in doc.metadata.values(): + assert not isinstance(v, (datetime.datetime, datetime.date, datetime.time)) + + 
def test_box_reader_folder(box_client_ccg_integration_testing): # Very slow test test_data = get_testing_data() diff --git a/llama-index-integrations/tools/llama-index-tools-box/llama_index/tools/box/search/base.py b/llama-index-integrations/tools/llama-index-tools-box/llama_index/tools/box/search/base.py index 5cfe7d3d0c3cd..b29cdcd0aeb2b 100644 --- a/llama-index-integrations/tools/llama-index-tools-box/llama_index/tools/box/search/base.py +++ b/llama-index-integrations/tools/llama-index-tools-box/llama_index/tools/box/search/base.py @@ -58,6 +58,32 @@ class BoxSearchOptions: limit: Optional[int] = None offset: Optional[int] = None + def __init__( + self, + scope: Optional[SearchForContentScope] = None, + file_extensions: Optional[List[str]] = None, + created_at_range: Optional[List[str]] = None, + updated_at_range: Optional[List[str]] = None, + size_range: Optional[List[int]] = None, + owner_user_ids: Optional[List[str]] = None, + recent_updater_user_ids: Optional[List[str]] = None, + ancestor_folder_ids: Optional[List[str]] = None, + content_types: Optional[List[SearchForContentContentTypes]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + ) -> None: + self.scope = scope + self.file_extensions = file_extensions + self.created_at_range = created_at_range + self.updated_at_range = updated_at_range + self.size_range = size_range + self.owner_user_ids = owner_user_ids + self.recent_updater_user_ids = recent_updater_user_ids + self.ancestor_folder_ids = ancestor_folder_ids + self.content_types = content_types + self.limit = limit + self.offset = offset + class BoxSearchToolSpec(BaseToolSpec): """ diff --git a/llama-index-integrations/tools/llama-index-tools-box/pyproject.toml b/llama-index-integrations/tools/llama-index-tools-box/pyproject.toml index 6f226d1cf64cb..bca3f8c6c89bd 100644 --- a/llama-index-integrations/tools/llama-index-tools-box/pyproject.toml +++ b/llama-index-integrations/tools/llama-index-tools-box/pyproject.toml @@ -34,7 
+34,7 @@ license = "MIT" name = "llama-index-tools-box" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/tools/llama-index-tools-box/tests/test_tools_box_search.py b/llama-index-integrations/tools/llama-index-tools-box/tests/test_tools_box_search.py index 8f46cb31ef582..95e2f69b93f0c 100644 --- a/llama-index-integrations/tools/llama-index-tools-box/tests/test_tools_box_search.py +++ b/llama-index-integrations/tools/llama-index-tools-box/tests/test_tools_box_search.py @@ -19,6 +19,17 @@ def test_box_tool_search(box_client_ccg_integration_testing: BoxClient): assert len(docs) > 0 +def test_box_tool_search_options(box_client_ccg_integration_testing: BoxClient): + options = BoxSearchOptions(file_extensions=["pdf"]) + options.limit = 5 + + box_tool = BoxSearchToolSpec(box_client_ccg_integration_testing, options=options) + + query = "sample" + docs = box_tool.box_search(query=query) + assert len(docs) > 0 + + def test_box_tool_search_agent(box_client_ccg_integration_testing: BoxClient): test_data = get_testing_data() openai_api_key = test_data["openai_api_key"]