Skip to content

Commit

Permalink
Add more metadata options for File connector (#797)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuhongsun96 authored Nov 30, 2023
1 parent 9cff294 commit 17c2f06
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions backend/danswer/connectors/file/connector.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,6 +12,7 @@
  from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
  from danswer.connectors.cross_connector_utils.file_utils import read_file
  from danswer.connectors.cross_connector_utils.file_utils import read_pdf_file
+ from danswer.connectors.cross_connector_utils.time_utils import time_str_to_utc
  from danswer.connectors.file.utils import check_file_ext_is_valid
  from danswer.connectors.file.utils import get_file_ext
  from danswer.connectors.interfaces import GenerateDocumentsOutput
Expand Down Expand Up @@ -63,15 +64,20 @@ def _process_file(
  else:
  file_content_raw, metadata = read_file(file)

+ dt_str = metadata.get("doc_updated_at")
+ final_time_updated = time_str_to_utc(dt_str) if dt_str else time_updated
+
  return [
  Document(
  id=file_name,
  sections=[
- Section(link=metadata.get("link", None), text=file_content_raw.strip())
+ Section(link=metadata.get("link"), text=file_content_raw.strip())
  ],
  source=DocumentSource.FILE,
  semantic_identifier=file_name,
- doc_updated_at=time_updated,
+ doc_updated_at=final_time_updated,
+ primary_owners=metadata.get("primary_owners"),
+ secondary_owners=metadata.get("secondary_owners"),
  metadata={},
  )
  ]
Expand Down

1 comment on commit 17c2f06

@vercel
Copy link

@vercel vercel bot commented on 17c2f06 Nov 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.