Skip to content

Commit

Permalink
fix NUL character
Browse files — browse the repository at this point in the history
  • Loading branch information
pablonyx committed Dec 21, 2024
1 parent 457a4c7 commit cad8253
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions backend/onyx/indexing/indexing_pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -260,6 +260,21 @@ def index_doc_batch_prepare(
def filter_documents(document_batch: list[Document]) -> list[Document]:
documents: list[Document] = []
for document in document_batch:
# Remove any NUL characters from title/semantic_id
# This is a known issue with the Zendesk connector
# Postgres cannot handle NUL characters in text fields
if document.title:
document.title = document.title.replace("\x00", "")
if document.semantic_identifier:
document.semantic_identifier = document.semantic_identifier.replace(
"\x00", ""
)

# Remove NUL characters from all sections
for section in document.sections:
if section.text is not None:
section.text = section.text.replace("\x00", "")

empty_contents = not any(section.text.strip() for section in document.sections)
if (
(not document.title or not document.title.strip())
Expand Down

0 comments on commit cad8253

Please sign in to comment.