From 37abd2b1001944cd6923d7c18c0b74639074eaf7 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Tue, 21 Jan 2025 12:33:59 -0800 Subject: [PATCH 1/5] fix images --- .../server/query_and_chat/chat_backend.py | 44 ++++++------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/backend/onyx/server/query_and_chat/chat_backend.py b/backend/onyx/server/query_and_chat/chat_backend.py index 669a416322f..f2d8e072edb 100644 --- a/backend/onyx/server/query_and_chat/chat_backend.py +++ b/backend/onyx/server/query_and_chat/chat_backend.py @@ -5,7 +5,6 @@ import uuid from collections.abc import Callable from collections.abc import Generator -from typing import Tuple from uuid import UUID from fastapi import APIRouter @@ -15,7 +14,6 @@ from fastapi import Response from fastapi import UploadFile from fastapi.responses import StreamingResponse -from PIL import Image from pydantic import BaseModel from sqlalchemy.orm import Session @@ -595,21 +593,6 @@ def seed_chat_from_slack( """File upload""" -def convert_to_jpeg(file: UploadFile) -> Tuple[io.BytesIO, str]: - try: - with Image.open(file.file) as img: - if img.mode != "RGB": - img = img.convert("RGB") - jpeg_io = io.BytesIO() - img.save(jpeg_io, format="JPEG", quality=85) - jpeg_io.seek(0) - return jpeg_io, "image/jpeg" - except Exception as e: - raise HTTPException( - status_code=400, detail=f"Failed to convert image: {str(e)}" - ) - - @router.post("/file") def upload_files_for_chat( files: list[UploadFile], @@ -674,24 +657,22 @@ def upload_files_for_chat( file_store = get_default_file_store(db_session) + file_type_mapping = { + **{ct: ChatFileType.IMAGE for ct in image_content_types}, + **{ct: ChatFileType.CSV for ct in csv_content_types}, + **{ct: ChatFileType.DOC for ct in document_content_types}, + } + file_info: list[tuple[str, str | None, ChatFileType]] = [] for file in files: - if file.content_type in image_content_types: - file_type = ChatFileType.IMAGE - # Convert image to JPEG - file_content, new_content_type = convert_to_jpeg(file) - elif file.content_type in csv_content_types: - file_type = ChatFileType.CSV - file_content = io.BytesIO(file.file.read()) - new_content_type = file.content_type or "" - elif file.content_type in document_content_types: - file_type = ChatFileType.DOC - file_content = io.BytesIO(file.file.read()) - new_content_type = file.content_type or "" + file_type = file_type_mapping.get(file.content_type, ChatFileType.PLAIN_TEXT) + + if file_type == ChatFileType.IMAGE: + file_content = file.file else: - file_type = ChatFileType.PLAIN_TEXT file_content = io.BytesIO(file.file.read()) - new_content_type = file.content_type or "" + + new_content_type = file.content_type or "" # store the file (now JPEG for images) file_id = str(uuid.uuid4()) @@ -758,5 +739,6 @@ def fetch_chat_file( media_type = file_record.file_type file_io = file_store.read_file(file_id, mode="b") + print("Meidate type: ", media_type) return StreamingResponse(file_io, media_type=media_type) From f6784b5b83c77f603122565b291692b4cc5097be Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Tue, 21 Jan 2025 12:37:27 -0800 Subject: [PATCH 2/5] quick nit --- backend/onyx/server/query_and_chat/chat_backend.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/onyx/server/query_and_chat/chat_backend.py b/backend/onyx/server/query_and_chat/chat_backend.py index f2d8e072edb..786a0ed8df8 100644 --- a/backend/onyx/server/query_and_chat/chat_backend.py +++ b/backend/onyx/server/query_and_chat/chat_backend.py @@ -628,6 +628,9 @@ def upload_files_for_chat( ) for file in files: + if not file.content_type: + raise HTTPException(status_code=400, detail="File content type is required") + if file.content_type not in allowed_content_types: if file.content_type in image_content_types: error_detail = "Unsupported image file type. Supported image types include .jpg, .jpeg, .png, .webp." @@ -672,7 +675,7 @@ def upload_files_for_chat( else: file_content = io.BytesIO(file.file.read()) - new_content_type = file.content_type or "" + new_content_type = file.content_type # store the file (now JPEG for images) file_id = str(uuid.uuid4()) From e4020a404b1cbc3784ec00c6580e8f853951b323 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Tue, 21 Jan 2025 12:38:19 -0800 Subject: [PATCH 3/5] quick nit --- backend/onyx/server/query_and_chat/chat_backend.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/onyx/server/query_and_chat/chat_backend.py b/backend/onyx/server/query_and_chat/chat_backend.py index 786a0ed8df8..236255325ff 100644 --- a/backend/onyx/server/query_and_chat/chat_backend.py +++ b/backend/onyx/server/query_and_chat/chat_backend.py @@ -668,7 +668,9 @@ def upload_files_for_chat( file_info: list[tuple[str, str | None, ChatFileType]] = [] for file in files: - file_type = file_type_mapping.get(file.content_type, ChatFileType.PLAIN_TEXT) + file_type = file_type_mapping.get( + str(file.content_type), ChatFileType.PLAIN_TEXT + ) if file_type == ChatFileType.IMAGE: file_content = file.file From 3dae32033e25b424e73fecd5ca08b72d8633f9a2 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Tue, 21 Jan 2025 13:11:08 -0800 Subject: [PATCH 4/5] update --- backend/onyx/server/query_and_chat/chat_backend.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/onyx/server/query_and_chat/chat_backend.py b/backend/onyx/server/query_and_chat/chat_backend.py index 236255325ff..39dbcbd8f63 100644 --- a/backend/onyx/server/query_and_chat/chat_backend.py +++ b/backend/onyx/server/query_and_chat/chat_backend.py @@ -674,6 +674,11 @@ def upload_files_for_chat( if file_type == ChatFileType.IMAGE: file_content = file.file + # NOTE: Image conversion to JPEG used to be enforced here. + # This was removed to: + # 1. Preserve original file content for downloads + # 2. Maintain transparency in formats like PNG + # 3. Ameliorate issue with file conversion else: file_content = io.BytesIO(file.file.read()) @@ -744,6 +749,5 @@ def fetch_chat_file( media_type = file_record.file_type file_io = file_store.read_file(file_id, mode="b") - print("Meidate type: ", media_type) return StreamingResponse(file_io, media_type=media_type) From 15dfaa156a49e2d735f05c867d6b3db033fb1342 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Tue, 21 Jan 2025 13:12:19 -0800 Subject: [PATCH 5/5] update for clarity --- .../onyx/server/query_and_chat/chat_backend.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/onyx/server/query_and_chat/chat_backend.py b/backend/onyx/server/query_and_chat/chat_backend.py index 39dbcbd8f63..32c278dcdc9 100644 --- a/backend/onyx/server/query_and_chat/chat_backend.py +++ b/backend/onyx/server/query_and_chat/chat_backend.py @@ -660,16 +660,16 @@ def upload_files_for_chat( file_store = get_default_file_store(db_session) - file_type_mapping = { - **{ct: ChatFileType.IMAGE for ct in image_content_types}, - **{ct: ChatFileType.CSV for ct in csv_content_types}, - **{ct: ChatFileType.DOC for ct in document_content_types}, - } - file_info: list[tuple[str, str | None, ChatFileType]] = [] for file in files: - file_type = file_type_mapping.get( - str(file.content_type), ChatFileType.PLAIN_TEXT + file_type = ( + ChatFileType.IMAGE + if file.content_type in image_content_types + else ChatFileType.CSV + if file.content_type in csv_content_types + else ChatFileType.DOC + if file.content_type in document_content_types + else ChatFileType.PLAIN_TEXT ) if file_type == ChatFileType.IMAGE: