Skip to content

Commit

Permalink
catch json ValueError and provide example schema (#281)
Browse files Browse the repository at this point in the history
Closes #271 

Catches ValueError raised by partition() and transforms it to a 400
error. Also provides an example of the Unstructured schema.
  • Loading branch information
Coniferish authored Oct 12, 2023
1 parent 528849e commit 1121f12
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 1 deletion.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

* Bump unstructured to 0.10.21
* Fix an unhandled error when a non pdf file is sent with content-type pdf
* Fix unhandled error when a non docx file is sent with content-type docx
* Fix an unhandled error when a non docx file is sent with content-type docx
* Fix an unhandled error when a non-Unstructured json schema is sent

## 0.0.51

Expand Down
5 changes: 5 additions & 0 deletions prepline_general/api/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,11 @@ def pipeline_api(
raise HTTPException(
status_code=400, detail=f"{file_content_type} not currently supported"
)
if "Unstructured schema" in e.args[0]:
raise HTTPException(
status_code=400,
detail="Json schema does not match the Unstructured schema",
)
raise e
except zipfile.BadZipFile as e:
if "File is not a zip file" in e.args[0]:
Expand Down
27 changes: 27 additions & 0 deletions test_general/api/test_app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
import os

import io
import pytest
Expand Down Expand Up @@ -760,3 +761,29 @@ def test_general_api_returns_400_bad_docx():
)
assert "txt is not a valid" in response.json().get("detail")
assert response.status_code == 400


def test_general_api_returns_400_bad_json(tmpdir):
    """
    Verify that we get a 400 for json that does not match the Unstructured schema.

    Writes a small, well-formed JSON document to a temporary file, posts it
    to the main API route, and checks that the response is a 400 whose
    ``detail`` mentions the Unstructured schema.
    """
    client = TestClient(app)
    data = '{"hi": "there"}'

    filepath = os.path.join(tmpdir, "unprocessable.json")
    with open(filepath, "w") as f:
        f.write(data)

    # Open the upload in a context manager so the handle is always closed,
    # even if the request itself raises.
    with open(filepath, "rb") as upload:
        response = client.post(
            MAIN_API_ROUTE,
            files=[
                (
                    "files",
                    (
                        str(filepath),
                        upload,
                    ),
                )
            ],
        )

    # Check the status first: if the request failed in some other way, this
    # gives a clearer failure than a substring test on a missing "detail".
    assert response.status_code == 400
    assert "Unstructured schema" in response.json().get("detail")

0 comments on commit 1121f12

Please sign in to comment.