diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index b57459c5bae12d..8a2df8dcddf7ff 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -210,9 +210,12 @@ def _extract_text_from_doc(file_content: bytes) -> str: # Process sorted content for _, item_type, item in content_items: if item_type == "paragraph": - text.append(item.text) + if isinstance(item,Paragraph): + text.append(item.text) elif item_type == "table": # Process tables + if not isinstance(item,Table): + continue try: # Check if any cell in the table has text has_content = False