diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index c3f0b75cfba5f1..15822867bbe5e4 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -170,6 +170,8 @@ def _parse_cell_paragraph(self, paragraph, image_map): if run.element.xpath('.//a:blip'): for blip in run.element.xpath('.//a:blip'): image_id = blip.get("{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed") + if not image_id: + continue image_part = paragraph.part.rels[image_id].target_part if image_part in image_map: @@ -256,6 +258,6 @@ def parse_paragraph(paragraph): content.append(parsed_paragraph) elif isinstance(element.tag, str) and element.tag.endswith('tbl'): # table table = tables.pop(0) - content.append(self._table_to_markdown(table,image_map)) + content.append(self._table_to_markdown(table, image_map)) return '\n'.join(content)