Skip to content
This repository has been archived by the owner on Apr 15, 2024. It is now read-only.

TypeError: argument of type 'NoneType' is not iterable #316

Open
davaer131518 opened this issue Apr 13, 2022 · 1 comment
Open

TypeError: argument of type 'NoneType' is not iterable #316

davaer131518 opened this issue Apr 13, 2022 · 1 comment

Comments

@davaer131518
Copy link

davaer131518 commented Apr 13, 2022

I wrote code that converts PDFs to text, and the error below keeps occurring on one PDF that is very similar to the others the package parses successfully (the PDF is attached). The error occurs in the following function:

def pdf_to_text(input_file, output):
    """Extract the text of a PDF and write it to a file in the PDFs folder.

    Every page returned by ``PDFPage.get_pages`` is run through a
    ``PDFPageInterpreter`` backed by a ``TextConverter`` that collects the
    text in an in-memory buffer; the buffer is then written out.

    Args:
        input_file: Path of the PDF to read (opened in binary mode).
        output: File name appended to ``<current working dir>\\PDFs\\``;
            the text is written there encoded as ``utf-8-sig``.

    Raises:
        Any exception raised while opening the files or while pdfminer
        processes a page propagates to the caller unchanged.
    """
    resMgr = PDFResourceManager()
    retData = io.StringIO()

    # Use a context manager so the PDF handle is released even when
    # processing a page raises (the original left the file open).
    with open(input_file, 'rb') as i_f:
        TxtConverter = TextConverter(resMgr, retData, laparams=LAParams())
        try:
            interpreter = PDFPageInterpreter(resMgr, TxtConverter)
            for page in PDFPage.get_pages(i_f):
                interpreter.process_page(page)
            # Read the buffer before the device is closed.
            txt = retData.getvalue()
        finally:
            # Always release the converter device, on success or failure.
            TxtConverter.close()

    with open(os.getcwd() + '\\PDFs\\' + output, 'w', encoding='utf-8-sig') as of:
        of.write(txt)

0040a8d544f3eb073be9de24a4eee14e.pdf

~\AppData\Local\Temp/ipykernel_11728/660795657.py in pdf_to_text(input_file, output)
6 interpreter = PDFPageInterpreter(resMgr,TxtConverter)
7 for page in PDFPage.get_pages(i_f):
----> 8 interpreter.process_page(page)
9
10 txt = retData.getvalue()

~\anaconda3\lib\site-packages\pdfminer\pdfinterp.py in process_page(self, page)
839 self.textstate.rise = cast(float, rise)
840 return
--> 841
842 def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None:
843 """Move text position"""

~\anaconda3\lib\site-packages\pdfminer\pdfinterp.py in render_contents(self, resources, streams, ctm)
850
851 def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None:
--> 852 """Move text position and set leading"""
853 tx = cast(float, tx)
854 ty = cast(float, ty)

~\anaconda3\lib\site-packages\pdfminer\pdfinterp.py in init_resources(self, resources)
354
355 def init_resources(self, resources: Dict[object, object]) -> None:
--> 356 """Prepare the fonts and XObjects listed in the Resource attribute."""
357 self.resources = resources
358 self.fontmap: Dict[object, PDFFont] = {}

~\anaconda3\lib\site-packages\pdfminer\pdfinterp.py in get_font(self, objid, spec)
202 else:
203 log.debug("get_font: create: objid=%r, spec=%r", objid, spec)
--> 204 if settings.STRICT:
205 if spec["Type"] is not LITERAL_FONT:
206 raise PDFFontError("Type is not /Font")

~\anaconda3\lib\site-packages\pdfminer\pdfinterp.py in get_font(self, objid, spec)
193 return CMapDB.get_cmap(cmapname)
194 except CMapDB.CMapNotFound:
--> 195 if strict:
196 raise
197 return CMap()

~\anaconda3\lib\site-packages\pdfminer\pdffont.py in __init__(self, rsrcmgr, spec)
665 self.gid2code[gid] = code
666 elif format == b"\x01":
--> 667 # Format 1
668 (n,) = struct.unpack("B", self.fp.read(1))
669 code = 0

~\anaconda3\lib\site-packages\pdfminer\pdftypes.py in get_data(self)
290 if name in self.attrs:
291 return self.attrs[name]
--> 292 return default
293
294 def get_filters(self) -> List[Tuple[Any, Any]]:

~\anaconda3\lib\site-packages\pdfminer\pdftypes.py in decode(self)
271 else:
272 assert self.data is not None
--> 273 return "<PDFStream(%r): len=%d, %r>" % (
274 self.objid,
275 len(self.data),

TypeError: argument of type 'NoneType' is not iterable

@himanshugarg
Copy link

@davaer131518 you might want to post your script and the pdf so someone who wants to reproduce the problem on their end can do so. Thanks

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants