You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I have an extract.py file that takes a PDF location as a parameter and runs the extraction. This works fine on Arch Linux,
but on Debian 9 (or 10, fully up to date) it goes into an infinite loop.
Command I am trying to run: ./extract.py /tmp/p_3f4b8d2131dca8b1e1890d1b890ceb26.pdf
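extract.py source:
```python
import sys
from refextract import extract_references_from_file
if len(sys.argv) != 2:
    sys.exit()
references = extract_references_from_file(sys.argv[1])
```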
When I press Ctrl+C to interrupt the loop, it gives the following output:
```
^CTraceback (most recent call last):
  File "./extract.py", line 9, in <module>
    references = extract_references_from_file(sys.argv[1])
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/api.py", line 139, in extract_references_from_file
    override_kbs_files=override_kbs_files,
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/engine.py", line 1456, in parse_references
    parse_references_elements(reference_lines, kbs, linker_callback)
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/engine.py", line 878, in parse_references_elements
    clean_line, kbs, bad_titles_count, linker_callback)
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/engine.py", line 635, in parse_reference_line
    bad_titles_count)
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/tag.py", line 174, in tag_reference_line
    kbs=kbs,
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/tag.py", line 326, in process_reference_line
    tagged_line = identify_and_tag_authors(tagged_line, kbs['authors'])
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/tag.py", line 881, in identify_and_tag_authors
    re_auth, re_auth_near_miss = get_author_regexps()
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/authors/regexs.py", line 470, in get_author_regexps
    re.VERBOSE | re.UNICODE))
  File "/usr/lib/python3.7/re.py", line 234, in compile
    return _compile(pattern, flags)
  File "/usr/lib/python3.7/re.py", line 286, in _compile
    p = sre_compile.compile(pattern, flags)
  File "/usr/lib/python3.7/sre_compile.py", line 764, in compile
    p = sre_parse.parse(p, flags)
  File "/usr/lib/python3.7/sre_parse.py", line 930, in parse
    p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 816, in _parse
    p = _parse_sub(source, state, sub_verbose, nested + 1)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 816, in _parse
    p = _parse_sub(source, state, sub_verbose, nested + 1)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 816, in _parse
    p = _parse_sub(source, state, sub_verbose, nested + 1)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 816, in _parse
    p = _parse_sub(source, state, sub_verbose, nested + 1)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 587, in _parse
    set = _uniq(set)
  File "/usr/lib/python3.7/sre_parse.py", line 413, in _uniq
    if item not in newitems:
KeyboardInterrupt
```
Additional information:
Python version: 3.7
pdftotext version: 0.71.0 (this extracts text from the pdf just fine)
One of the PDF files that I used (one of many; it hangs on every one, yet every one is processed successfully on Arch Linux): a.pdf
The text was updated successfully, but these errors were encountered:
Good day,
I have an extract.py file that takes a PDF location as a parameter and runs the extraction. This works fine on Arch Linux,
but on Debian 9 (or 10, fully up to date) it goes into an infinite loop.
Command I am trying to run:
./extract.py /tmp/p_3f4b8d2131dca8b1e1890d1b890ceb26.pdf
extract.py source:
```python
import sys
from refextract import extract_references_from_file
if len(sys.argv) != 2:
    sys.exit()
references = extract_references_from_file(sys.argv[1])
```
When I press Ctrl+C to interrupt the loop, it gives the following output:
```
^CTraceback (most recent call last):
  File "./extract.py", line 9, in <module>
    references = extract_references_from_file(sys.argv[1])
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/api.py", line 139, in extract_references_from_file
    override_kbs_files=override_kbs_files,
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/engine.py", line 1456, in parse_references
    parse_references_elements(reference_lines, kbs, linker_callback)
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/engine.py", line 878, in parse_references_elements
    clean_line, kbs, bad_titles_count, linker_callback)
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/engine.py", line 635, in parse_reference_line
    bad_titles_count)
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/tag.py", line 174, in tag_reference_line
    kbs=kbs,
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/tag.py", line 326, in process_reference_line
    tagged_line = identify_and_tag_authors(tagged_line, kbs['authors'])
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/references/tag.py", line 881, in identify_and_tag_authors
    re_auth, re_auth_near_miss = get_author_regexps()
  File "/home/sspm/.local/lib/python3.7/site-packages/refextract/authors/regexs.py", line 470, in get_author_regexps
    re.VERBOSE | re.UNICODE))
  File "/usr/lib/python3.7/re.py", line 234, in compile
    return _compile(pattern, flags)
  File "/usr/lib/python3.7/re.py", line 286, in _compile
    p = sre_compile.compile(pattern, flags)
  File "/usr/lib/python3.7/sre_compile.py", line 764, in compile
    p = sre_parse.parse(p, flags)
  File "/usr/lib/python3.7/sre_parse.py", line 930, in parse
    p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 816, in _parse
    p = _parse_sub(source, state, sub_verbose, nested + 1)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 816, in _parse
    p = _parse_sub(source, state, sub_verbose, nested + 1)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 816, in _parse
    p = _parse_sub(source, state, sub_verbose, nested + 1)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 816, in _parse
    p = _parse_sub(source, state, sub_verbose, nested + 1)
  File "/usr/lib/python3.7/sre_parse.py", line 426, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.7/sre_parse.py", line 587, in _parse
    set = _uniq(set)
  File "/usr/lib/python3.7/sre_parse.py", line 413, in _uniq
    if item not in newitems:
KeyboardInterrupt
```
Additional information:
Python version: 3.7
pdftotext version: 0.71.0 (this extracts text from the pdf just fine)
One of the PDF files that I used (one of many; it hangs on every one, yet every one is processed successfully on Arch Linux):
a.pdf
The text was updated successfully, but these errors were encountered: