-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
50 lines (42 loc) · 1.54 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import tempfile
from pathlib import Path
import pymupdf
import streamlit as st
from refcheck.doi import get_dois, doi_to_url, fetch_doi_json
# Maps each selectable document format to the file extension accepted by the
# uploader. The keys are offered as the format choices and are also passed to
# get_dois as the format name for non-PDF uploads.
FORMAT_TO_EXT: dict[str, str] = {
    "markdown": "md",
    "docx": "docx",
    "latex": "tex",
    "pdf": "pdf",
}
def _extract_dois(uploaded_file, file_format):
    """Return the DOIs that ``get_dois`` finds in an uploaded document.

    The upload is written to a temporary directory that is removed again as
    soon as extraction finishes, so repeated uploads do not accumulate files
    on disk.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``; its
            ``name`` and ``getvalue()`` are used.
        file_format: One of the keys of ``FORMAT_TO_EXT``.

    Returns:
        A list with the values produced by ``get_dois``.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Keep only the final path component: the file name is supplied by
        # the client and must not be able to point outside temp_dir.
        path = Path(temp_dir) / Path(uploaded_file.name).name
        path.write_bytes(uploaded_file.getvalue())

        if file_format != "pdf":
            # list() materializes the result before the directory is removed.
            return list(get_dois(path, file_format))

        # PDFs are flattened to one line of plain text first. The document is
        # closed before the temporary directory is cleaned up.
        with pymupdf.open(path) as doc:
            text = "".join(page.get_text() for page in doc).replace("\n", " ")
        txt_path = path.with_suffix(".txt")
        txt_path.write_bytes(text.encode("utf-8"))
        # NOTE(review): the extracted plain text is handed to get_dois with
        # the format name "rtf" - confirm this is the intended reader.
        return list(get_dois(txt_path, "rtf"))


def _format_authors(authors):
    """Join author records into a comma-separated "given family" string.

    Records that lack a "given" or "family" entry contribute whatever part
    they do have; records with neither are skipped.
    """
    names = []
    for author in authors:
        parts = (author.get("given"), author.get("family"))
        name = " ".join(part for part in parts if part)
        if name:
            names.append(name)
    return ", ".join(names)


st.title("Reference Checker")
st.write("Get a clickable list of the dois in your document.")
# Named file_format rather than format so the builtin is not shadowed.
file_format = st.selectbox("Choose a file format", options=list(FORMAT_TO_EXT))
ext = FORMAT_TO_EXT[file_format]
uploaded_file = st.file_uploader("Upload a file", type=ext)

results = []
if uploaded_file is not None:
    results = _extract_dois(uploaded_file, file_format)
    urls = [doi_to_url(doi) for doi in results]
    doi_jsons = [fetch_doi_json(url) for url in urls]

    st.write("Here are the dois in your document. Please check if they are correct!")
    for doi, url, doi_json in zip(results, urls, doi_jsons):
        # Build the markdown without reusing the enclosing quote character
        # inside an f-string, which only parses on Python 3.12 and later.
        entry = f"[**{doi}**]({url})"
        title = doi_json.get("title")
        if title:
            entry += f": *{title}*"
        # NOTE(review): unsafe_allow_html=True renders any HTML contained in
        # the title; the metadata presumably comes from an external service,
        # so confirm that rendering raw HTML here is intended.
        st.write(entry, unsafe_allow_html=True)
        st.write(_format_authors(doi_json.get("author") or []))
        st.write("")