Skip to content

Commit

Permalink
improve performance
Browse files Browse the repository at this point in the history
  • Loading branch information
louyk18 committed Jan 13, 2025
1 parent 2758dfe commit a8b7c0c
Showing 1 changed file with 7 additions and 26 deletions.
33 changes: 7 additions & 26 deletions python/graphy/utils/arxiv_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def __init__(

def find_paper_from_arxiv(self, name, max_results):
new_names = sorted(
[s for s in re.split(r"[.\\/]", name.strip()) if len(s) >= 20],
[s.strip() for s in re.split(r"[.\\/]", name.strip()) if len(s) >= 20],
key=len,
reverse=True,
)
Expand Down Expand Up @@ -132,7 +132,7 @@ def find_paper_from_arxiv(self, name, max_results):
except Exception as e:
traceback.print_exc()

if highest_similarity > 0.9 or found_result:
if highest_similarity > 0.9:
break
logger.warning(f"Not Found: {query}")

Expand Down Expand Up @@ -230,27 +230,8 @@ def fetch_papers_concurrently(


if __name__ == "__main__":
    # Smoke-test driver: resolve a single free-form citation string to an
    # arXiv paper and print the lookup result.
    #
    # NOTE(review): find_paper_from_arxiv is declared as
    # `def find_paper_from_arxiv(self, name, max_results)` with no default for
    # max_results, so it must be passed explicitly here (5 matches the value
    # used by the previous bulk-download driver).
    af = ArxivFetcher()
    output = af.find_paper_from_arxiv(
        "Peiyi Wang, Lei Li, Zhihong Shao, RX Xu, Damai Dai, Yifei Li, Deli Chen, Y Wu, and Zhifang Sui. Math-shepherd: Verify and reinforce llms step-by-step without human annotations. CoRR, abs/2312.08935 , 2023a.",
        5,
    )
    print(output)

0 comments on commit a8b7c0c

Please sign in to comment.