From 96b6b8033668a09eb99d551eecd8d56eb5af2a51 Mon Sep 17 00:00:00 2001 From: apoorvalal Date: Wed, 11 Oct 2023 13:57:35 -0700 Subject: [PATCH] added modes --- .github/workflows/post.yml | 34 ++++++++--- paperbot.py | 121 +++++++++++++++++++++---------------- 2 files changed, 96 insertions(+), 59 deletions(-) diff --git a/.github/workflows/post.yml b/.github/workflows/post.yml index 4423c9d..db9f953 100644 --- a/.github/workflows/post.yml +++ b/.github/workflows/post.yml @@ -2,24 +2,44 @@ name: "GH Arxiv Posterbot" on: workflow_dispatch: {} schedule: + - cron: "0 0,13,15,17,19,21,23 * * *" - cron: "0 0,13,19 * * *" jobs: - build: + post: runs-on: ubuntu-latest steps: - name: Checkout Repository - uses: actions/checkout@v3 + uses: actions/checkout@v2 + - uses: actions/setup-python@v2 with: - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.ref }} - - uses: actions/setup-python@v4 + python-version: '3.9' + cache: 'pip' # caching pip dependencies + - run: pip install -r requirements.txt + - name: scrape and run in post mode + run: python3 paperbot.py post + env: + BSKYBOT: ${{ secrets.BSKYBOT }} + BSKYPWD: ${{ secrets.BSKYPWD }} + - name: Commit and push + uses: EndBug/add-and-commit@v9 + with: + add: "." + push: true + default_author: github_actions + + update: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v2 + - uses: actions/setup-python@v2 with: python-version: '3.9' cache: 'pip' # caching pip dependencies - run: pip install -r requirements.txt - - name: scrape and run - run: python3 paperbot.py + - name: scrape and run in update mode + run: python3 paperbot.py update env: BSKYBOT: ${{ secrets.BSKYBOT }} BSKYPWD: ${{ secrets.BSKYPWD }} diff --git a/paperbot.py b/paperbot.py index c1c68c2..a38d043 100644 --- a/paperbot.py +++ b/paperbot.py @@ -155,7 +155,7 @@ def get_arxiv_feed(subject: str): return res -def get_and_write_feed_json(feedname: str, filename: str): +def get_and_write_feed_json(feedname: str, filename: str, write: bool = True): feed = get_arxiv_feed(feedname) with open(filename, "r") as f: archive = json.load(f) @@ -165,7 +165,7 @@ def get_and_write_feed_json(feedname: str, filename: str): if k not in archive: new_archive[k] = v # write out only if new items exist - if len(new_archive) > len(archive): + if len(new_archive) > len(archive) and write: with open(filename, "w") as f: json.dump(new_archive, f, indent=None) print(f"{filename} updated") @@ -173,62 +173,79 @@ def get_and_write_feed_json(feedname: str, filename: str): # %% +import argparse + + def main(): - # query and write immediately - stats_pull, stat_me_archive = get_and_write_feed_json( - "stat.ME", "stat_me_draws.json" + parser = argparse.ArgumentParser( + description="Post new articles from arxiv stat.ME and econ.EM." + ) + parser.add_argument( + "mode", choices=["update", "post"], help="mode to run the script in" ) - em_pull, econ_em_archive = get_and_write_feed_json("econ.EM", "econ_em_draws.json") - ###################################################################### - # stats - ###################################################################### - # read existing data from "stat_me_draws.json" file - new_posts = 0 - # Append new data to existing data - for k, v in stats_pull.items(): - if k not in stat_me_archive: # if not already posted + args = parser.parse_args() + if args.mode == "update": + # query and write immediately + stats_pull, stat_me_archive = get_and_write_feed_json( + "stat.ME", "stat_me_draws.json", write=True + ) + em_pull, econ_em_archive = get_and_write_feed_json( + "econ.EM", "econ_em_draws.json", write=True + ) + elif args.mode == "post": + stats_pull, stat_me_archive = get_and_write_feed_json( + "stat.ME", "stat_me_draws.json", write=False + ) + em_pull, econ_em_archive = get_and_write_feed_json( + "econ.EM", "econ_em_draws.json", write=False + ) + + # read existing data from "stat_me_draws.json" file + new_posts = 0 + # Append new data to existing data + for k, v in stats_pull.items(): + if k not in stat_me_archive: # if not already posted + create_post( + f"{v['title']}\n{v['link']}\n{''.join(v['description'])}"[:297] + + "\nšŸ“ˆšŸ¤–" + ) + time.sleep(random.randint(120, 600)) + stat_me_archive[k] = v + new_posts += 1 + if new_posts == 0 & (len(stat_me_archive) > 2): + print("No new papers found; posting random paper from archive") + random_paper = random.choice(list(stat_me_archive.values())) create_post( - f"{v['title']}\n{v['link']}\n{''.join(v['description'])}"[:297] + "\nšŸ“ˆšŸ¤–" + f"{random_paper['title']}\n{random_paper['link']}\n{''.join(random_paper['description'])}"[ + :297 + ] + + "\nšŸ“ˆšŸ¤–" ) - time.sleep(random.randint(60, 300)) - stat_me_archive[k] = v - new_posts += 1 - if new_posts == 0 & (len(stat_me_archive) > 2): - print("No new papers found; posting random paper from archive") - random_paper = random.choice(list(stat_me_archive.values())) - create_post( - f"{random_paper['title']}\n{random_paper['link']}\n{''.join(random_paper['description'])}"[ - :297 - ] - + "\nšŸ“ˆšŸ¤–" - ) - time.sleep(random.randint(30, 60)) - ###################################################################### - # econometrics - ###################################################################### - new_posts = 0 - # Append new data to existing data - for k, v in em_pull.items(): - if k not in econ_em_archive: + time.sleep(random.randint(30, 60)) + ###################################################################### + # econometrics + ###################################################################### + new_posts = 0 + # Append new data to existing data + for k, v in em_pull.items(): + if k not in econ_em_archive: + create_post( + f"{v['title']}\n{v['link']}\n{''.join(v['description'])}"[:297] + + "\nšŸ“ˆšŸ¤–" + ) + time.sleep(random.randint(60, 300)) + econ_em_archive[k] = v + new_posts += 1 + if new_posts == 0 & (len(econ_em_archive) > 2): + print("No new papers found; posting random paper from archive") + random_paper = random.choice(list(econ_em_archive.values())) create_post( - f"{v['title']}\n{v['link']}\n{''.join(v['description'])}"[:297] + "\nšŸ“ˆšŸ¤–" + f"{random_paper['title']}\n{random_paper['link']}\n{''.join(random_paper['description'])}"[ + :297 + ] + + "\nšŸ“ˆšŸ¤–" ) - time.sleep(random.randint(60, 300)) - econ_em_archive[k] = v - new_posts += 1 - if new_posts == 0 & (len(econ_em_archive) > 2): - print("No new papers found; posting random paper from archive") - random_paper = random.choice(list(econ_em_archive.values())) - create_post( - f"{random_paper['title']}\n{random_paper['link']}\n{''.join(random_paper['description'])}"[ - :297 - ] - + "\nšŸ“ˆšŸ¤–" - ) - -# %% -if __name__ == "__main__": - main() +# ... # %%