Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exploratory script for parsing astro.ph feed #2

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
# astro-ph-bots
A collection of bots that post new papers added to astro-ph to Bluesky.

## `newsub`-bot
A Bluesky bot that posts the abstract link and metadata of new submissions each day.
100 changes: 100 additions & 0 deletions newsubs/hello_arxiv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import arxiv
from datetime import datetime, timedelta, timezone
from pylatexenc.latex2text import LatexNodes2Text

# Upper bound on the character count of a single formatted post.
CHARLIM = 300

# One shared arXiv API client for every query made by this script.
client = arxiv.Client()

# astro-ph subcategory codes and their emoji, listed in matching order.
subcats = ['CO','EP','GA','HE','IM','SR']
subcat_emoji = ['🔮','🪐','🌀','🎆','🛠️','✨']
# Lookup table: subcategory code -> emoji.
emojidict = {code: emoji for code, emoji in zip(subcats, subcat_emoji)}

def formatTex(s):
    """
    Pre-process ``s`` and convert any TeX markup in it to Unicode text.

    The string goes through an ampersand substitution and is then passed
    to ``LatexNodes2Text().latex_to_text``; the converted text is returned.
    """
    # NOTE(review): as written this swaps '&' for '&', which changes nothing.
    # Presumably an escape sequence or HTML entity was lost here -- confirm
    # the intended substitution before relying on it.
    cleaned = s.replace('&', '&')
    converter = LatexNodes2Text()
    return converter.latex_to_text(cleaned)

def _shortenName(fullname):
    """Abbreviate the given names in ``fullname``, keeping the last token whole.

    Capitalised given names become initials, with every hyphen-separated
    piece initialled ("Jean-Pierre" -> "J.-P."). Tokens starting with a
    lowercase letter (particles such as "van" or "de") are kept verbatim.
    """
    # split() with no argument collapses repeated whitespace, so there are
    # never empty tokens to index into.
    tokens = fullname.split()
    if not tokens:
        return ""
    *given, surname = tokens
    pieces = []
    for token in given:
        if token[0].islower():
            pieces.append(token)
            continue
        initials = "-".join(f"{part[0]}." for part in token.split("-") if part)
        # A token made only of hyphens has no initials; keep it unchanged.
        pieces.append(initials or token)
    pieces.append(surname)
    return " ".join(pieces)


def formatAuthors(authors, shortnames=False):
    """
    Format the author list as a single line of text.

    Parameters
    ----------
    authors : sequence of objects with a ``name`` attribute
        The authors, in publication order.
    shortnames : bool, optional
        When true, given names are abbreviated to initials to save space.

    Returns
    -------
    str
        ``""`` for no authors, ``"A"`` for one, ``"A & B"`` for two,
        ``"A, B & C"`` for three, and ``"A et al."`` for more than three.
        The result never carries leading or trailing whitespace.
    """
    # With more than three authors only the first one is shown.
    truncated = len(authors) > 3
    shown = authors[:1] if truncated else authors
    names = [
        _shortenName(auth.name) if shortnames else auth.name
        for auth in shown
    ]
    if truncated:
        return f"{names[0]} et al."
    if len(names) <= 1:
        return "".join(names)
    # Commas between all but the final pair, which is joined by an ampersand.
    return ", ".join(names[:-1]) + " & " + names[-1]

# Reference time for the freshness filter in the loop below.
# NOTE(review): assuming paper.published is timezone-aware, the difference
# of two aware datetimes is the true elapsed time, so the UTC-5 offset does
# not shift the window -- confirm whether a calendar-day cutoff was intended.
now = datetime.now(timezone(timedelta(hours=-5)))
query = "cat:astro-ph.*"

search = arxiv.Search(
    query=query,
    max_results=200,
    sort_by=arxiv.SortCriterion.SubmittedDate,
)

results = client.results(search)

posts = []  # formatted text of every post, in result order
feeds = []  # astro-ph subcategory codes per post, parallel to `posts`

for paper in results:
    # Keep only papers published less than one full day before `now`.
    if (now - paper.published).days >= 1:
        continue

    # "astro-ph.GA" -> "GA"; categories outside astro-ph are ignored.
    feedlist = [c.split('.')[-1] for c in paper.categories if "astro-ph." in c]
    # add emoji! (a subcategory without a mapping contributes nothing
    # instead of aborting the whole run with a KeyError)
    emojis = ''.join(emojidict.get(feed, '') for feed in feedlist)

    title = formatTex(paper.title)
    namestring = formatAuthors(paper.authors, shortnames=False)

    # pdf_url can be missing; per the arxiv package docs entry_id is already
    # the abstract-page URL, so it serves as the fallback.
    source_url = paper.pdf_url or paper.entry_id
    # don't need https:// prefix for bsky
    # Only the "/pdf/" path segment is rewritten, and only once.
    abs_url = source_url.split('//')[-1].replace("/pdf/", "/abs/", 1)

    post = [title, namestring, f"{abs_url} {emojis}"]
    full_len = len('\n'.join(post))
    if full_len > CHARLIM:
        title_words = title.split(" ")
        # Drop trailing title words one at a time, re-measuring the real
        # post length (ellipsis included) after each removal. Stopping when
        # the words run out avoids indexing into an empty list.
        while title_words and len('\n'.join(post)) > CHARLIM:
            del title_words[-1]
            post[0] = ' '.join(title_words) + "..."

    post_string = '\n'.join(post)
    posts.append(post_string)
    feeds.append(feedlist)
    print(post_string)
    print(f"[posted {paper.published} - was {full_len} of {CHARLIM} characters, reduced by {full_len-len(post_string)}]\n")
156 changes: 156 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# From https://github.com/pypa/sampleproject/blob/main/pyproject.toml

[project]
# This is the name of your project. The first time you publish this
# package, this name will be registered for you. It will determine how
# users can install this project, e.g.:
#
# $ pip install sampleproject
#
# And where it will live on PyPI: https://pypi.org/project/sampleproject/
#
# There are some restrictions on what makes a valid project name
# specification here:
# https://packaging.python.org/specifications/core-metadata/#name
name = "astro-ph-bot" # Required

# Versions should comply with PEP 440:
# https://www.python.org/dev/peps/pep-0440/
#
# For a discussion on single-sourcing the version, see
# https://packaging.python.org/guides/single-sourcing-package-version/
version = "0.0.1" # Required

# This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field:
# https://packaging.python.org/specifications/core-metadata/#summary
description = "A bot for collecting daily mailings on astro-ph, formatting and posting on Bluesky"

# This is an optional longer description of your project that represents
# the body of text which users will see when they visit PyPI.
#
# Often, this is the same as your README, so you can just read it in from
# that file directly.
#
# This field corresponds to the "Description" metadata field:
# https://packaging.python.org/specifications/core-metadata/#description-optional
readme = "README.md" # Optional

# Specify which Python versions you support. In contrast to the
# 'Programming Language' classifiers below, 'pip install' will check this
# and refuse to install the project if the version does not match. See
# https://packaging.python.org/guides/distributing-packages-using-setuptools/#python-requires
requires-python = ">=3.12,<3.13"

# This is either text indicating the license for the distribution, or a file
# that contains the license
# https://packaging.python.org/en/latest/specifications/core-metadata/#license
license = {file = "LICENSE"}

# This field adds keywords for your project which will appear on the
# project page. What does your project relate to?
#
# Note that this is a list of additional keywords, separated
# by commas, to be used to assist searching for the distribution in a
# larger catalog.
keywords = ["bluesky", "astronomy"] # Optional

# This should be your name or the name of the organization who originally
# authored the project, and a valid email address corresponding to the name
# listed.
authors = [
{name = "James W. Trayford", email = "[email protected]" } # Optional
]

# This should be your name or the names of the organization who currently
# maintains the project, and a valid email address corresponding to the name
# listed.
maintainers = [
{name = "James W. Trayford", email = "[email protected]" } # Optional
]

# Classifiers help users find your project by categorizing it.
#
# For a list of valid classifiers, see https://pypi.org/classifiers/
classifiers = [ # Optional
# How mature is this project? Common values are
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
"Development Status :: 3 - Alpha",

# Indicate who your project is intended for
"Intended Audience :: Developers",
"Topic :: Software Development :: Build Tools",

# Pick your license as you wish
"License :: OSI Approved :: MIT License",

# Specify the Python versions you support here. In particular, ensure
# that you indicate you support Python 3. These classifiers are *not*
# checked by "pip install". See instead "requires-python" above.
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3 :: Only",
]
dependencies = [
"arxiv>=2.1.3",
"pylatexenc>=2.10",
]

# This field lists other packages that your project depends on to run.
# Any package you put here will be installed by pip when your project is
# installed, so they must be valid existing projects.
#
# For an analysis of this field vs pip's requirements files see:
# https://packaging.python.org/discussions/install-requires-vs-requirements/
# dependencies = [ # Optional
# "astrofeed_lib@git+https://github.com/bluesky-astronomy/astrofeed-lib",
# ]

# List additional groups of dependencies here (e.g. development
# dependencies). Users will be able to install these using the "extras"
# syntax, for example:
#
# $ pip install sampleproject[dev]
#
# Similar to `dependencies` above, these must be valid existing
# projects.
# [project.optional-dependencies] # Optional
# dev = ["check-manifest"]
# test = ["coverage"]

# List URLs that are relevant to your project
#
# This field corresponds to the "Project-URL" and "Home-Page" metadata fields:
# https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use
# https://packaging.python.org/specifications/core-metadata/#home-page-optional
#
# Examples listed include a pattern for specifying where the package tracks
# issues, where the source is hosted, where to say thanks to the package
# maintainers, and where to support the project financially. The key is
# what's used to render the link text on PyPI.
[project.urls] # Optional
"Homepage" = "https://astronomy.blue/"
# Link to the issue tracker, not the git clone URL, so the PyPI
# "Bug Reports" link lands on a page where issues can be filed.
"Bug Reports" = "https://github.com/bluesky-astronomy/astro-ph-bots/issues"
# "Funding" = "https://donate.pypi.org"
# "Say Thanks!" = "http://saythanks.io/to/example"
# "Source" = "https://github.com/pypa/sampleproject/"

# The following would provide a command line executable called `sample`
# which executes the function `main` from this package when invoked.
# [project.scripts] # Optional
# sample = "sample:main"

# This is configuration specific to the `setuptools` build backend.
# If you are using a different build backend, you will need to change this.
# [tool.setuptools]
# # If there are data files included in your packages that need to be
# # installed, specify them here.
# package-data = {"sample" = ["*.dat"]}

[build-system]
# These are the assumed default build requirements from pip:
# https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support
requires = ["setuptools>=43.0.0", "wheel"]
build-backend = "setuptools.build_meta"