From d4a4b8bddc835f621c27c7edcb6adb97ac9484fb Mon Sep 17 00:00:00 2001 From: Peter Taylor Date: Sun, 10 Oct 2021 23:31:39 +0100 Subject: [PATCH] Audioboom downloader (#11) * Added Specification for the input format * Added audioboom downloader tools * autopep8 action fixes Co-authored-by: Emersont1 --- Specification.md | 22 ++++++ audioboom/__init__.py | 2 + audioboom/channel.py | 128 +++++++++++++++++++++++++++++++ audioboom/downloader/__main__.py | 22 ++++++ audioboom/structures.py | 26 +++++++ audioboom/utils.py | 29 +++++++ requirements.txt | 1 + 7 files changed, 230 insertions(+) create mode 100644 audioboom/__init__.py create mode 100644 audioboom/channel.py create mode 100644 audioboom/downloader/__main__.py create mode 100644 audioboom/structures.py create mode 100644 audioboom/utils.py diff --git a/Specification.md b/Specification.md index 54a74dc..7d8374d 100644 --- a/Specification.md +++ b/Specification.md @@ -75,3 +75,25 @@ A copy of this specification should be included in the output directory as `hstp - `study-in-scarlet.mp3` - `valley-of-fear.jpg` - `valley-of-fear.mp3` + +## HSTP Serialisation Toolkit & Publisher + +This is the Python codebase included in the repository. + +the command `hstp` + +### Input file format + +- `hstp_root.txt` - contains a list of podcasts to ignore (if any) + - `podcast_slug/` + - `image.jpg` - Thumbnail for the podcast + - `podcast.txt` - Description of the podcast + - The first line is read as the title + - Subsequent lines will be read from the file as its description + - `episode_slug/` + - `episode.txt` - Description for the episode + - The first line is read as the title + - The second line is read as the date. 
class Channel:
    """Representation of an audioboom channel.

    Constructing a ``Channel`` fetches its title, description and logo URL
    through ``utils.make_request``. ``get_episodes()`` and
    ``get_playlists()`` populate ``self.episodes`` and ``self.playlists``;
    ``save()`` then writes everything that was fetched to a directory tree.
    """

    def __init__(self, id):
        """Fetch the metadata of the channel identified by *id*."""
        self.id = id

        # Start empty so that save() is usable (and simply exports no
        # episodes/playlists) even if the get_* methods were never called.
        self.playlists = []
        self.episodes = []

        # get info from API
        data = utils.make_request(f"/channels/{id}")["channel"]

        self.title = data["title"]
        self.description = data["description"]
        self.thumbnail = data["urls"]["logo_image"]["original"]

    def get_playlists(self):
        """Replace ``self.playlists`` with the channel's playlists."""
        data = utils.make_request(f"/channels/{self.id}/playlists")
        self.playlists = [audioboom.Playlist(p) for p in data["playlist"]]

    def get_episodes(self):
        """Replace ``self.episodes`` with every audio clip of the channel.

        Pages of up to 150 clips are requested, starting at page 1, until
        the API answers with an empty page.
        """
        self.episodes = []
        page = 0
        while True:
            page += 1

            clips = utils.make_request(
                f"/channels/{self.id}/audio_clips"
                f"?page[items]=150&page[number]={page}"
            )["audio_clips"]

            if not clips:
                return

            self.episodes.extend(audioboom.Episode(clip) for clip in clips)

    def save(self, root):
        """Write the channel, its playlists and its episodes below *root*.

        Layout produced:

        - ``hstp_root.txt`` is created if missing (existing content is kept
          because the file is opened in append mode).
        - ``default/`` receives the channel's ``description.txt`` and
          ``image.jpg`` plus one directory per episode that is not saved
          as part of any playlist.
        - ``<playlist slug>/`` receives the playlist's ``description.txt``,
          its ``image.jpg`` when it has a thumbnail, and one directory per
          member episode. An episode is saved once only, under the first
          playlist that lists it.

        Raises:
            ValueError: if a playlist directory already exists in *root*.
        """
        # NOTE(review): Specification.md names the description files
        # podcast.txt / episode.txt, while this writes description.txt.
        # Confirm which of the two is intended.

        # create hstp_root.txt
        with open(join(root, "hstp_root.txt"), 'a'):
            pass

        default_dir = join(root, "default")
        if not exists(default_dir):
            mkdir(default_dir)

        self._write_description(
            default_dir, f"{self.title}\n{self.description}")
        self._download(self.thumbnail, join(default_dir, "image.jpg"))

        # Index episodes by id once; keep the first one for a repeated id,
        # which is what a front-to-back scan of the list would find.
        episodes_by_id = {}
        for episode in self.episodes:
            episodes_by_id.setdefault(episode.id, episode)

        consumed = set()

        for p in self.playlists:
            playlist_dir = join(root, p.slug)
            if exists(playlist_dir):
                raise ValueError("slug already exists")

            mkdir(playlist_dir)
            self._write_description(
                playlist_dir, f"{p.title}\n{p.description}")

            if p.thumbnail:
                self._download(p.thumbnail, join(playlist_dir, "image.jpg"))

            page = 0
            while True:
                page += 1
                memberships = utils.make_request(
                    f"/playlists/{p.id}"
                    f"?page[items]=150&page[number]={page}"
                )["playlist"]["memberships"]

                if not memberships:
                    break

                for membership in memberships:
                    clip_id = membership["audio_clip"]["id"]
                    if clip_id in consumed:
                        continue

                    episode = episodes_by_id.get(clip_id)
                    if episode is None:
                        # The playlist references a clip that is not in
                        # self.episodes; there is nothing to save for it.
                        continue

                    self.save_episode(
                        self._unique_episode_dir(playlist_dir, episode),
                        episode)
                    consumed.add(clip_id)

        # save unused episodes
        for episode in self.episodes:
            if episode.id in consumed:
                continue
            self.save_episode(
                self._unique_episode_dir(default_dir, episode), episode)

    def save_episode(self, path, ep):
        """Write one episode into the directory *path*.

        The directory is created when missing. ``description.txt`` holds
        the title, the date and the description (one per line, description
        last), ``image.jpg`` is written only when the episode has a
        thumbnail, and ``audio.mp3`` holds the downloaded audio.
        """
        if not exists(path):
            mkdir(path)

        self._write_description(
            path, f"{ep.title}\n{ep.date}\n{ep.description}")

        if ep.thumbnail:
            self._download(ep.thumbnail, join(path, "image.jpg"))

        self._download(ep.mp3, join(path, "audio.mp3"))

    @staticmethod
    def _write_description(directory, text):
        """Write *text* to ``description.txt`` inside *directory*."""
        # Explicit UTF-8: the text comes from the API and the platform's
        # default encoding may be unable to represent it.
        with open(join(directory, "description.txt"), "w",
                  encoding="utf-8") as f:
            f.write(text)

    @staticmethod
    def _download(url, destination):
        """Fetch *url* and store the response body at *destination*."""
        # Request first, open second: a failing request must not leave an
        # empty or truncated file behind.
        response = requests.get(url, allow_redirects=True)
        with open(destination, 'wb') as f:
            f.write(response.content)

    @staticmethod
    def _unique_episode_dir(parent, ep):
        """Return a path below *parent* for *ep* that does not exist yet.

        ``'_'`` is appended to ``ep.slug`` until the path is free, so two
        episodes with the same (or an empty) slug never share a directory.
        """
        target = join(parent, ep.slug)
        while exists(target):
            ep.slug += '_'
            target = join(parent, ep.slug)
        return target
class Episode:
    """A single audio clip, built from one API ``audio_clips`` record."""

    def __init__(self, data) -> None:
        """Copy the fields used by the downloader out of *data*.

        *data* must provide ``id``, ``title``, ``uploaded_at`` and a
        ``urls`` mapping containing ``high_mp3``; ``description`` and
        ``urls["image"]`` are optional.
        """
        urls = data["urls"]

        self.id = data["id"]
        self.title = data["title"]
        # ``or ""`` also covers a key that is present but null, which would
        # otherwise end up as the literal text "None" once formatted.
        self.description = data.get("description") or ""
        self.thumbnail = urls.get("image")
        self.mp3 = urls["high_mp3"]
        self.date = data["uploaded_at"]

        self.slug = utils.make_slug(self.title)


class Playlist:
    """A playlist, built from one API playlist record."""

    def __init__(self, data) -> None:
        """Copy the fields used by the downloader out of *data*.

        *data* must provide ``id`` and ``title``; ``description`` and
        ``image`` are optional.
        """
        self.id = data["id"]
        self.title = data["title"]
        # See Episode: a null description is normalised to "".
        self.description = data.get("description") or ""
        self.thumbnail = data.get("image")

        self.slug = utils.make_slug(self.title)
def make_request(endpoint):
    """
    Make a GET request to the audioboom API and return the response body.

    *endpoint* is appended verbatim to ``https://api.audioboom.com``. The
    response is decoded as JSON and its ``"body"`` member is returned.
    """
    return requests.get(
        f"https://api.audioboom.com{endpoint}",
        # The API needs version specifying
        headers={'Accept': 'application/json; version=1'}
    ).json()["body"]


def make_slug(title):
    """
    Turn *title* into a lower-case, dash-separated slug.

    Every character other than ``0-9``, ``a-z`` and ``-`` acts as a word
    separator; empty words and the words rejected by ``short_word`` are
    dropped. Accented letters are folded to their base letter first, so
    they no longer split a word in two.

    >>> make_slug("The Valley of Fear")
    'valley-of-fear'
    >>> make_slug("Café Society")
    'cafe-society'
    """
    # Local import keeps this function self-contained.
    import unicodedata

    allowed = '0123456789-abcdefghijklmnopqrstuvwxyz'

    # NFKD splits an accented letter into base letter + combining mark;
    # skipping the marks leaves the plain letter. ASCII text is unaffected.
    decomposed = unicodedata.normalize("NFKD", title.lower().strip())
    dashed = ''.join(
        ch if ch in allowed else '-'
        for ch in decomposed
        if not unicodedata.combining(ch)
    )

    words = [word for word in dashed.split("-") if not short_word(word)]
    return "-".join(words)


def short_word(w):
    """Return True when *w* is empty or one of the dropped words."""
    return w in ("", "the", "a")