Skip to content

Commit

Permalink
Audioboom downloader (#11)
Browse files Browse the repository at this point in the history
* Added Specification for the input format

* Added audioboom downloader tools

* autopep8 action fixes

Co-authored-by: Emersont1 <[email protected]>
  • Loading branch information
Emersont1 and Emersont1 authored Oct 10, 2021
1 parent fb39d40 commit d4a4b8b
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 0 deletions.
22 changes: 22 additions & 0 deletions Specification.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,25 @@ A copy of this specification should be included in the output directory as `hstp
- `study-in-scarlet.mp3`
- `valley-of-fear.jpg`
- `valley-of-fear.mp3`

## HSTP Serialisation Toolkit & Publisher

This is the python codebase included in the repository.

the command `hstp`

### Input file format

- `hstp_root.txt` - contains a list of podcasts to ignore (if any)
- `podcast_slug/`
- `image.jpg` - Thumbnail for the podcast
- `podcast.txt` - Description of the podcast
- The first line is read as the title
- Subsequent lines will be read from the file as its description
- `episode_slug/`
- `episode.txt` - Description of the episode
- The first line is read as the title
- The second line is read as the date. If it is absent, the date is taken from the creation date of the MP3 file
- Subsequent lines will be read from the file as its description
- `audio.mp3` - The audio of the podcast
- `image.jpg` - (Optional) icon for the podcast
2 changes: 2 additions & 0 deletions audioboom/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .channel import *
from .structures import *
128 changes: 128 additions & 0 deletions audioboom/channel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from audioboom import *
import audioboom
import audioboom.utils as utils

from os.path import join, exists
from os import mkdir
import requests


class Channel:
    """Representation of an audioboom channel.

    Constructing a Channel fetches the channel's title, description and
    logo URL from the API.  ``get_episodes()`` and ``get_playlists()``
    populate ``self.episodes`` / ``self.playlists``; ``save()`` then writes
    everything that was fetched below an output directory.

    NOTE(review): the files written here are named ``description.txt``,
    while Specification.md talks about ``podcast.txt`` / ``episode.txt``;
    confirm which name the reader expects.
    """

    def __init__(self, id):
        """Fetch the metadata of the channel with the given ``id``."""
        self.id = id

        # Start with empty collections so that save() is usable (and simply
        # writes no playlists/episodes) even if the get_* methods were not
        # called first.
        self.playlists = []
        self.episodes = []

        # get info from API
        data = utils.make_request(f"/channels/{id}")["channel"]

        self.title = data["title"]
        self.description = data["description"]
        self.thumbnail = data["urls"]["logo_image"]["original"]

    def get_playlists(self):
        """Fetch the channel's playlists into ``self.playlists``."""
        data = utils.make_request(f"/channels/{self.id}/playlists")
        self.playlists = [audioboom.Playlist(p) for p in data["playlist"]]

    def get_episodes(self):
        """Fetch every audio clip of the channel into ``self.episodes``.

        Pages of up to 150 clips are requested, starting at page 1, until
        an empty page is returned.
        """
        self.episodes = []
        page = 0
        while True:
            page += 1

            clips = utils.make_request(
                f"/channels/{self.id}/audio_clips"
                f"?page[items]=150&page[number]={page}"
            )["audio_clips"]

            if not clips:
                return

            self.episodes.extend(audioboom.Episode(clip) for clip in clips)

    def save(self, root):
        """Write the channel, its playlists and its episodes below ``root``.

        Layout produced:

        - ``root/hstp_root.txt`` is created if missing (existing content is
          left untouched).
        - ``root/default/`` holds the channel description, the channel image
          and every episode that is not saved under a playlist.
        - ``root/<playlist slug>/`` holds one playlist each, with the
          episodes it lists (an episode is saved only under the first
          playlist that lists it).

        Raises:
            ValueError: if a playlist's directory already exists.
        """
        # create hstp_root.txt
        with open(join(root, "hstp_root.txt"), 'a'):
            pass

        default_dir = join(root, "default")

        if not exists(default_dir):
            mkdir(default_dir)

        self._write_text(
            join(default_dir, "description.txt"),
            f"{self.title}\n{self.description}"
        )
        self._download(self.thumbnail, join(default_dir, "image.jpg"))

        # Index episodes by id once instead of scanning the list for every
        # playlist membership.  setdefault keeps the first episode for a
        # given id, matching a first-match lookup.
        episodes_by_id = {}
        for ep in self.episodes:
            episodes_by_id.setdefault(ep.id, ep)

        consumed = set()

        for p in self.playlists:
            playlist_dir = join(root, p.slug)
            if exists(playlist_dir):
                raise ValueError("slug already exists")

            mkdir(playlist_dir)
            self._write_text(
                join(playlist_dir, "description.txt"),
                f"{p.title}\n{p.description}"
            )

            if p.thumbnail:
                self._download(p.thumbnail, join(playlist_dir, "image.jpg"))

            page = 0
            while True:
                page += 1
                memberships = utils.make_request(
                    f"/playlists/{p.id}"
                    f"?page[items]=150&page[number]={page}"
                )["playlist"]["memberships"]

                if not memberships:
                    break

                for member in memberships:
                    clip_id = member["audio_clip"]["id"]
                    if clip_id in consumed:
                        continue

                    ep = episodes_by_id.get(clip_id)
                    if ep is None:
                        # The playlist references a clip that is not among
                        # the fetched channel episodes; there is nothing to
                        # save for it, so skip it instead of crashing.
                        continue

                    self.save_episode(join(playlist_dir, ep.slug), ep)
                    consumed.add(clip_id)

        # save unused episodes
        for ep in self.episodes:
            if ep.id in consumed:
                continue
            episode_dir = join(default_dir, ep.slug)

            # Disambiguate clashing slugs by appending underscores.
            while exists(episode_dir):
                ep.slug += '_'
                episode_dir = join(default_dir, ep.slug)

            mkdir(episode_dir)
            self.save_episode(episode_dir, ep)

    def save_episode(self, path, ep):
        """Write one episode (description, optional image, audio) to ``path``.

        The directory is created when it does not exist yet.
        """
        if not exists(path):
            mkdir(path)

        self._write_text(
            join(path, "description.txt"),
            f"{ep.title}\n{ep.date}\n{ep.description}"
        )

        if ep.thumbnail:
            self._download(ep.thumbnail, join(path, "image.jpg"))

        self._download(ep.mp3, join(path, "audio.mp3"))

    @staticmethod
    def _write_text(path, text):
        """Write ``text`` to ``path`` as UTF-8, replacing any existing file."""
        # An explicit encoding keeps non-ASCII titles/descriptions from
        # failing on platforms whose default encoding is not UTF-8.
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

    @staticmethod
    def _download(url, path):
        """Download ``url`` (following redirects) and store the body at ``path``."""
        # Fetch before opening the destination so a failed request does not
        # leave an empty file behind.
        # NOTE(review): the HTTP status is not checked, so the body of an
        # error response would be written to disk as-is.
        response = requests.get(url, allow_redirects=True)
        with open(path, 'wb') as f:
            f.write(response.content)
22 changes: 22 additions & 0 deletions audioboom/downloader/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import argparse

import audioboom

# Command-line entry point: download one audioboom channel (episodes and
# playlists) into an output directory.
cli = argparse.ArgumentParser(
    prog='python -m audioboom-downloader',
    description='Download Audioboom podcasts',
)

cli.add_argument("id", help="The ID of the audioboom channel")
# The verbose flag is accepted here but is not read anywhere in this script.
cli.add_argument(
    "-v", "--verbose",
    action="store_true",
    help="increase output verbosity",
)
cli.add_argument("-o", "--output", default=".", help="output directory")

options = cli.parse_args()

# Constructing the channel fetches its metadata; episodes and playlists are
# fetched explicitly before everything is written to disk.
channel = audioboom.Channel(options.id)
channel.get_episodes()
channel.get_playlists()
channel.save(options.output)
26 changes: 26 additions & 0 deletions audioboom/structures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import json

from audioboom import utils


class Episode:
    """A single audio clip, built from one clip record of an API response.

    ``description`` falls back to an empty string and ``thumbnail`` to
    ``None`` when the corresponding keys are absent from the record.
    """

    def __init__(self, data) -> None:
        self.id = data["id"]
        self.title = data["title"]
        # Optional field: default to an empty description.
        self.description = data.get("description", "")

        clip_urls = data["urls"]
        # Optional field: None when the clip has no image URL.
        self.thumbnail = clip_urls.get("image")
        self.mp3 = clip_urls["high_mp3"]
        self.date = data["uploaded_at"]

        # Directory-friendly name derived from the title.
        self.slug = utils.make_slug(self.title)


class Playlist:
    """A playlist, built from one playlist record of an API response.

    ``description`` falls back to an empty string and ``thumbnail`` to
    ``None`` when the corresponding keys are absent from the record.
    """

    def __init__(self, data) -> None:
        self.id = data["id"]
        self.title = data["title"]
        # Both fields are optional in the record.
        self.description = data.get("description", "")
        self.thumbnail = data.get("image")

        # Directory-friendly name derived from the title.
        self.slug = utils.make_slug(self.title)
29 changes: 29 additions & 0 deletions audioboom/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import requests


def make_request(endpoint, timeout=30):
    """
    Make a GET request to the audioboom API and return the response body.

    ``endpoint`` is appended to ``https://api.audioboom.com`` (so it should
    start with ``/``).  The response is decoded as JSON and the value stored
    under its ``"body"`` key is returned.

    ``timeout`` is passed to ``requests.get`` (in seconds) so that an
    unresponsive server cannot block the caller forever.

    NOTE(review): the HTTP status code is not checked; an error response
    surfaces as a JSON decoding error or a ``KeyError`` on ``"body"``.
    """
    response = requests.get(
        f"https://api.audioboom.com{endpoint}",
        # The API needs version specifying
        headers={'Accept': 'application/json; version=1'},
        timeout=timeout,
    )
    return response.json()["body"]


def make_slug(title):
    """
    Turn a title into a lowercase, dash-separated slug.

    The title is lowercased and stripped; every character other than
    ``0-9``, ``a-z`` and ``-`` becomes a dash.  The result is split on
    dashes, words rejected by ``short_word`` are dropped, and the remaining
    words are joined with single dashes.  The slug is empty when no word
    survives the filtering.
    """
    allowed = '0123456789-abcdefghijklmnopqrstuvwxyz'
    normalised = title.lower().strip()
    dashed = ''.join(ch if ch in allowed else '-' for ch in normalised)

    kept = []
    for word in dashed.split("-"):
        if short_word(word):
            continue
        kept.append(word)

    return "-".join(kept)


def short_word(w):
    """
    Tell whether ``w`` is the empty string or one of the words "the" / "a".

    The comparison is exact and case-sensitive.
    """
    ignorable = ["", "the", "a"]
    return w in ignorable
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pycodestyle==2.7.0
requests==2.26.0
python-dateutil==2.8.2
simple-colors==0.1.5

0 comments on commit d4a4b8b

Please sign in to comment.