From d4a4b8bddc835f621c27c7edcb6adb97ac9484fb Mon Sep 17 00:00:00 2001
From: Peter Taylor <me@et1.uk>
Date: Sun, 10 Oct 2021 23:31:39 +0100
Subject: [PATCH] Audioboom downloader (#11)

* Added Specification for the input format

* Added audioboom downloader tools

* autopep8 action fixes

Co-authored-by: Emersont1 <Emersont1@users.noreply.github.com>
---
 Specification.md                 |  22 ++++++
 audioboom/__init__.py            |   2 +
 audioboom/channel.py             | 128 +++++++++++++++++++++++++++++++
 audioboom/downloader/__main__.py |  22 ++++++
 audioboom/structures.py          |  26 +++++++
 audioboom/utils.py               |  29 +++++++
 requirements.txt                 |   1 +
 7 files changed, 230 insertions(+)
 create mode 100644 audioboom/__init__.py
 create mode 100644 audioboom/channel.py
 create mode 100644 audioboom/downloader/__main__.py
 create mode 100644 audioboom/structures.py
 create mode 100644 audioboom/utils.py

diff --git a/Specification.md b/Specification.md
index 54a74dc..7d8374d 100644
--- a/Specification.md
+++ b/Specification.md
@@ -75,3 +75,25 @@ A copy of this specification should be included in the output directory as `hstp
     -   `study-in-scarlet.mp3`
     -   `valley-of-fear.jpg`
     -   `valley-of-fear.mp3`
+
+## HSTP Serialisation Toolkit & Publisher
+
+This is the python codebase included in the repository.
+
+the command `hstp` 
+
+### Input file format
+
+-   `hstp_root.txt` - contains a list of podcasts to ignore (if any)
+    -   `podcast_slug/`
+        -   `image.jpg` - Thumbnail for the podcast
+        -   `podcast.txt` - Description of the podcast
+            -   The first line is read as the title
+            -   Subsequent lines will be read from the file as its description
+        -   `episode_slug/`
+             -   `episode.txt` - Description for the episode
+                 -   The first line is read as the title
+                 -   The second line is read as the date. If it is not there, it will be sourced from the creation date of the MP3 file
+                 -   Subsequent lines will be read from the file as its description
+             -   `audio.mp3` - The audio of the episode
+             -   `image.jpg` - (Optional) icon for the episode
diff --git a/audioboom/__init__.py b/audioboom/__init__.py
new file mode 100644
index 0000000..03afa3c
--- /dev/null
+++ b/audioboom/__init__.py
@@ -0,0 +1,2 @@
+from .channel import *
+from .structures import *
diff --git a/audioboom/channel.py b/audioboom/channel.py
new file mode 100644
index 0000000..2194f3c
--- /dev/null
+++ b/audioboom/channel.py
@@ -0,0 +1,128 @@
+from audioboom import *
+import audioboom
+import audioboom.utils as utils
+
+from os.path import join, exists
+from os import mkdir
+import requests
+
+
+class Channel:
+    """ Representation of an audioboom channel"""
+
+    def __init__(self, id):
+        """Look up channel `id` via the API; store title, description, logo."""
+        self.id = id
+
+        data = utils.make_request(f"/channels/{id}")["channel"]
+
+        self.title = data["title"]
+        self.description = data["description"]
+        self.thumbnail = data["urls"]["logo_image"]["original"]
+
+    def get_playlists(self):
+        """Populate `self.playlists` from the channel's playlist listing."""
+        data = utils.make_request(f"/channels/{self.id}/playlists")
+        self.playlists = []
+        self.playlists.extend(map(audioboom.Playlist, data["playlist"]))
+
+    def get_episodes(self):
+        """Collect every audio clip of the channel into `self.episodes`."""
+        self.episodes = []
+        page = 1
+        while True:
+            # the listing is requested page by page; an empty page ends it
+            data = utils.make_request(
+                f"/channels/{self.id}/audio_clips"
+                f"?page[items]=150&page[number]={page}"
+            )["audio_clips"]
+
+            if len(data) == 0:
+                return
+
+            self.episodes.extend(audioboom.Episode(ep) for ep in data)
+            page += 1
+
+    def save(self, root):
+        """Write the channel under `root`: one folder per playlist and a
+        "default" folder for the channel info and playlist-less episodes.
+        Run get_episodes()/get_playlists() first. ValueError on slug clash."""
+        # create hstp_root.txt
+        with open(join(root, "hstp_root.txt"), 'a'):
+            pass
+
+        path = join(root, "default")
+        if not exists(path):
+            mkdir(path)
+
+        with open(join(path, "description.txt"), "w") as f:
+            f.write(f"{self.title}\n{self.description}")
+
+        with open(join(path, "image.jpg"), 'wb') as f:
+            i = requests.get(self.thumbnail, allow_redirects=True)
+            f.write(i.content)
+
+        # reversed() so the first episode with a given id wins the lookup
+        by_id = {e.id: e for e in reversed(self.episodes)}
+        consumed = set()
+
+        for p in self.playlists:
+            path_ = join(root, p.slug)
+            if exists(path_):
+                raise ValueError("slug already exists")
+
+            mkdir(path_)
+            with open(join(path_, "description.txt"), "w") as f:
+                f.write(f"{p.title}\n{p.description}")
+
+            if p.thumbnail:
+                with open(join(path_, "image.jpg"), 'wb') as f:
+                    i = requests.get(p.thumbnail, allow_redirects=True)
+                    f.write(i.content)
+
+            j = 0
+            while True:
+                j += 1
+                data = utils.make_request(
+                    f"/playlists/{p.id}"
+                    f"?page[items]=150&page[number]={j}"
+                )["playlist"]["memberships"]
+                if len(data) == 0:
+                    break
+
+                for ep in data:
+                    ep_ = by_id.get(ep["audio_clip"]["id"])
+                    # skip clips already saved or absent from self.episodes
+                    if ep_ is None or ep_.id in consumed:
+                        continue
+                    self.save_episode(join(path_, ep_.slug), ep_)
+                    consumed.add(ep_.id)
+
+        # save unused episodes
+        for ep in self.episodes:
+            if ep.id in consumed:
+                continue
+            path_ = join(path, ep.slug)
+            while exists(path_):
+                ep.slug += '_'
+                path_ = join(path, ep.slug)
+            mkdir(path_)
+            self.save_episode(path_, ep)
+
+    def save_episode(self, path, ep):
+        """Write an episode's description, thumbnail and audio into `path`."""
+        if not exists(path):
+            mkdir(path)
+
+        with open(join(path, "description.txt"), "w") as f:
+            f.write(f"{ep.title}\n{ep.date}\n{ep.description}")
+
+        if ep.thumbnail:
+            # download before opening so a failed request leaves no empty file
+            i = requests.get(ep.thumbnail, allow_redirects=True)
+            with open(join(path, "image.jpg"), 'wb') as f:
+                f.write(i.content)
+
+        a = requests.get(ep.mp3, allow_redirects=True)
+        with open(join(path, "audio.mp3"), 'wb') as f:
+            f.write(a.content)
diff --git a/audioboom/downloader/__main__.py b/audioboom/downloader/__main__.py
new file mode 100644
index 0000000..fd07bc6
--- /dev/null
+++ b/audioboom/downloader/__main__.py
@@ -0,0 +1,22 @@
+import argparse
+
+import audioboom
+
+parser = argparse.ArgumentParser(
+    prog='python -m audioboom.downloader',
+    description='Download Audioboom podcasts'
+)
+
+parser.add_argument("id", help="The ID of the audioboom channel")
+parser.add_argument("-v", "--verbose", help="increase output verbosity",
+                    action="store_true")
+parser.add_argument("-o", "--output", help="output directory", default=".")
+
+args = parser.parse_args()
+
+c = audioboom.Channel(args.id)
+# save() reads both lists, so they must be fetched before it runs
+c.get_episodes()
+c.get_playlists()
+
+c.save(args.output)
diff --git a/audioboom/structures.py b/audioboom/structures.py
new file mode 100644
index 0000000..18e91a5
--- /dev/null
+++ b/audioboom/structures.py
@@ -0,0 +1,26 @@
+import json
+
+from audioboom import utils
+
+
+class Episode:
+    """A single audio clip, built from its API dictionary."""
+    def __init__(self, data) -> None:
+        self.id = data["id"]
+        self.title = data["title"]
+        self.description = data.get("description", "")
+        self.thumbnail = data["urls"].get("image")
+        self.mp3 = data["urls"]["high_mp3"]
+        self.date = data["uploaded_at"]
+        # slug is derived from the title only
+        self.slug = utils.make_slug(self.title)
+
+
+class Playlist:
+    def __init__(self, data) -> None:
+        # description and image are optional keys; id and title are required
+        self.id = data["id"]
+        self.title = data["title"]
+        self.description = data.get("description", "")
+        self.thumbnail = data.get("image")
+        self.slug = utils.make_slug(self.title)
diff --git a/audioboom/utils.py b/audioboom/utils.py
new file mode 100644
index 0000000..de07765
--- /dev/null
+++ b/audioboom/utils.py
@@ -0,0 +1,29 @@
+import requests
+
+
+def make_request(endpoint):
+    """
+    GET `endpoint` from api.audioboom.com and return the JSON "body" value.
+    """
+    return requests.get(
+        f"https://api.audioboom.com{endpoint}",
+        # The API needs version specifying
+        headers={'Accept': 'application/json; version=1'}
+    ).json()["body"]
+
+
+def make_slug(title):
+    """Lower-case `title`, dash out non [0-9a-z-] chars, drop short words."""
+    allowed = '0123456789-abcdefghijklmnopqrstuvwxyz'
+    dashed = ''.join(
+        ch if ch in allowed else '-' for ch in title.lower().strip()
+    )
+    words = [w for w in dashed.split("-") if not short_word(w)]
+
+    return "-".join(words)
+
+
+def short_word(w):
+    """Return True when `w` is empty or one of the dropped filler words."""
+    fillers = ["", "the", "a"]
+    return w in fillers
diff --git a/requirements.txt b/requirements.txt
index b020698..c851b3e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 pycodestyle==2.7.0
+requests==2.26.0
 python-dateutil==2.8.2
 simple-colors==0.1.5