last.fm working

nathom · Apr 9, 2021 · 0d2ca55 · 0d2ca55
1 parent b2f75cd
commit 0d2ca55
Show file tree

Hide file tree

Showing 7 changed files with 101 additions and 49 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,6 @@
 click
 ruamel.yaml
 packaging
-bs4
 pathvalidate
 requests
 mutagen

diff --git a/streamrip/cli.py b/streamrip/cli.py
@@ -216,6 +216,33 @@ def discover(ctx, **kwargs):
         none_chosen()
 
 
+@cli.command()
+@click.option(
+    "-s", "--source", help="Qobuz, Tidal, Deezer, or SoundCloud. Default: Qobuz."
+)
+@click.argument("URL")  # received by the function below as the `url` parameter
+@click.pass_context
+def lastfm(ctx, source, url):  # NOTE(review): ctx is accepted but not used in this body
+    """Searches for tracks from a last.fm playlist on a given source.
+
+    Examples:
+
+        $ rip lastfm https://www.last.fm/user/nathan3895/playlists/12059037
+
+        Download a playlist using Qobuz as the source
+
+        $ rip lastfm -s tidal https://www.last.fm/user/nathan3895/playlists/12059037
+
+        Download a playlist using Tidal as the source
+    """
+
+    if source is not None:  # -s/--source was given: override the configured last.fm source for this session
+        config.session["lastfm"]["source"] = source
+
+    core.handle_lastfm_urls(url)  # presumably the core.py method changed in this commit, which queues one playlist per last.fm URL — confirm
+    core.download()  # then run the download step for whatever was queued
+
+
 @cli.command()
 @click.option("-o", "--open", is_flag=True, help="Open the config file")
 @click.option("-q", "--qobuz", is_flag=True, help="Set Qobuz credentials")

diff --git a/streamrip/config.py b/streamrip/config.py
@@ -82,7 +82,7 @@ class Config:
         },
         "path_format": {"folder": FOLDER_FORMAT, "track": TRACK_FORMAT},
         "check_for_updates": True,
-        "lastfm": {"source": "qobuz"}
+        "lastfm": {"source": "qobuz"},
     }
 
     def __init__(self, path: str = None):

diff --git a/streamrip/core.py b/streamrip/core.py
@@ -1,30 +1,30 @@
 import logging
-import time
 import os
 import re
 import sys
+import time
 from getpass import getpass
 from hashlib import md5
 from string import Formatter
 from typing import Generator, Optional, Tuple, Union
 
 import click
 import requests
-from bs4 import BeautifulSoup
+from tqdm import tqdm
 
 from .clients import DeezerClient, QobuzClient, SoundCloudClient, TidalClient
 from .config import Config
 from .constants import (
     CONFIG_PATH,
     DB_PATH,
+    LASTFM_URL_REGEX,
     MEDIA_TYPES,
     SOUNDCLOUD_URL_REGEX,
-    LASTFM_URL_REGEX,
     URL_REGEX,
 )
 from .db import MusicDB
 from .downloader import Album, Artist, Label, Playlist, Track, Tracklist
-from .exceptions import AuthenticationError, ParsingError
+from .exceptions import AuthenticationError, NoResultsFound, ParsingError
 from .utils import capitalize
 
 logger = logging.getLogger(__name__)
@@ -114,18 +114,15 @@ def assert_creds(self, source: str):
             self.prompt_creds(source)
 
     def handle_urls(self, url: str):
-        """Download an url
+        """Download a url
 
         :param url:
         :type url: str
         :raises InvalidSourceError
         :raises ParsingError
         """
-        parsed_info = self.parse_urls(url)
-        if parsed_info is None:
-            return
 
-        for source, url_type, item_id in parsed_info:
+        for source, url_type, item_id in self.parse_urls(url):
             if item_id in self.db:
                 logger.info(
                     f"ID {item_id} already downloaded, use --no-db to override."
@@ -152,7 +149,6 @@ def handle_item(self, source: str, media_type: str, item_id: str):
         self.append(item)
 
     def download(self):
-
         arguments = {
             "database": self.db,
             "parent_folder": self.config.session["downloads"]["folder"],
@@ -192,7 +188,7 @@ def download(self):
             else:
                 item.download(**arguments)
 
-            if self.db != [] and hasattr(item, 'id'):
+            if self.db != [] and hasattr(item, "id"):
                 self.db.add(item.id)
 
             if self.config.session["conversion"]["enabled"]:
@@ -246,9 +242,6 @@ def parse_urls(self, url: str) -> Tuple[str, str]:
         parsed = self.url_parse.findall(url)  # Qobuz, Tidal, Dezer
         soundcloud_urls = self.soundcloud_url_parse.findall(url)
         soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls]
-        lastfm_urls = self.lastfm_url_parse.findall(url)
-        if lastfm_urls:
-            self.handle_lastfm_urls(lastfm_urls)
 
         parsed.extend(
             ("soundcloud", item["kind"], url)
@@ -260,22 +253,30 @@ def parse_urls(self, url: str) -> Tuple[str, str]:
         if parsed != []:
             return parsed
 
-        if not lastfm_urls:
-            raise ParsingError(f"Error parsing URL: `{url}`")
+        raise ParsingError(f"Error parsing URL: `{url}`")
 
-    def handle_lastfm_urls(self, lastfm_urls):
-        lastfm_source = self.config.session['lastfm']['source']
+    def handle_lastfm_urls(self, urls):  # urls: text that is scanned for last.fm URLs (no longer a pre-parsed list)
+        lastfm_urls = self.lastfm_url_parse.findall(urls)  # every match of the last.fm URL pattern in the input
+        lastfm_source = self.config.session["lastfm"]["source"]  # key of the client used to search for the tracks
         for purl in lastfm_urls:
+            click.secho(f"Fetching playlist at {purl}", fg="blue")
             title, queries = self.get_lastfm_playlist(purl)
 
             pl = Playlist(client=self.clients[lastfm_source], name=title)
-            for query in queries:
-                click.secho(f'Searching for "{query}"', fg='cyan')
-                track = next(self.search(lastfm_source, query, media_type='track'))
+            tracks_not_found = 0  # per-playlist count of queries that raised NoResultsFound
+            for title, artist in tqdm(queries, unit="tracks", desc="Searching"):  # NOTE(review): rebinds `title`; harmless here because the Playlist was already named above
+                query = f"{title} {artist}"
+
+                try:  # NOTE(review): this change also removes the 0.2 s sleep that throttled searches to 5 requests/s
+                    track = next(self.search(lastfm_source, query, media_type="track"))  # keep only the first result yielded
+                except NoResultsFound:  # nothing matched on the source: count it and move on to the next query
+                    tracks_not_found += 1
+                    continue
+
                 pl.append(track)
                 pl.loaded = True
-                time.sleep(0.2)  # max 5 requests/s
 
+            click.secho(f"{tracks_not_found} tracks not found.", fg='yellow')  # summary printed once per playlist
             self.append(pl)
 
     def handle_txt(self, filepath: Union[str, os.PathLike]):
@@ -312,9 +313,13 @@ def search(
                     if i > limit:
                         return
         else:
-            for item in (
+            items = (
                 results.get("data") or results.get("items") or results.get("collection")
-            ):
+            )
+            if items is None:
+                raise NoResultsFound(query)
+
+            for item in items:
                 yield MEDIA_CLASS[media_type].from_api(item, client)
                 i += 1
                 if i > limit:
@@ -424,22 +429,34 @@ def from_title(s):
                 return True
 
     def get_lastfm_playlist(self, url: str) -> Tuple[str, list]:
-        # code from qobuz-dl
-        try:
-            r = requests.get(url, timeout=10)
-        except requests.exceptions.RequestException:
-            click.secho("Unable to fetch playlist", fg="red")
-            return
-
-        soup = BeautifulSoup(r.content, "html.parser")
-        artists = (artist.text for artist in soup.select("td.chartlist-artist > a"))
-        titles = (title.text for title in soup.select("td.chartlist-name > a"))
-
-        queries = [f"{artist} {title}" for artist, title in zip(artists, titles)]
-
-        if not queries:
-            click.secho("No tracks found", fg="red")
-            return
-
-        title = soup.select_one("h1").text
-        return title, queries
+        info = []  # accumulates one 2-tuple of cleaned strings per track; filled in by get_titles()
+        words = re.compile(r"[\w\s]+")  # runs of word characters and whitespace; anything else is dropped by essence()
+        title_tags = re.compile('title="([^"]+)"')  # captures the value of every title="..." attribute in the HTML
+
+        def essence(s):  # reduce a scraped attribute value to word characters and whitespace only
+            s = re.sub(r"&#\d+;", "", s)  # strip numeric character references such as &#39;
+            return "".join(words.findall(s))
+
+        def get_titles(s):  # pair up consecutive title attributes from one page of HTML and append them to info
+            titles = title_tags.findall(s)[2:]  # skips the first two matches — presumably page chrome rather than tracks; TODO confirm
+            for i in range(0, len(titles) - 1, 2):  # step by two; an unpaired trailing match is ignored
+                info.append((essence(titles[i]), essence(titles[i + 1])))
+
+        r = requests.get(url)  # NOTE(review): no timeout or error handling; the removed code used timeout=10 and caught RequestException
+        get_titles(r.text)
+        remaining_tracks = (
+            int(re.search(r'data-playlisting-entry-count="(\d+)"', r.text).group(1))  # NOTE(review): AttributeError if the attribute is missing (re.search returns None)
+            - 50  # the first page was parsed just above; assumes 50 entries per page — TODO confirm
+        )
+        playlist_title = re.search(
+            r'<h1 class="playlisting-playlist-header-title">([^<]+)</h1>', r.text
+        ).group(1)  # NOTE(review): also fails with AttributeError if the header markup is not found
+
+        page = 1
+        while remaining_tracks > 0:  # keep fetching pages until the reported entry count is covered
+            page += 1
+            r = requests.get(f"{url}?page={page}")  # NOTE(review): assumes url carries no query string of its own; no timeout here either
+            get_titles(r.text)
+            remaining_tracks -= 50
+
+        return playlist_title, info  # (playlist title, list of 2-tuples); handle_lastfm_urls unpacks each tuple as (title, artist)
diff --git a/streamrip/downloader.py b/streamrip/downloader.py
@@ -912,7 +912,8 @@ def download(
             tqdm_download(self.cover_urls[embed_cover_size], cover_path)
             if (
                 self.cover_urls.get(download_cover_size, embed_cover_size)
-                != embed_cover_size or os.path.size(cover_path) > FLAC_MAX_BLOCKSIZE
+                != embed_cover_size
+                or os.path.size(cover_path) > FLAC_MAX_BLOCKSIZE
             ):
                 # download cover at another resolution but don't use for embed
                 embed_cover_path = cover_path.replace(".jpg", "_embed.jpg")

diff --git a/streamrip/exceptions.py b/streamrip/exceptions.py
@@ -44,3 +44,7 @@ class BadEncoderOption(Exception):
 
 class ConversionError(Exception):
     pass
+
+
+class NoResultsFound(Exception):  # raised by search() in core.py when the response has no "data", "items" or "collection" results
+    pass
diff --git a/streamrip/metadata.py b/streamrip/metadata.py
@@ -100,9 +100,13 @@ def add_album_meta(self, resp: dict):
             self.albumartist = safe_get(resp, "artist", "name")
             self.label = resp.get("label")
             self.description = resp.get("description")
-            self.disctotal = max(
-                track.get("media_number", 1) for track in safe_get(resp, 'tracks', 'items', default=[{}])
-            ) or 1
+            self.disctotal = (
+                max(
+                    track.get("media_number", 1)
+                    for track in safe_get(resp, "tracks", "items", default=[{}])
+                )
+                or 1
+            )
             self.explicit = resp.get("parental_warning", False)
 
             if isinstance(self.label, dict):
-Original file line number
+Diff line change
@@ Expand Up / @@ -44,3 +44,7 @@ class BadEncoderOption(Exception): @@
     class ConversionError(Exception):
         pass
+    class NoResultsFound(Exception):
+        pass