diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java index de97c533b..099eaf73d 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java @@ -1,26 +1,27 @@ package com.rarchives.ripme.ripper.rippers; import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.utils.Http; -import org.json.JSONArray; -import org.json.JSONObject; -import org.jsoup.Connection; import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; -import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class LusciousRipper extends AbstractHTMLRipper { - private static String albumid; + private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception. 
private static final Pattern P = Pattern.compile("^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net/albums/([-_.0-9a-zA-Z]+)/?"); + private final DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool"); public LusciousRipper(URL url) throws IOException { super(url); @@ -37,48 +38,37 @@ public String getHost() { } @Override - public List getURLsFromPage(Document page) { // gets urls for all pages through the api - List urls = new ArrayList<>(); - int totalPages = 1; - - for (int i = 1; i <= totalPages; i++) { - String APIStringWOVariables = "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=%2520query%2520PictureListInsideAlbum%28%2524input%253A%2520PictureListInput%21%29%2520%257B%2520picture%2520%257B%2520list%28input%253A%2520%2524input%29%2520%257B%2520info%2520%257B%2520...FacetCollectionInfo%2520%257D%2520items%2520%257B%2520__typename%2520id%2520title%2520description%2520created%2520like_status%2520number_of_comments%2520number_of_favorites%2520moderation_status%2520width%2520height%2520resolution%2520aspect_ratio%2520url_to_original%2520url_to_video%2520is_animated%2520position%2520permissions%2520url%2520tags%2520%257B%2520category%2520text%2520url%2520%257D%2520thumbnails%2520%257B%2520width%2520height%2520size%2520url%2520%257D%2520%257D%2520%257D%2520%257D%2520%257D%2520fragment%2520FacetCollectionInfo%2520on%2520FacetCollectionInfo%2520%257B%2520page%2520has_next_page%2520has_previous_page%2520total_items%2520total_pages%2520items_per_page%2520url_complete%2520%257D%2520&variables="; - Connection con = Http.url(APIStringWOVariables + encodeVariablesPartOfURL(i, albumid)).method(Connection.Method.GET).retries(5).connection(); - con.ignoreHttpErrors(true); - con.ignoreContentType(true); - con.userAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0"); - Connection.Response res; - try { - res = con.execute(); - } catch (IOException e) { - throw new 
RuntimeException(e); - } - String body = res.body(); + public Document getFirstPage() throws IOException { + return super.getFirstPage(); + } - JSONObject jsonObject = new JSONObject(body); + @Override + public List getURLsFromPage(Document page) { + List urls = new ArrayList<>(); + Elements urlElements = page.select("div.item.thumbnail.ic_container > a"); + for (Element e : urlElements) { + urls.add(e.attr("abs:href")); + } - JSONObject data = jsonObject.getJSONObject("data"); - JSONObject picture = data.getJSONObject("picture"); - JSONObject list = picture.getJSONObject("list"); - JSONArray items = list.getJSONArray("items"); - JSONObject info = list.getJSONObject("info"); - totalPages = info.getInt("total_pages"); + return urls; + } - for (int j = 0; j < items.length(); j++) { - JSONObject item = items.getJSONObject(j); - String urlToOriginal = item.getString("url_to_original"); - urls.add(urlToOriginal); - } + @Override + public Document getNextPage(Document doc) throws IOException { + // luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser. + // Simply GET the nextPageUrl also works. Therefore, we do this... 
+ Element nextPageElement = doc.select("div#next_page > div > a").first(); + if (nextPageElement == null) { + throw new IOException("No next page found."); } - return urls; + return Http.url(nextPageElement.attr("abs:href")).get(); } @Override public String getGID(URL url) throws MalformedURLException { Matcher m = P.matcher(url.toExternalForm()); if (m.matches()) { - albumid = m.group(1).split("_")[m.group(1).split("_").length - 1]; return m.group(1); } throw new MalformedURLException("Expected luscious.net URL format: " @@ -86,17 +76,79 @@ public String getGID(URL url) throws MalformedURLException { } @Override - protected void downloadURL(URL url, int index) { - addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null); + public void downloadURL(URL url, int index) { + lusciousThreadPool.addThread(new LusciousDownloadThread(url, index)); } - public static String encodeVariablesPartOfURL(int page, String albumId) { + @Override + public DownloadThreadPool getThreadPool() { + return lusciousThreadPool; + } + + @Override + public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException { + // Sanitizes the url removing GET parameters and convert to legacy api url. 
+ // "https://legacy.luscious.net/albums/albumname" try { - String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}"; + Matcher m = P.matcher(url.toString()); + if (m.matches()) { + String sanitizedUrl = m.group(); + sanitizedUrl = sanitizedUrl.replaceFirst( + "^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net", + "https://legacy.luscious.net"); + return new URI(sanitizedUrl).toURL(); + } + + throw new Exception("ERROR: Unable to sanitize url."); + } catch (Exception e) { + LOGGER.info("Error sanitizing the url."); + LOGGER.error(e); + return super.sanitizeURL(url); + } + } - return URLEncoder.encode(json, "UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new IllegalStateException("Could not encode variables"); + @Override + public String normalizeUrl(String url) { + try { + return url.replaceFirst( + "^https?://(?:members\\.|legacy\\.)?luscious.net", "https://www.luscious.net"); + } catch (Exception e) { + LOGGER.info("Error normalizing the url."); + LOGGER.error(e); + return super.normalizeUrl(url); } } + + public class LusciousDownloadThread implements Runnable { + private final URL url; + private final int index; + + public LusciousDownloadThread(URL url, int index) { + this.url = url; + this.index = index; + } + + @Override + public void run() { + try { + Document page = Http.url(url).retries(RETRY_COUNT).get(); + + String downloadUrl = page.select(".icon-download").attr("abs:href"); + if (downloadUrl.equals("")) { + // This is here for pages with mp4s instead of images. + downloadUrl = page.select("div > video > source").attr("src"); + if (downloadUrl.equals("")) { + throw new IOException("Could not find download url for image or video."); + } + } + + //If a valid download url was found.
+ addURLToDownload(new URI(downloadUrl).toURL(), getPrefix(index)); + + } catch (IOException | URISyntaxException e) { + LOGGER.error("Error downloading url " + url, e); + } + } + + } }