From 73c8fe3e5759f1100d27570113163bc3f43f35eb Mon Sep 17 00:00:00 2001
From: Georgi Marinov
Date: Sat, 4 Nov 2023 17:29:48 +0200
Subject: [PATCH] LusciousRipper fixed

---
 .../ripme/ripper/rippers/LusciousRipper.java  | 140 ++++++------------
 1 file changed, 44 insertions(+), 96 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
index 099eaf73d..de97c533b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
@@ -1,27 +1,26 @@
 package com.rarchives.ripme.ripper.rippers;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ripper.DownloadThreadPool;
 import com.rarchives.ripme.utils.Http;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Connection;
 import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.URL;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 public class LusciousRipper extends AbstractHTMLRipper {
-    private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
+    private static String albumid;
 
     private static final Pattern P = Pattern.compile("^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net/albums/([-_.0-9a-zA-Z]+)/?");
-    private final DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");
 
     public LusciousRipper(URL url) throws IOException {
         super(url);
@@ -38,37 +37,48 @@ public String getHost() {
     }
 
     @Override
-    public Document getFirstPage() throws IOException {
-        return super.getFirstPage();
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document page) {
+    public List<String> getURLsFromPage(Document page) { // gets urls for all pages through the api
         List<String> urls = new ArrayList<>();
 
-        Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
-        for (Element e : urlElements) {
-            urls.add(e.attr("abs:href"));
-        }
+        int totalPages = 1;
+
+        for (int i = 1; i <= totalPages; i++) {
+            String APIStringWOVariables = "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=%2520query%2520PictureListInsideAlbum%28%2524input%253A%2520PictureListInput%21%29%2520%257B%2520picture%2520%257B%2520list%28input%253A%2520%2524input%29%2520%257B%2520info%2520%257B%2520...FacetCollectionInfo%2520%257D%2520items%2520%257B%2520__typename%2520id%2520title%2520description%2520created%2520like_status%2520number_of_comments%2520number_of_favorites%2520moderation_status%2520width%2520height%2520resolution%2520aspect_ratio%2520url_to_original%2520url_to_video%2520is_animated%2520position%2520permissions%2520url%2520tags%2520%257B%2520category%2520text%2520url%2520%257D%2520thumbnails%2520%257B%2520width%2520height%2520size%2520url%2520%257D%2520%257D%2520%257D%2520%257D%2520%257D%2520fragment%2520FacetCollectionInfo%2520on%2520FacetCollectionInfo%2520%257B%2520page%2520has_next_page%2520has_previous_page%2520total_items%2520total_pages%2520items_per_page%2520url_complete%2520%257D%2520&variables=";
+            Connection con = Http.url(APIStringWOVariables + encodeVariablesPartOfURL(i, albumid)).method(Connection.Method.GET).retries(5).connection();
+            con.ignoreHttpErrors(true);
+            con.ignoreContentType(true);
+            con.userAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0");
+            Connection.Response res;
+            try {
+                res = con.execute();
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+            String body = res.body();
 
-        return urls;
-    }
+            JSONObject jsonObject = new JSONObject(body);
 
-    @Override
-    public Document getNextPage(Document doc) throws IOException {
-        // luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
-        // Simply GET the nextPageUrl also works. Therefore, we do this...
-        Element nextPageElement = doc.select("div#next_page > div > a").first();
-        if (nextPageElement == null) {
-            throw new IOException("No next page found.");
+            JSONObject data = jsonObject.getJSONObject("data");
+            JSONObject picture = data.getJSONObject("picture");
+            JSONObject list = picture.getJSONObject("list");
+            JSONArray items = list.getJSONArray("items");
+            JSONObject info = list.getJSONObject("info");
+            totalPages = info.getInt("total_pages");
+
+            for (int j = 0; j < items.length(); j++) {
+                JSONObject item = items.getJSONObject(j);
+                String urlToOriginal = item.getString("url_to_original");
+                urls.add(urlToOriginal);
+            }
         }
-        return Http.url(nextPageElement.attr("abs:href")).get();
+        return urls;
     }
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
         Matcher m = P.matcher(url.toExternalForm());
         if (m.matches()) {
+            albumid = m.group(1).split("_")[m.group(1).split("_").length - 1];
             return m.group(1);
         }
         throw new MalformedURLException("Expected luscious.net URL format: "
@@ -76,79 +86,17 @@ public String getGID(URL url) throws MalformedURLException {
     }
 
     @Override
-    public void downloadURL(URL url, int index) {
-        lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
+    protected void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
     }
 
-    @Override
-    public DownloadThreadPool getThreadPool() {
-        return lusciousThreadPool;
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
-        // Sanitizes the url removing GET parameters and convert to legacy api url.
-        // "https://legacy.luscious.net/albums/albumname"
+    public static String encodeVariablesPartOfURL(int page, String albumId) {
         try {
-            Matcher m = P.matcher(url.toString());
-            if (m.matches()) {
-                String sanitizedUrl = m.group();
-                sanitizedUrl = sanitizedUrl.replaceFirst(
-                        "^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net",
-                        "https://legacy.luscious.net");
-                return new URI(sanitizedUrl).toURL();
-            }
-
-            throw new Exception("ERROR: Unable to sanitize url.");
-        } catch (Exception e) {
-            LOGGER.info("Error sanitizing the url.");
-            LOGGER.error(e);
-            return super.sanitizeURL(url);
-        }
-    }
+            String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}";
 
-    @Override
-    public String normalizeUrl(String url) {
-        try {
-            return url.replaceFirst(
-                    "^https?://(?:members\\.|legacy\\.)?luscious.net", "https://www.luscious.net");
-        } catch (Exception e) {
-            LOGGER.info("Error normalizing the url.");
-            LOGGER.error(e);
-            return super.normalizeUrl(url);
+            return URLEncoder.encode(json, "UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            throw new IllegalStateException("Could not encode variables");
         }
     }
-
-    public class LusciousDownloadThread implements Runnable {
-        private final URL url;
-        private final int index;
-
-        public LusciousDownloadThread(URL url, int index) {
-            this.url = url;
-            this.index = index;
-        }
-
-        @Override
-        public void run() {
-            try {
-                Document page = Http.url(url).retries(RETRY_COUNT).get();
-
-                String downloadUrl = page.select(".icon-download").attr("abs:href");
-                if (downloadUrl.equals("")) {
-                    // This is here for pages with mp4s instead of images.
-                    downloadUrl = page.select("div > video > source").attr("src");
-                    if (!downloadUrl.equals("")) {
-                        throw new IOException("Could not find download url for image or video.");
-                    }
-                }
-
-                //If a valid download url was found.
-                addURLToDownload(new URI(downloadUrl).toURL(), getPrefix(index));
-
-            } catch (IOException | URISyntaxException e) {
-                LOGGER.error("Error downloadiong url " + url, e);
-            }
-        }
-
-    }
 }