Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LusciousRipper fixed #156

Merged
merged 1 commit into from
Nov 18, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 44 additions & 96 deletions src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class LusciousRipper extends AbstractHTMLRipper {
private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
private static String albumid;

private static final Pattern P = Pattern.compile("^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net/albums/([-_.0-9a-zA-Z]+)/?");
private final DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");

public LusciousRipper(URL url) throws IOException {
super(url);
Expand All @@ -38,117 +37,66 @@ public String getHost() {
}

/**
 * Loads the first page for this rip by delegating to the superclass.
 *
 * @return the document produced by {@code super.getFirstPage()}
 * @throws IOException propagated unchanged from the superclass call
 */
@Override
public Document getFirstPage() throws IOException {
    final Document firstPage = super.getFirstPage();
    return firstPage;
}

@Override
public List<String> getURLsFromPage(Document page) {
public List<String> getURLsFromPage(Document page) { // gets urls for all pages through the api
List<String> urls = new ArrayList<>();
Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
for (Element e : urlElements) {
urls.add(e.attr("abs:href"));
}
int totalPages = 1;

for (int i = 1; i <= totalPages; i++) {
String APIStringWOVariables = "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=%2520query%2520PictureListInsideAlbum%28%2524input%253A%2520PictureListInput%21%29%2520%257B%2520picture%2520%257B%2520list%28input%253A%2520%2524input%29%2520%257B%2520info%2520%257B%2520...FacetCollectionInfo%2520%257D%2520items%2520%257B%2520__typename%2520id%2520title%2520description%2520created%2520like_status%2520number_of_comments%2520number_of_favorites%2520moderation_status%2520width%2520height%2520resolution%2520aspect_ratio%2520url_to_original%2520url_to_video%2520is_animated%2520position%2520permissions%2520url%2520tags%2520%257B%2520category%2520text%2520url%2520%257D%2520thumbnails%2520%257B%2520width%2520height%2520size%2520url%2520%257D%2520%257D%2520%257D%2520%257D%2520%257D%2520fragment%2520FacetCollectionInfo%2520on%2520FacetCollectionInfo%2520%257B%2520page%2520has_next_page%2520has_previous_page%2520total_items%2520total_pages%2520items_per_page%2520url_complete%2520%257D%2520&variables=";
Connection con = Http.url(APIStringWOVariables + encodeVariablesPartOfURL(i, albumid)).method(Connection.Method.GET).retries(5).connection();
con.ignoreHttpErrors(true);
con.ignoreContentType(true);
con.userAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0");
Connection.Response res;
try {
res = con.execute();
} catch (IOException e) {
throw new RuntimeException(e);
}
String body = res.body();

return urls;
}
JSONObject jsonObject = new JSONObject(body);

@Override
public Document getNextPage(Document doc) throws IOException {
// luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
// Simply GET the nextPageUrl also works. Therefore, we do this...
Element nextPageElement = doc.select("div#next_page > div > a").first();
if (nextPageElement == null) {
throw new IOException("No next page found.");
JSONObject data = jsonObject.getJSONObject("data");
JSONObject picture = data.getJSONObject("picture");
JSONObject list = picture.getJSONObject("list");
JSONArray items = list.getJSONArray("items");
JSONObject info = list.getJSONObject("info");
totalPages = info.getInt("total_pages");

for (int j = 0; j < items.length(); j++) {
JSONObject item = items.getJSONObject(j);
String urlToOriginal = item.getString("url_to_original");
urls.add(urlToOriginal);
}
}

return Http.url(nextPageElement.attr("abs:href")).get();
return urls;
}

/**
 * Extracts the album slug from a luscious.net album URL.
 *
 * <p>Side effect: the text after the slug's last underscore is stored in the
 * static {@code albumid} field before returning.
 *
 * @param url the album URL to inspect
 * @return the slug captured by group 1 of {@code P}
 * @throws MalformedURLException if the URL does not match {@code P}
 */
@Override
public String getGID(URL url) throws MalformedURLException {
    final Matcher albumMatcher = P.matcher(url.toExternalForm());
    if (!albumMatcher.matches()) {
        throw new MalformedURLException("Expected luscious.net URL format: "
                + "luscious.net/albums/albumname \n members.luscious.net/albums/albumname - got " + url + " instead.");
    }

    final String slug = albumMatcher.group(1);
    // Split once and keep the last underscore-separated token as the album id.
    final String[] slugParts = slug.split("_");
    albumid = slugParts[slugParts.length - 1];
    return slug;
}

@Override
public void downloadURL(URL url, int index) {
lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
protected void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
}

/**
 * Returns the thread pool held in this ripper's {@code lusciousThreadPool} field.
 *
 * @return the ripper's {@link DownloadThreadPool} instance
 */
@Override
public DownloadThreadPool getThreadPool() {
    return this.lusciousThreadPool;
}

@Override
public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
// Sanitizes the url removing GET parameters and convert to legacy api url.
// "https://legacy.luscious.net/albums/albumname"
public static String encodeVariablesPartOfURL(int page, String albumId) {
try {
Matcher m = P.matcher(url.toString());
if (m.matches()) {
String sanitizedUrl = m.group();
sanitizedUrl = sanitizedUrl.replaceFirst(
"^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net",
"https://legacy.luscious.net");
return new URI(sanitizedUrl).toURL();
}

throw new Exception("ERROR: Unable to sanitize url.");
} catch (Exception e) {
LOGGER.info("Error sanitizing the url.");
LOGGER.error(e);
return super.sanitizeURL(url);
}
}
String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}";

@Override
public String normalizeUrl(String url) {
try {
return url.replaceFirst(
"^https?://(?:members\\.|legacy\\.)?luscious.net", "https://www.luscious.net");
} catch (Exception e) {
LOGGER.info("Error normalizing the url.");
LOGGER.error(e);
return super.normalizeUrl(url);
return URLEncoder.encode(json, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new IllegalStateException("Could not encode variables");
}
}

public class LusciousDownloadThread implements Runnable {
private final URL url;
private final int index;

public LusciousDownloadThread(URL url, int index) {
this.url = url;
this.index = index;
}

@Override
public void run() {
try {
Document page = Http.url(url).retries(RETRY_COUNT).get();

String downloadUrl = page.select(".icon-download").attr("abs:href");
if (downloadUrl.equals("")) {
// This is here for pages with mp4s instead of images.
downloadUrl = page.select("div > video > source").attr("src");
if (!downloadUrl.equals("")) {
throw new IOException("Could not find download url for image or video.");
}
}

//If a valid download url was found.
addURLToDownload(new URI(downloadUrl).toURL(), getPrefix(index));

} catch (IOException | URISyntaxException e) {
LOGGER.error("Error downloadiong url " + url, e);
}
}

}
}