getURLsFromPage(Document page) {
@Override
public void downloadURL(URL url, int index) {
- EHentaiImageThread t = new EHentaiImageThread(url, index, this.workingDir);
+ EHentaiImageThread t = new EHentaiImageThread(url, index, this.workingDir.toPath());
ehentaiThreadPool.addThread(t);
try {
Thread.sleep(IMAGE_SLEEP_TIME);
- }
- catch (InterruptedException e) {
+ } catch (InterruptedException e) {
LOGGER.warn("Interrupted while waiting to load next image", e);
}
}
/**
* Helper class to find and download images found on "image" pages
- *
+ *
* Handles case when site has IP-banned the user.
*/
- private class EHentaiImageThread extends Thread {
- private URL url;
- private int index;
- private File workingDir;
+ private class EHentaiImageThread implements Runnable {
+ private final URL url;
+ private final int index;
+ private final Path workingDir;
- EHentaiImageThread(URL url, int index, File workingDir) {
+ EHentaiImageThread(URL url, int index, Path workingDir) {
super();
this.url = url;
this.index = index;
@@ -246,22 +239,21 @@ private void fetchImage() {
Matcher m = p.matcher(imgsrc);
if (m.matches()) {
// Manually discover filename from URL
- String savePath = this.workingDir + File.separator;
+ String savePath = this.workingDir + "/";
if (Utils.getConfigBoolean("download.save_order", true)) {
savePath += String.format("%03d_", index);
}
savePath += m.group(1);
- addURLToDownload(new URL(imgsrc), new File(savePath));
- }
- else {
+ addURLToDownload(new URI(imgsrc).toURL(), Paths.get(savePath));
+ } else {
// Provide prefix and let the AbstractRipper "guess" the filename
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
- addURLToDownload(new URL(imgsrc), prefix);
+ addURLToDownload(new URI(imgsrc).toURL(), prefix);
}
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
index 22968216f..7cfd568f3 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
@@ -1,8 +1,7 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
+import java.net.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -10,8 +9,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import com.rarchives.ripme.utils.Utils;
-import org.json.JSONObject;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
@@ -23,13 +20,7 @@
public class EightmusesRipper extends AbstractHTMLRipper {
- private Document albumDoc = null;
- private Map<String, String> cookies = new HashMap<>();
- // TODO put up a wiki page on using maps to store titles
- // the map for storing the title of each album when downloading sub albums
- private Map<URL, String> urlTitles = new HashMap<>();
-
- private Boolean rippingSubalbums = false;
+ private Map<String, String> cookies = new HashMap<>();
public EightmusesRipper(URL url) throws IOException {
super(url);
@@ -61,10 +52,10 @@ public String getGID(URL url) throws MalformedURLException {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
- Element titleElement = getFirstPage().select("meta[name=description]").first();
+ Element titleElement = getCachedFirstPage().select("meta[name=description]").first();
String title = titleElement.attr("content");
title = title.replace("A huge collection of free porn comics for adults. Read", "");
title = title.replace("online for free at 8muses.com", "");
@@ -78,21 +69,18 @@ public String getAlbumTitle(URL url) throws MalformedURLException {
@Override
public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- Response resp = Http.url(url).response();
- cookies.putAll(resp.cookies());
- albumDoc = resp.parse();
- }
- return albumDoc;
+ Response resp = Http.url(url).response();
+ cookies.putAll(resp.cookies());
+ return resp.parse();
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<>();
- int x = 1;
// This contains the thumbnails of all images on the page
Elements pageImages = page.getElementsByClass("c-tile");
- for (Element thumb : pageImages) {
+ for (int i = 0; i < pageImages.size(); i++) {
+ Element thumb = pageImages.get(i);
// If true this link is a sub album
if (thumb.attr("href").contains("/comics/album/")) {
String subUrl = "https://www.8muses.com" + thumb.attr("href");
@@ -116,24 +104,14 @@ public List getURLsFromPage(Document page) {
if (thumb.hasAttr("data-cfsrc")) {
image = thumb.attr("data-cfsrc");
} else {
- // Deobfustace the json data
- String rawJson = deobfuscateJSON(page.select("script#ractive-public").html()
- .replaceAll("&gt;", ">").replaceAll("&lt;", "<").replace("&amp;", "&"));
- JSONObject json = new JSONObject(rawJson);
+ Element imageElement = thumb.select("img").first();
+ image = "https://comics.8muses.com" + imageElement.attr("data-src").replace("/th/", "/fl/");
try {
- for (int i = 0; i != json.getJSONArray("pictures").length(); i++) {
- image = "https://www.8muses.com/image/fl/" + json.getJSONArray("pictures").getJSONObject(i).getString("publicUri");
- URL imageUrl = new URL(image);
- addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, "", null, true);
- // X is our page index
- x++;
- if (isThisATest()) {
- break;
- }
- }
- return imageURLs;
- } catch (MalformedURLException e) {
+ URL imageUrl = new URI(image).toURL();
+ addURLToDownload(imageUrl, getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, getPrefixShort(i), "", null, true);
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.error("\"" + image + "\" is malformed");
+ LOGGER.error(e.getMessage());
}
}
if (!image.contains("8muses.com")) {
@@ -173,25 +151,4 @@ public String getPrefixLong(int index) {
public String getPrefixShort(int index) {
return String.format("%03d", index);
}
-
- private String deobfuscateJSON(String obfuscatedString) {
- StringBuilder deobfuscatedString = new StringBuilder();
- // The first char in one of 8muses obfuscated strings is always ! so we replace it
- for (char ch : obfuscatedString.replaceFirst("!", "").toCharArray()){
- deobfuscatedString.append(deobfuscateChar(ch));
- }
- return deobfuscatedString.toString();
- }
-
- private String deobfuscateChar(char c) {
- if ((int) c == 32) {
- return fromCharCode(32);
- }
- return fromCharCode(33 + (c + 14) % 94);
-
- }
-
- private static String fromCharCode(int... codePoints) {
- return new String(codePoints, 0, codePoints.length);
- }
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
index d64e96005..0f77e03c5 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
@@ -7,6 +7,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -93,11 +95,11 @@ public Document getNextPage(Document doc) throws IOException {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
if (!is_profile(url)) {
try {
// Attempt to use album title as GID
- Element titleElement = getFirstPage().select("meta[property=og:title]").first();
+ Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
String title = titleElement.attr("content");
title = title.substring(title.lastIndexOf('/') + 1);
return getHost() + "_" + getGID(url) + "_" + title.trim();
@@ -119,7 +121,6 @@ public List getURLsFromPage(Document doc) {
for (Element img : imgs) {
if (img.hasClass("album-image")) {
String imageURL = img.attr("src");
- imageURL = imageURL;
URLs.add(imageURL);
}
}
@@ -195,7 +196,7 @@ public String getGID(URL url) throws MalformedURLException {
throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album or eroshae.com/album");
}
- public static List<URL> getURLs(URL url) throws IOException{
+ public static List<URL> getURLs(URL url) throws IOException, URISyntaxException {
Response resp = Http.url(url)
.ignoreContentType()
@@ -209,7 +210,7 @@ public static List getURLs(URL url) throws IOException{
for (Element img : imgs) {
if (img.hasClass("album-image")) {
String imageURL = img.attr("src");
- URLs.add(new URL(imageURL));
+ URLs.add(new URI(imageURL).toURL());
}
}
//Videos
@@ -218,7 +219,7 @@ public static List getURLs(URL url) throws IOException{
if (vid.hasClass("album-video")) {
Elements source = vid.getElementsByTag("source");
String videoURL = source.first().attr("src");
- URLs.add(new URL(videoURL));
+ URLs.add(new URI(videoURL).toURL());
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java
index dc535deaa..95528470c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java
@@ -9,6 +9,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
@@ -48,11 +50,6 @@ public String getGID(URL url) throws MalformedURLException {
return m.group(m.groupCount());
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document page) {
LOGGER.info(page);
@@ -94,8 +91,8 @@ public void ripAlbum(Document page) {
Map<String, String> opts = new HashMap<String, String>();
opts.put("subdirectory", page.title().replaceAll(" \\| Erofus - Sex and Porn Comics", "").replaceAll(" ", "_"));
opts.put("prefix", getPrefix(x));
- addURLToDownload(new URL(image), opts);
- } catch (MalformedURLException e) {
+ addURLToDownload(new URI(image).toURL(), opts);
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.info(e.getMessage());
}
x++;
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java
index 7f056dc26..3035d7465 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java
@@ -2,16 +2,19 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.rarchives.ripme.utils.Utils;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
@@ -22,10 +25,8 @@
*/
public class EromeRipper extends AbstractHTMLRipper {
- private static final String EROME_REFERER = "https://www.erome.com/";
-
boolean rippingProfile;
-
+ private HashMap<String, String> cookies = new HashMap<>();
public EromeRipper (URL url) throws IOException {
super(url);
@@ -33,17 +34,17 @@ public EromeRipper (URL url) throws IOException {
@Override
public String getDomain() {
- return "erome.com";
+ return "erome.com";
}
@Override
public String getHost() {
- return "erome";
+ return "erome";
}
@Override
public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index), "", EROME_REFERER, null, null);
+ addURLToDownload(url, getPrefix(index), "", "erome.com", this.cookies);
}
@Override
@@ -68,39 +69,40 @@ public List getAlbumsToQueue(Document doc) {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
- try {
- // Attempt to use album title as GID
- Element titleElement = getFirstPage().select("meta[property=og:title]").first();
- String title = titleElement.attr("content");
- title = title.substring(title.lastIndexOf('/') + 1);
- return getHost() + "_" + getGID(url) + "_" + title.trim();
- } catch (IOException e) {
- // Fall back to default album naming convention
- LOGGER.info("Unable to find title at " + url);
- } catch (NullPointerException e) {
- return getHost() + "_" + getGID(url);
- }
- return super.getAlbumTitle(url);
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
+ try {
+ // Attempt to use album title as GID
+ Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
+ String title = titleElement.attr("content");
+ title = title.substring(title.lastIndexOf('/') + 1);
+ return getHost() + "_" + getGID(url) + "_" + title.trim();
+ } catch (IOException e) {
+ // Fall back to default album naming convention
+ LOGGER.info("Unable to find title at " + url);
+ } catch (NullPointerException e) {
+ return getHost() + "_" + getGID(url);
+ }
+ return super.getAlbumTitle(url);
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- return new URL(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com"));
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
+ return new URI(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com")).toURL();
}
@Override
public List<String> getURLsFromPage(Document doc) {
- List<String> URLs = new ArrayList<>();
return getMediaFromPage(doc);
}
@Override
public Document getFirstPage() throws IOException {
+ this.setAuthCookie();
Response resp = Http.url(this.url)
- .ignoreContentType()
- .response();
+ .cookies(cookies)
+ .ignoreContentType()
+ .response();
return resp.parse();
}
@@ -126,18 +128,17 @@ public String getGID(URL url) throws MalformedURLException {
private List<String> getMediaFromPage(Document doc) {
List<String> results = new ArrayList<>();
for (Element el : doc.select("img.img-front")) {
- if (el.hasAttr("src")) {
- if (el.attr("src").startsWith("https:")) {
- results.add(el.attr("src"));
- } else {
- results.add("https:" + el.attr("src"));
- }
- } else if (el.hasAttr("data-src")) {
- //to add images that are not loaded( as all images are lasyloaded as we scroll).
- results.add(el.attr("data-src"));
- }
-
- }
+ if (el.hasAttr("data-src")) {
+ //to add images that are not loaded (as all images are lazy-loaded as we scroll).
+ results.add(el.attr("data-src"));
+ } else if (el.hasAttr("src")) {
+ if (el.attr("src").startsWith("https:")) {
+ results.add(el.attr("src"));
+ } else {
+ results.add("https:" + el.attr("src"));
+ }
+ }
+ }
for (Element el : doc.select("source[label=HD]")) {
if (el.attr("src").startsWith("https:")) {
results.add(el.attr("src"));
@@ -154,7 +155,22 @@ private List getMediaFromPage(Document doc) {
results.add("https:" + el.attr("src"));
}
}
+
+ if (results.size() == 0) {
+ if (cookies.isEmpty()) {
+ LOGGER.warn("You might try setting erome.laravel_session manually " +
+ "if you think this page definitely contains media.");
+ }
+ }
+
return results;
}
+ private void setAuthCookie() {
+ String sessionId = Utils.getConfigString("erome.laravel_session", null);
+ if (sessionId != null) {
+ cookies.put("laravel_session", sessionId);
+ }
+ }
+
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ErotivRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ErotivRipper.java
index 10e73346a..045110850 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ErotivRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ErotivRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -11,7 +13,6 @@
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
@@ -57,8 +58,8 @@ public Document getFirstPage() throws IOException {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- return new URL(url.toExternalForm().replaceAll("https?://www.erotiv.io", "https://erotiv.io"));
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
+ return new URI(url.toExternalForm().replaceAll("https?://www.erotiv.io", "https://erotiv.io")).toURL();
}
@Override
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java
index 1922002b1..2661d0559 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java
@@ -12,7 +12,6 @@
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
public class FemjoyhunterRipper extends AbstractHTMLRipper {
@@ -41,12 +40,6 @@ public String getGID(URL url) throws MalformedURLException {
"femjoyhunter.com/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FitnakedgirlsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FitnakedgirlsRipper.java
index de6fb73d8..51d5f15f8 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FitnakedgirlsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FitnakedgirlsRipper.java
@@ -1,72 +1,66 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
-
-public class FitnakedgirlsRipper extends AbstractHTMLRipper {
-
- public FitnakedgirlsRipper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public String getHost() {
- return "fitnakedgirls";
- }
-
- @Override
- public String getDomain() {
- return "fitnakedgirls.com";
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p;
- Matcher m;
-
- p = Pattern.compile("^.*fitnakedgirls\\.com/gallery/(.+)$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
-
- throw new MalformedURLException(
- "Expected fitnakedgirls.com gallery format: " + "fitnakedgirls.com/gallery/####" + " Got: " + url);
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
- @Override
- public List<String> getURLsFromPage(Document doc) {
- List<String> imageURLs = new ArrayList<>();
-
- Elements imgs = doc.select("div[class*=wp-tiles-tile-bg] > img");
- for (Element img : imgs) {
- String imgSrc = img.attr("src");
- imageURLs.add(imgSrc);
- }
-
- return imageURLs;
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- // Send referrer when downloading images
- addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
- }
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+
+public class FitnakedgirlsRipper extends AbstractHTMLRipper {
+
+ public FitnakedgirlsRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "fitnakedgirls";
+ }
+
+ @Override
+ public String getDomain() {
+ return "fitnakedgirls.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p;
+ Matcher m;
+
+ p = Pattern.compile("^.*fitnakedgirls\\.com/gallery/(.+)$");
+ m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+
+ throw new MalformedURLException(
+ "Expected fitnakedgirls.com gallery format: " + "fitnakedgirls.com/gallery/####" + " Got: " + url);
+ }
+
+ @Override
+ public List<String> getURLsFromPage(Document doc) {
+ List<String> imageURLs = new ArrayList<>();
+
+ Elements imgs = doc.select("div[class*=wp-tiles-tile-bg] > img");
+ for (Element img : imgs) {
+ String imgSrc = img.attr("src");
+ imageURLs.add(imgSrc);
+ }
+
+ return imageURLs;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ // Send referrer when downloading images
+ addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
+ }
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FivehundredpxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FivehundredpxRipper.java
index 6591dd011..bba284f14 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FivehundredpxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FivehundredpxRipper.java
@@ -1,10 +1,9 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
-import java.net.URL;
+import java.net.*;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@@ -163,8 +162,8 @@ private String getUserID(String username) throws IOException {
}
@Override
- public JSONObject getFirstPage() throws IOException {
- URL apiURL = new URL(baseURL + "&consumer_key=" + CONSUMER_KEY);
+ public JSONObject getFirstPage() throws IOException, URISyntaxException {
+ URL apiURL = new URI(baseURL + "&consumer_key=" + CONSUMER_KEY).toURL();
LOGGER.debug("apiURL: " + apiURL);
JSONObject json = Http.url(apiURL).getJSON();
@@ -231,7 +230,7 @@ else if (baseURL.contains("/blogs?")) {
}
@Override
- public JSONObject getNextPage(JSONObject json) throws IOException {
+ public JSONObject getNextPage(JSONObject json) throws IOException, URISyntaxException {
if (isThisATest()) {
return null;
}
@@ -248,9 +247,9 @@ public JSONObject getNextPage(JSONObject json) throws IOException {
sleep(500);
++page;
- URL apiURL = new URL(baseURL
+ URL apiURL = new URI(baseURL
+ "&page=" + page
- + "&consumer_key=" + CONSUMER_KEY);
+ + "&consumer_key=" + CONSUMER_KEY).toURL();
return Http.url(apiURL).getJSON();
}
@@ -295,14 +294,9 @@ public List getURLsFromJSON(JSONObject json) {
}
}
}
- if (imageURL == null) {
- LOGGER.error("Failed to find image for photo " + photo.toString());
- }
- else {
- imageURLs.add(imageURL);
- if (isThisATest()) {
- break;
- }
+ imageURLs.add(imageURL);
+ if (isThisATest()) {
+ break;
}
}
return imageURLs;
@@ -310,13 +304,13 @@ public List getURLsFromJSON(JSONObject json) {
private boolean urlExists(String url) {
try {
- HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
+ HttpURLConnection connection = (HttpURLConnection) new URI(url).toURL().openConnection();
connection.setRequestMethod("HEAD");
if (connection.getResponseCode() != 200) {
throw new IOException("Couldn't find full-size image at " + url);
}
return true;
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
return false;
}
}
@@ -330,8 +324,8 @@ public boolean keepSortOrder() {
public void downloadURL(URL url, int index) {
String u = url.toExternalForm();
String[] fields = u.split("/");
- String prefix = getPrefix(index) + fields[fields.length - 3];
- File saveAs = new File(getWorkingDir() + File.separator + prefix + ".jpg");
+ String prefix = "/" + getPrefix(index) + fields[fields.length - 3];
+ Path saveAs = Paths.get(getWorkingDir() + prefix + ".jpg");
addURLToDownload(url, saveAs, "", null, false);
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
index 320884245..c58a7e717 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.*;
import java.util.regex.Matcher;
@@ -20,7 +22,6 @@
public class FlickrRipper extends AbstractHTMLRipper {
- private Document albumDoc = null;
private final DownloadThreadPool flickrThreadPool;
private enum UrlType {
@@ -63,7 +64,7 @@ public String getDomain() {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String sUrl = url.toExternalForm();
// Strip out https
sUrl = sUrl.replace("https://secure.flickr.com", "http://www.flickr.com");
@@ -74,7 +75,7 @@ public URL sanitizeURL(URL url) throws MalformedURLException {
}
sUrl += "pool";
}
- return new URL(sUrl);
+ return new URI(sUrl).toURL();
}
// FLickr is one of those sites what includes a api key in sites javascript
// TODO let the user provide their own api key
@@ -129,8 +130,8 @@ private JSONObject getJSON(String page, String apiKey) {
String apiURL = null;
try {
apiURL = apiURLBuilder(getAlbum(url.toExternalForm()), page, apiKey);
- pageURL = new URL(apiURL);
- } catch (MalformedURLException e) {
+ pageURL = new URI(apiURL).toURL();
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.error("Unable to get api link " + apiURL + " is malformed");
}
try {
@@ -172,13 +173,13 @@ private Album getAlbum(String url) throws MalformedURLException {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
if (!url.toExternalForm().contains("/sets/")) {
return super.getAlbumTitle(url);
}
try {
// Attempt to use album title as GID
- Document doc = getFirstPage();
+ Document doc = getCachedFirstPage();
String user = url.toExternalForm();
user = user.substring(user.indexOf("/photos/") + "/photos/".length());
user = user.substring(0, user.indexOf("/"));
@@ -228,13 +229,6 @@ public String getGID(URL url) throws MalformedURLException {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = Http.url(url).get();
- }
- return albumDoc;
- }
@Override
public List getURLsFromPage(Document doc) {
@@ -268,7 +262,7 @@ public List getURLsFromPage(Document doc) {
JSONObject data = (JSONObject) pictures.get(i);
try {
addURLToDownload(getLargestImageURL(data.getString("id"), apiKey));
- } catch (MalformedURLException e) {
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.error("Flickr MalformedURLException: " + e.getMessage());
}
@@ -291,11 +285,11 @@ public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
- private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException {
+ private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException, URISyntaxException {
TreeMap<Integer, String> imageURLMap = new TreeMap<>();
try {
- URL imageAPIURL = new URL("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1");
+ URL imageAPIURL = new URI("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1").toURL();
JSONArray imageSizes = new JSONObject(Http.url(imageAPIURL).ignoreContentType().get().text()).getJSONObject("sizes").getJSONArray("size");
for (int i = 0; i < imageSizes.length(); i++) {
JSONObject imageInfo = imageSizes.getJSONObject(i);
@@ -310,6 +304,6 @@ private URL getLargestImageURL(String imageID, String apiKey) throws MalformedUR
LOGGER.error("IOException while looking at image sizes: " + e.getMessage());
}
- return new URL(imageURLMap.lastEntry().getValue());
+ return new URI(imageURLMap.lastEntry().getValue()).toURL();
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FooktubeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FooktubeRipper.java
index 3cda70b2c..fed1abe02 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FooktubeRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FooktubeRipper.java
@@ -10,16 +10,9 @@
import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.rarchives.ripme.ripper.VideoRipper;
-import com.rarchives.ripme.utils.Http;
public class FooktubeRipper extends AbstractSingleFileRipper {
- private static final String HOST = "mulemax";
-
public FooktubeRipper(URL url) throws IOException {
super(url);
}
@@ -34,10 +27,6 @@ public String getDomain() {
return "mulemax.com";
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
@Override
public boolean canRip(URL url) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java
index e08d77fd2..a39d3b9b2 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java
@@ -44,12 +44,6 @@ public String getGID(URL url) throws MalformedURLException {
"freecomiconline.me/TITLE/CHAPTER - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
String nextPage = doc.select("div.select-pagination a").get(1).attr("href");
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
index 683c791b9..dbb46fe1c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
@@ -1,10 +1,12 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
-import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -15,11 +17,10 @@
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.Connection.Response;
-import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.safety.Whitelist;
+import org.jsoup.safety.Safelist;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@@ -91,14 +92,13 @@ public Document getNextPage(Document doc) throws IOException {
String nextUrl = urlBase + nextPageUrl.first().attr("href");
sleep(500);
- Document nextPage = Http.url(nextUrl).cookies(cookies).get();
- return nextPage;
+ return Http.url(nextUrl).cookies(cookies).get();
}
private String getImageFromPost(String url) {
sleep(1000);
- Document d = null;
+ Document d;
try {
d = Http.url(url).cookies(cookies).get();
Elements links = d.getElementsByTag("a");
@@ -125,6 +125,9 @@ public List getURLsFromPage(Document page) {
urls.add(urlToAdd);
}
}
+ if (isStopped() || isThisATest()) {
+ break;
+ }
}
return urls;
}
@@ -164,7 +167,7 @@ public String getDescription(String page) {
ele.select("br").append("\\n");
ele.select("p").prepend("\\n\\n");
LOGGER.debug("Returning description at " + page);
- String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
+ String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Safelist.none(), new Document.OutputSettings().prettyPrint(false));
return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
} catch (IOException ioe) {
LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
@@ -181,24 +184,22 @@ public boolean saveText(URL url, String subdirectory, String text, int index) {
}
String newText = "";
String saveAs = "";
- File saveFileAs;
+ Path saveFileAs;
saveAs = text.split("\n")[0];
saveAs = saveAs.replaceAll("^(\\S+)\\s+by\\s+(.*)$", "$2_$1");
for (int i = 1;i < text.split("\n").length; i++) {
newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i];
}
try {
- if (!subdirectory.equals("")) {
- subdirectory = File.separator + subdirectory;
- }
- saveFileAs = new File(
- workingDir.getCanonicalPath()
+ saveFileAs = Paths.get(
+ workingDir
+ + "/"
+ subdirectory
- + File.separator
+ + "/"
+ saveAs
+ ".txt");
// Write the file
- FileOutputStream out = (new FileOutputStream(saveFileAs));
+ OutputStream out = Files.newOutputStream(saveFileAs);
out.write(text.getBytes());
out.close();
} catch (IOException e) {
@@ -206,9 +207,13 @@ public boolean saveText(URL url, String subdirectory, String text, int index) {
return false;
}
LOGGER.debug("Downloading " + url + "'s description to " + saveFileAs);
- if (!saveFileAs.getParentFile().exists()) {
+ if (!Files.exists(saveFileAs.getParent())) {
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
- saveFileAs.getParentFile().mkdirs();
+ try {
+ Files.createDirectory(saveFileAs.getParent());
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
}
return true;
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
index d88b16e87..62a60fccd 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -40,7 +42,7 @@ public String getDomain() {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String u = url.toExternalForm();
if (u.contains("/thumbs/")) {
u = u.replace("/thumbs/", "/full/");
@@ -48,7 +50,7 @@ public URL sanitizeURL(URL url) throws MalformedURLException {
if (u.contains("/expanded/")) {
u = u.replaceAll("/expanded/", "/full/");
}
- return new URL(u);
+ return new URI(u).toURL();
}
@Override
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
deleted file mode 100644
index c542c6dcf..000000000
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
+++ /dev/null
@@ -1,160 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import org.json.JSONArray;
-import org.json.JSONObject;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.rarchives.ripme.utils.Http;
-
-
-public class GfycatRipper extends AbstractHTMLRipper {
-
- private static final String HOST = "gfycat.com";
- String username = "";
- String cursor = "";
- String count = "30";
- String REFERRER = "www.reddit.com";
-
-
-
- public GfycatRipper(URL url) throws IOException {
- super(new URL(url.toExternalForm().split("-")[0].replace("thumbs.", "")));
- }
-
- @Override
- public String getDomain() {
- return "gfycat.com";
- }
-
- @Override
- public String getHost() {
- return "gfycat";
- }
-
- @Override
- public boolean canRip(URL url) {
- return url.getHost().endsWith(HOST);
- }
-
- @Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- String sUrl = url.toExternalForm();
- sUrl = sUrl.replace("/gifs/detail", "");
- sUrl = sUrl.replace("/amp", "");
- return new URL(sUrl);
- }
-
- public boolean isProfile() {
- Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9\\.\\-\\_]+).*$");
- Matcher m = p.matcher(url.toExternalForm());
- return m.matches();
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- if (!isProfile()) {
- return Http.url(url).referrer(REFERRER).get();
- } else {
- username = getGID(url);
- return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats")).referrer((REFERRER)).ignoreContentType().get();
- }
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index));
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://(?:thumbs\\.|[wm\\.]*)gfycat\\.com/@?([a-zA-Z0-9\\.\\-\\_]+).*$");
- Matcher m = p.matcher(url.toExternalForm());
-
- if (m.matches())
- return m.group(1);
-
- throw new MalformedURLException(
- "Expected gfycat.com format: "
- + "gfycat.com/id or "
- + "thumbs.gfycat.com/id.gif"
- + " Got: " + url);
- }
-
- private String stripHTMLTags(String t) {
- t = t.replaceAll("\n" +
- " \n" +
- " ", "");
- t = t.replaceAll("\n" +
- "", "");
- t = t.replaceAll("\n", "");
- t = t.replaceAll("=\"\"", "");
- return t;
- }
-
- @Override
- public Document getNextPage(Document doc) throws IOException {
- if (cursor.equals("")) {
- throw new IOException("No more pages");
- }
- return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
- }
-
- @Override
- public List getURLsFromPage(Document doc) {
- List result = new ArrayList<>();
- if (isProfile()) {
- JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
- JSONArray content = page.getJSONArray("gfycats");
- for (int i = 0; i < content.length(); i++) {
- result.add(content.getJSONObject(i).getString("mp4Url"));
- }
- cursor = page.getString("cursor");
- } else {
- Elements videos = doc.select("script");
- for (Element el : videos) {
- String json = el.html();
- if (json.startsWith("{")) {
- JSONObject page = new JSONObject(json);
- result.add(page.getJSONObject("video").getString("contentUrl"));
- }
- }
- }
- return result;
- }
-
- /**
- * Helper method for retrieving video URLs.
- * @param url URL to gfycat page
- * @return URL to video
- * @throws IOException
- */
- public static String getVideoURL(URL url) throws IOException {
- LOGGER.info("Retrieving " + url.toExternalForm());
-
- //Sanitize the URL first
- url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
-
- Document doc = Http.url(url).get();
- Elements videos = doc.select("script");
- for (Element el : videos) {
- String json = el.html();
- if (json.startsWith("{")) {
- JSONObject page = new JSONObject(json);
- return page.getJSONObject("video").getString("contentUrl");
- }
- }
- throw new IOException();
- }
-}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatporntubeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatporntubeRipper.java
index fd8c292a7..bdb58ad2c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatporntubeRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatporntubeRipper.java
@@ -11,8 +11,6 @@
import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
import org.jsoup.nodes.Document;
-import com.rarchives.ripme.utils.Http;
-
public class GfycatporntubeRipper extends AbstractSingleFileRipper {
public GfycatporntubeRipper(URL url) throws IOException {
@@ -40,12 +38,6 @@ public String getGID(URL url) throws MalformedURLException {
"gfycatporntube.com/NAME - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java
index 2afc79d16..49cbfc604 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -13,11 +14,8 @@
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
public class GirlsOfDesireRipper extends AbstractHTMLRipper {
- // Current HTML document
- private Document albumDoc = null;
public GirlsOfDesireRipper(URL url) throws IOException {
super(url);
@@ -32,10 +30,10 @@ public String getDomain() {
return "girlsofdesire.org";
}
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
- Document doc = getFirstPage();
+ Document doc = getCachedFirstPage();
Elements elems = doc.select(".albumName");
return getHost() + "_" + elems.first().text();
} catch (Exception e) {
@@ -62,14 +60,6 @@ public String getGID(URL url) throws MalformedURLException {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = Http.url(url).get();
- }
- return albumDoc;
- }
-
@Override
public List getURLsFromPage(Document doc) {
List imageURLs = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
index fd3b23c24..040ca9780 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -49,9 +50,9 @@ public Document getFirstPage() throws IOException {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
- Document doc = getFirstPage();
+ Document doc = getCachedFirstPage();
String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
return getHost() + "_" + title + "_" + getGID(url);
} catch (Exception e) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java
index cb5215233..2b8ac9674 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -90,7 +91,7 @@ public Document getFirstPage() throws IOException {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
return getHost() + "_" + getGID(url);
} catch (Exception e) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java
index ca709418a..4d28f7a2a 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Base64;
@@ -80,16 +82,15 @@ protected List getURLsFromJSON(JSONObject json) throws JSONException {
}
@Override
- protected JSONObject getFirstPage() throws IOException {
+ protected JSONObject getFirstPage() throws IOException, URISyntaxException {
String jsonEncodedString = getJsonEncodedStringFromPage();
String jsonDecodedString = decodeJsonString(jsonEncodedString);
return new JSONObject(jsonDecodedString);
}
- public String getJsonEncodedStringFromPage() throws MalformedURLException, IOException
- {
+ public String getJsonEncodedStringFromPage() throws MalformedURLException, IOException, URISyntaxException {
// Image data only appears on the /read/ page and not on the /view/ one.
- URL readUrl = new URL(String.format("http://hentainexus.com/read/%s",getGID(url)));
+ URL readUrl = new URI(String.format("http://hentainexus.com/read/%s",getGID(url))).toURL();
Document document = Http.url(readUrl).response().parse();
for (Element scripts : document.getElementsByTag("script")) {
@@ -143,7 +144,7 @@ The following code is a Java adaptation of the initRender() JavaScript function
}
magicByte = (byte) (magicByte & 0x7);
- ArrayList newArray = new ArrayList();
+ ArrayList newArray = new ArrayList<>();
for (int i = 0x0; i < 0x100; i++) {
newArray.add(i);
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaidudeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaidudeRipper.java
index 7950f0cf1..246258597 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaidudeRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaidudeRipper.java
@@ -10,6 +10,7 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
@@ -50,12 +51,6 @@ public String getGID(URL url) throws MalformedURLException {
"Expected hqporner URL format: " + "hentaidude.com/VIDEO - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
@@ -84,7 +79,7 @@ public DownloadThreadPool getThreadPool() {
return hentaidudeThreadPool;
}
- private class HentaidudeDownloadThread extends Thread {
+ private class HentaidudeDownloadThread implements Runnable {
private URL url;
@@ -97,7 +92,7 @@ public HentaidudeDownloadThread(URL url, int index) {
public void run() {
try {
Document doc = Http.url(url).get();
- URL videoSourceUrl = new URL(getVideoUrl(doc));
+ URL videoSourceUrl = new URI(getVideoUrl(doc)).toURL();
addURLToDownload(videoSourceUrl, "", "", "", null, getVideoName(), "mp4");
} catch (Exception e) {
LOGGER.error("Could not get video url for " + getVideoName(), e);
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java
index a4e5895d5..d6dba4190 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -12,7 +13,6 @@
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
public class HentaifoxRipper extends AbstractHTMLRipper {
@@ -41,12 +41,6 @@ public String getGID(URL url) throws MalformedURLException {
"https://hentaifox.com/gallery/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
LOGGER.info(doc);
@@ -59,9 +53,9 @@ public List getURLsFromPage(Document doc) {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
- Document doc = getFirstPage();
+ Document doc = getCachedFirstPage();
String title = doc.select("div.info > h1").first().text();
return getHost() + "_" + title + "_" + getGID(url);
} catch (Exception e) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiimageRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiimageRipper.java
index df7bfb963..45628e825 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiimageRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiimageRipper.java
@@ -52,13 +52,6 @@ public String getGID(URL url) throws MalformedURLException {
"https://hentai-image.com/image/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HitomiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HitomiRipper.java
index 3196c1394..d312b75b0 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HitomiRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HitomiRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -13,7 +15,6 @@
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
-import org.jsoup.nodes.Element;
public class HitomiRipper extends AbstractHTMLRipper {
@@ -35,20 +36,20 @@ public String getDomain() {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("https://hitomi.la/galleries/([\\d]+).html");
+ Pattern p = Pattern.compile("https://hitomi.la/(cg|doujinshi|gamecg|manga)/(.+).html");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
galleryId = m.group(1);
return m.group(1);
}
throw new MalformedURLException("Expected hitomi URL format: " +
- "https://hitomi.la/galleries/ID.html - got " + url + " instead");
+ "https://hitomi.la/(cg|doujinshi|gamecg|manga)/ID.html - got " + url + " instead");
}
@Override
- public Document getFirstPage() throws IOException {
+ public Document getFirstPage() throws IOException, URISyntaxException {
// if we go to /GALLERYID.js we get a nice json array of all images in the gallery
- return Http.url(new URL(url.toExternalForm().replaceAll("hitomi", "ltn.hitomi").replaceAll(".html", ".js"))).ignoreContentType().get();
+ return Http.url(new URI(url.toExternalForm().replaceAll("hitomi", "ltn.hitomi").replaceAll(".html", ".js")).toURL()).ignoreContentType().get();
}
@@ -65,7 +66,7 @@ public List getURLsFromPage(Document doc) {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title and username as GID
Document doc = Http.url(url).get();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java
index 8d13f1138..0f69c75be 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java
@@ -11,6 +11,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -63,9 +65,8 @@ public String getGID(URL url) throws MalformedURLException {
}
@Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
+ public Document getFirstPage() throws IOException, URISyntaxException {
+ return super.getFirstPage();
}
@Override
@@ -130,7 +131,7 @@ public boolean useByteProgessBar() {
return true;
}
- private class HqpornerDownloadThread extends Thread {
+ private class HqpornerDownloadThread implements Runnable {
private URL hqpornerVideoPageUrl;
//private int index;
@@ -164,10 +165,10 @@ public void fetchVideo() {
}
if (downloadUrl != null) {
- addURLToDownload(new URL(downloadUrl), "", subdirectory, "", null, getVideoName(), "mp4");
+ addURLToDownload(new URI(downloadUrl).toURL(), "", subdirectory, "", null, getVideoName(), "mp4");
}
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while downloading video.", e);
}
}
@@ -215,7 +216,7 @@ private String getVideoFromUnknown(String videoPageurl) {
try {
logger.info("Trying to download from unknown video host " + videoPageurl);
- URL url = new URL(videoPageurl);
+ URL url = new URI(videoPageurl).toURL();
Response response = Http.url(url).referrer(hqpornerVideoPageUrl).response();
Document doc = response.parse();
@@ -245,7 +246,7 @@ private String getVideoFromUnknown(String videoPageurl) {
}
}
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
logger.error("Unable to get video url using generic methods.");
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java
index 5b4812584..154206552 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java
@@ -46,12 +46,6 @@ public String getGID(URL url) throws MalformedURLException {
"hypnohub.net/pool/show/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
private String ripPost(String url) throws IOException {
LOGGER.info(url);
Document doc = Http.url(url).get();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java
deleted file mode 100644
index 062217b21..000000000
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java
+++ /dev/null
@@ -1,112 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
-
-public class ImagearnRipper extends AbstractHTMLRipper {
-
- public ImagearnRipper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public String getHost() {
- return "imagearn";
- }
- @Override
- public String getDomain() {
- return "imagearn.com";
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^.*imagearn.com/+gallery.php\\?id=([0-9]+).*$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
- throw new MalformedURLException(
- "Expected imagearn.com gallery formats: "
- + "imagearn.com/gallery.php?id=####..."
- + " Got: " + url);
- }
-
- public URL sanitizeURL(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^.*imagearn.com/+image.php\\?id=[0-9]+.*$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- // URL points to imagearn *image*, not gallery
- try {
- url = getGalleryFromImage(url);
- } catch (Exception e) {
- LOGGER.error("[!] " + e.getMessage(), e);
- }
- }
- return url;
- }
-
- private URL getGalleryFromImage(URL url) throws IOException {
- Document doc = Http.url(url).get();
- for (Element link : doc.select("a[href~=^gallery\\.php.*$]")) {
- LOGGER.info("LINK: " + link.toString());
- if (link.hasAttr("href")
- && link.attr("href").contains("gallery.php")) {
- url = new URL("http://imagearn.com/" + link.attr("href"));
- LOGGER.info("[!] Found gallery from given link: " + url);
- return url;
- }
- }
- throw new IOException("Failed to find gallery at URL " + url);
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
- @Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
- try {
- Document doc = getFirstPage();
- String title = doc.select("h3 > strong").first().text(); // profile name
- return getHost() + "_" + title + "_" + getGID(url);
- } catch (Exception e) {
- // Fall back to default album naming convention
- LOGGER.warn("Failed to get album title from " + url, e);
- }
- return super.getAlbumTitle(url);
- }
-
- @Override
- public List getURLsFromPage(Document doc) {
- List imageURLs = new ArrayList<>();
- for (Element thumb : doc.select("div#gallery > div > a")) {
- String imageURL = thumb.attr("href");
- try {
- Document imagedoc = new Http("http://imagearn.com/" + imageURL).get();
- String image = imagedoc.select("a.thickbox").first().attr("href");
- imageURLs.add(image);
- } catch (IOException e) {
- LOGGER.warn("Was unable to download page: " + imageURL);
- }
- }
- return imageURLs;
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index));
- sleep(1000);
- }
-}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java
index 3aca67cfc..0699273f1 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java
@@ -6,20 +6,24 @@
import com.rarchives.ripme.utils.Utils;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ImagebamRipper extends AbstractHTMLRipper {
- // Current HTML document
- private Document albumDoc = null;
-
// Thread pool for finding direct image links from "image" pages (html)
private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam");
@Override
@@ -45,7 +49,7 @@ public String getGID(URL url) throws MalformedURLException {
Pattern p;
Matcher m;
- p = Pattern.compile("^https?://[wm.]*imagebam.com/gallery/([a-zA-Z0-9]+).*$");
+ p = Pattern.compile("^https?://[wm.]*imagebam.com/(gallery|view)/([a-zA-Z0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
@@ -57,14 +61,6 @@ public String getGID(URL url) throws MalformedURLException {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = Http.url(url).get();
- }
- return albumDoc;
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
@@ -80,7 +76,7 @@ public Document getNextPage(Document doc) throws IOException {
@Override
public List getURLsFromPage(Document doc) {
List imageURLs = new ArrayList<>();
- for (Element thumb : doc.select("div > a[target=_blank]:not(.footera)")) {
+ for (Element thumb : doc.select("div > a[class=thumbnail]:not(.footera)")) {
imageURLs.add(thumb.attr("href"));
}
return imageURLs;
@@ -94,18 +90,15 @@ public void downloadURL(URL url, int index) {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
- Elements elems = getFirstPage().select("legend");
+ Elements elems = getCachedFirstPage().select("[id=gallery-name]");
String title = elems.first().text();
LOGGER.info("Title text: '" + title + "'");
- Pattern p = Pattern.compile("^(.*)\\s\\d* image.*$");
- Matcher m = p.matcher(title);
- if (m.matches()) {
- return getHost() + "_" + getGID(url) + " (" + m.group(1).trim() + ")";
+ if (StringUtils.isNotBlank(title)) {
+ return getHost() + "_" + getGID(url) + " (" + title + ")";
}
- LOGGER.info("Doesn't match " + p.pattern());
} catch (Exception e) {
// Fall back to default album naming convention
LOGGER.warn("Failed to get album title from " + url, e);
@@ -118,9 +111,9 @@ public String getAlbumTitle(URL url) throws MalformedURLException {
*
* Handles case when site has IP-banned the user.
*/
- private class ImagebamImageThread extends Thread {
- private URL url; //link to "image page"
- private int index; //index in album
+ private class ImagebamImageThread implements Runnable {
+ private final URL url; //link to "image page"
+ private final int index; //index in album
ImagebamImageThread(URL url, int index) {
super();
@@ -138,19 +131,19 @@ public void run() {
*/
private void fetchImage() {
try {
- Document doc = Http.url(url).get();
+ Map cookies = new HashMap<>();
+ cookies.put("nsfw_inter", "1");
+ Document doc = Jsoup.connect(url.toString())
+ .cookies(cookies)
+ .get();
+
// Find image
Elements metaTags = doc.getElementsByTag("meta");
String imgsrc = "";//initialize, so no NullPointerExceptions should ever happen.
-
- for (Element metaTag: metaTags) {
- //the direct link to the image seems to always be linked in the part of the html.
- if (metaTag.attr("property").equals("og:image")) {
- imgsrc = metaTag.attr("content");
- LOGGER.info("Found URL " + imgsrc);
- break;//only one (useful) image possible for an "image page".
- }
+ Elements elem = doc.select("img[class*=main-image]");
+ if ((elem != null) && (elem.size() > 0)) {
+ imgsrc = elem.first().attr("src");
}
//for debug, or something goes wrong.
@@ -165,8 +158,8 @@ private void fetchImage() {
prefix = String.format("%03d_", index);
}
- addURLToDownload(new URL(imgsrc), prefix);
- } catch (IOException e) {
+ addURLToDownload(new URI(imgsrc).toURL(), prefix);
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
index 14d21aa9f..4fcf22012 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
@@ -1,8 +1,13 @@
package com.rarchives.ripme.ripper.rippers;
+import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
+import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@@ -10,6 +15,7 @@
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
@@ -17,13 +23,11 @@
public class ImagefapRipper extends AbstractHTMLRipper {
- private Document albumDoc = null;
- private boolean isNewAlbumType = false;
-
private int callsMade = 0;
private long startTime = System.nanoTime();
private static final int RETRY_LIMIT = 10;
+ private static final int HTTP_RETRY_LIMIT = 3;
private static final int RATE_LIMIT_HOUR = 1000;
// All sleep times are in milliseconds
@@ -49,54 +53,40 @@ public String getDomain() {
* Reformat given URL into the desired format (all images on single page)
*/
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String gid = getGID(url);
- String newURL = "https://www.imagefap.com/gallery.php?";
- if (isNewAlbumType) {
- newURL += "p";
- }
- newURL += "gid=" + gid + "&view=2";
+ String newURL = "https://www.imagefap.com/pictures/" + gid + "/random-string";
LOGGER.debug("Changed URL from " + url + " to " + newURL);
- return new URL(newURL);
+ return new URI(newURL).toURL();
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p; Matcher m;
+ // Old format (I suspect no longer supported)
p = Pattern.compile("^.*imagefap.com/gallery.php\\?pgid=([a-f0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
- isNewAlbumType = true;
return m.group(1);
}
+
p = Pattern.compile("^.*imagefap.com/gallery.php\\?gid=([0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
- p = Pattern.compile("^.*imagefap.com/pictures/([0-9]+).*$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
- p = Pattern.compile("^.*imagefap.com/pictures/([a-f0-9]+).*$");
+ p = Pattern.compile("^.*imagefap.com/gallery/([a-f0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
- isNewAlbumType = true;
return m.group(1);
}
- p = Pattern.compile("^.*imagefap.com/gallery/([0-9]+).*$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
- p = Pattern.compile("^.*imagefap.com/gallery/([a-f0-9]+).*$");
+ // most recent format
+ p = Pattern.compile("^.*imagefap.com/pictures/([a-f0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
- isNewAlbumType = true;
return m.group(1);
}
@@ -109,18 +99,20 @@ public String getGID(URL url) throws MalformedURLException {
@Override
public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = getPageWithRetries(url);
- }
- return albumDoc;
+
+ Document firstPage = getPageWithRetries(url);
+
+ sendUpdate(STATUS.LOADING_RESOURCE, "Loading first page...");
+
+ return firstPage;
}
@Override
- public Document getNextPage(Document doc) throws IOException {
+ public Document getNextPage(Document doc) throws IOException, URISyntaxException {
String nextURL = null;
for (Element a : doc.select("a.link3")) {
if (a.text().contains("next")) {
- nextURL = "https://imagefap.com/gallery.php" + a.attr("href");
+ nextURL = this.sanitizeURL(this.url) + a.attr("href");
break;
}
}
@@ -129,26 +121,50 @@ public Document getNextPage(Document doc) throws IOException {
}
// Sleep before fetching next page.
sleep(PAGE_SLEEP_TIME);
+
+ sendUpdate(STATUS.LOADING_RESOURCE, "Loading next page URL: " + nextURL);
+ LOGGER.info("Attempting to load next page URL: " + nextURL);
// Load next page
- Document nextPage = getPageWithRetries(new URL(nextURL));
+ Document nextPage = getPageWithRetries(new URI(nextURL).toURL());
return nextPage;
}
@Override
public List<String> getURLsFromPage(Document doc) {
+
List<String> imageURLs = new ArrayList<>();
+
+ LOGGER.debug("Trying to get URLs from document... ");
+
for (Element thumb : doc.select("#gallery img")) {
if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) {
continue;
}
String image = getFullSizedImage("https://www.imagefap.com" + thumb.parent().attr("href"));
+
+ if (image == null) {
+ for (int i = 0; i < HTTP_RETRY_LIMIT; i++) {
+ image = getFullSizedImage("https://www.imagefap.com" + thumb.parent().attr("href"));
+ if (image != null) {
+ break;
+ }
+ sleep(PAGE_SLEEP_TIME);
+ }
+ if (image == null)
+ throw new RuntimeException("Unable to extract image URL from single image page! Unable to continue");
+ }
+
+ LOGGER.debug("Adding imageURL: '" + image + "'");
+
imageURLs.add(image);
if (isThisATest()) {
break;
}
}
+ LOGGER.debug("Adding " + imageURLs.size() + " URLs to download");
+
return imageURLs;
}
@@ -159,10 +175,10 @@ public void downloadURL(URL url, int index) {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
- String title = getFirstPage().title();
+ String title = getCachedFirstPage().title();
title = title.replace("Porn Pics & Porn GIFs", "");
title = title.replace(" ", "_");
String toReturn = getHost() + "_" + title + "_" + getGID(url);
@@ -177,9 +193,30 @@ private String getFullSizedImage(String pageURL) {
// Sleep before fetching image.
sleep(IMAGE_SLEEP_TIME);
- Document doc = getPageWithRetries(new URL(pageURL));
- return doc.select("img#mainPhoto").attr("src");
- } catch (IOException e) {
+ Document doc = getPageWithRetries(new URI(pageURL).toURL());
+
+ String framedPhotoUrl = doc.select("img#mainPhoto").attr("data-src");
+
+ // we use a no query param version of the URL to reduce failure rate because of some query params that change between the li elements and the mainPhotoURL
+ String noQueryPhotoUrl = framedPhotoUrl.split("\\?")[0];
+
+ LOGGER.debug("noQueryPhotoUrl: " + noQueryPhotoUrl);
+
+ // we look for a li > a element who's framed attribute starts with the noQueryPhotoUrl (only reference in the page to the full URL)
+ Elements selectedItem = doc.select("ul.thumbs > li > a[framed^='"+noQueryPhotoUrl+"']");
+
+ // the fullsize URL is in the href attribute
+ String fullSizedUrl = selectedItem.attr("href");
+
+ if("".equals(fullSizedUrl))
+ throw new IOException("JSoup full URL extraction failed from '" + selectedItem.html() + "'");
+
+ LOGGER.debug("fullSizedUrl: " + fullSizedUrl);
+
+ return fullSizedUrl;
+
+ } catch (IOException | URISyntaxException e) {
+ LOGGER.debug("Unable to get full size image URL from page: " + pageURL + " because: " + e.getMessage());
return null;
}
}
@@ -191,9 +228,10 @@ private String getFullSizedImage(String pageURL) {
* @throws IOException If page loading errors, or if retries are exhausted
*/
private Document getPageWithRetries(URL url) throws IOException {
- Document doc;
+ Document doc = null;
int retries = RETRY_LIMIT;
while (true) {
+
sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
// For debugging rate limit checker. Useful to track wheter the timeout should be altered or not.
@@ -201,15 +239,42 @@ private Document getPageWithRetries(URL url) throws IOException {
checkRateLimit();
LOGGER.info("Retrieving " + url);
- doc = Http.url(url)
- .get();
+
+ boolean httpCallThrottled = false;
+ int httpAttempts = 0;
-
- if (doc.toString().contains("Your IP made too many requests to our servers and we need to check that you are a real human being")) {
+ // we attempt the http call, knowing it can fail for network reasons
+ while(true) {
+ httpAttempts++;
+ try {
+ doc = Http.url(url).get();
+ } catch(IOException e) {
+
+ LOGGER.info("Retrieving " + url + " error: " + e.getMessage());
+
+ if(e.getMessage().contains("404"))
+ throw new IOException("Gallery/Page not found!");
+
+ if(httpAttempts < HTTP_RETRY_LIMIT) {
+ sendUpdate(STATUS.DOWNLOAD_WARN, "HTTP call failed: " + e.getMessage() + " retrying " + httpAttempts + " / " + HTTP_RETRY_LIMIT);
+
+ // we sleep for a few seconds
+ sleep(PAGE_SLEEP_TIME);
+ continue;
+ } else {
+ sendUpdate(STATUS.DOWNLOAD_WARN, "HTTP call failed too many times: " + e.getMessage() + " treating this as a throttle");
+ httpCallThrottled = true;
+ }
+ }
+ // no errors, we exit
+ break;
+ }
+
+ if (httpCallThrottled || (doc != null && doc.toString().contains("Your IP made too many requests to our servers and we need to check that you are a real human being"))) {
if (retries == 0) {
throw new IOException("Hit rate limit and maximum number of retries, giving up");
}
- String message = "Hit rate limit while loading " + url + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining";
+ String message = "Probably hit rate limit while loading " + url + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining";
LOGGER.warn(message);
sendUpdate(STATUS.DOWNLOAD_WARN, message);
retries--;
@@ -218,8 +283,7 @@ private Document getPageWithRetries(URL url) throws IOException {
} catch (InterruptedException e) {
throw new IOException("Interrupted while waiting for rate limit to subside");
}
- }
- else {
+ } else {
return doc;
}
}
@@ -249,4 +313,5 @@ private long checkRateLimit() {
return duration;
}
+
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java
index f50a84a04..4691c7c63 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -56,11 +58,6 @@ public String getGID(URL url) throws MalformedURLException {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("a[target=_blank]")) {
@@ -79,9 +76,9 @@ public void downloadURL(URL url, int index) {
*
* Handles case when site has IP-banned the user.
*/
- private class ImagevenueImageThread extends Thread {
- private URL url;
- private int index;
+ private class ImagevenueImageThread implements Runnable {
+ private final URL url;
+ private final int index;
ImagevenueImageThread(URL url, int index) {
super();
@@ -113,8 +110,8 @@ private void fetchImage() {
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
- addURLToDownload(new URL(imgsrc), prefix);
- } catch (IOException e) {
+ addURLToDownload(new URI(imgsrc).toURL(), prefix);
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java
index f3050a13f..b32fcad44 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java
@@ -40,10 +40,6 @@ public String getGID(URL url) throws MalformedURLException {
"imgbox.com/g/albumid - got " + url + "instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
index 93cb809e7..4904ac60a 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
@@ -1,10 +1,14 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -15,15 +19,15 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.safety.Whitelist;
+import org.jsoup.safety.Safelist;
import org.jsoup.select.Elements;
-import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
-public class ImgurRipper extends AlbumRipper {
+public class ImgurRipper extends AbstractHTMLRipper {
private static final String DOMAIN = "imgur.com",
HOST = "imgur";
@@ -38,7 +42,6 @@ enum ALBUM_TYPE {
USER_ALBUM,
USER_IMAGES,
SINGLE_IMAGE,
- SERIES_OF_IMAGES,
SUBREDDIT
}
@@ -58,6 +61,7 @@ public boolean allowDuplicates() {
return albumType == ALBUM_TYPE.USER;
}
+ @Override
public boolean canRip(URL url) {
if (!url.getHost().endsWith(DOMAIN)) {
return false;
@@ -71,7 +75,24 @@ public boolean canRip(URL url) {
return true;
}
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ @Override
+ protected String getDomain() {
+ return DOMAIN;
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
+ // No-op as we override rip() method
+ }
+
+ @Override
+ protected List<String> getURLsFromPage(Document page) {
+ // No-op as we override rip() method
+ return Arrays.asList();
+ }
+
+ @Override
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String u = url.toExternalForm();
if (u.indexOf('#') >= 0) {
u = u.substring(0, u.indexOf('#'));
@@ -79,11 +100,17 @@ public URL sanitizeURL(URL url) throws MalformedURLException {
u = u.replace("imgur.com/gallery/", "imgur.com/a/");
u = u.replace("https?://m\\.imgur\\.com", "http://imgur.com");
u = u.replace("https?://i\\.imgur\\.com", "http://imgur.com");
- return new URL(u);
+ return new URI(u).toURL();
}
+ @Override
public String getAlbumTitle(URL url) throws MalformedURLException {
- String gid = getGID(url);
+ String gid = null;
+ try {
+ gid = getGID(url);
+ } catch (URISyntaxException e) {
+ throw new MalformedURLException(e.getMessage());
+ }
if (this.albumType == ALBUM_TYPE.ALBUM) {
try {
// Attempt to use album title as GID
@@ -91,7 +118,7 @@ public String getAlbumTitle(URL url) throws MalformedURLException {
albumDoc = Http.url(url).get();
}
- Elements elems = null;
+ Elements elems;
/*
// TODO: Add config option for including username in album title.
@@ -106,15 +133,13 @@ public String getAlbumTitle(URL url) throws MalformedURLException {
}
*/
- String title = null;
+ String title;
final String defaultTitle1 = "Imgur: The most awesome images on the Internet";
final String defaultTitle2 = "Imgur: The magic of the Internet";
LOGGER.info("Trying to get album title");
elems = albumDoc.select("meta[property=og:title]");
- if (elems != null) {
- title = elems.attr("content");
- LOGGER.debug("Title is " + title);
- }
+ title = elems.attr("content");
+ LOGGER.debug("Title is " + title);
// This is here encase the album is unnamed, to prevent
// Imgur: The most awesome images on the Internet from being added onto the album name
if (title.contains(defaultTitle1) || title.contains(defaultTitle2)) {
@@ -124,27 +149,17 @@ public String getAlbumTitle(URL url) throws MalformedURLException {
title = "";
LOGGER.debug("Trying to use title tag to get title");
elems = albumDoc.select("title");
- if (elems != null) {
- if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
- LOGGER.debug("Was unable to get album title or album was untitled");
- }
- else {
- title = elems.text();
- }
+ if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
+ LOGGER.debug("Was unable to get album title or album was untitled");
+ }
+ else {
+ title = elems.text();
}
}
String albumTitle = "imgur_";
- /*
- // TODO: Add config option (see above)
- if (user != null) {
- albumTitle += "user_" + user;
- }
- */
albumTitle += gid;
- if (title != null) {
- albumTitle += "_" + title;
- }
+ albumTitle += "_" + title;
return albumTitle;
} catch (IOException e) {
@@ -156,118 +171,83 @@ public String getAlbumTitle(URL url) throws MalformedURLException {
@Override
public void rip() throws IOException {
- switch (albumType) {
- case ALBUM:
- // Fall-through
- case USER_ALBUM:
- LOGGER.info("Album type is USER_ALBUM");
- // Don't call getAlbumTitle(this.url) with this
- // as it seems to cause the album to be downloaded to a subdir.
- ripAlbum(this.url);
- break;
- case SERIES_OF_IMAGES:
- LOGGER.info("Album type is SERIES_OF_IMAGES");
- ripAlbum(this.url);
- break;
- case SINGLE_IMAGE:
- LOGGER.info("Album type is SINGLE_IMAGE");
- ripSingleImage(this.url);
- break;
- case USER:
- LOGGER.info("Album type is USER");
- ripUserAccount(url);
- break;
- case SUBREDDIT:
- LOGGER.info("Album type is SUBREDDIT");
- ripSubreddit(url);
- break;
- case USER_IMAGES:
- LOGGER.info("Album type is USER_IMAGES");
- ripUserImages(url);
- break;
+ try {
+ switch (albumType) {
+ case ALBUM:
+ // Fall-through
+ case USER_ALBUM:
+ LOGGER.info("Album type is USER_ALBUM");
+ // Don't call getAlbumTitle(this.url) with this
+ // as it seems to cause the album to be downloaded to a subdir.
+ ripAlbum(this.url);
+ break;
+ case SINGLE_IMAGE:
+ LOGGER.info("Album type is SINGLE_IMAGE");
+ ripSingleImage(this.url);
+ break;
+ case USER:
+ LOGGER.info("Album type is USER");
+ ripUserAccount(url);
+ break;
+ case SUBREDDIT:
+ LOGGER.info("Album type is SUBREDDIT");
+ ripSubreddit(url);
+ break;
+ case USER_IMAGES:
+ LOGGER.info("Album type is USER_IMAGES");
+ ripUserImages(url);
+ break;
+ }
+ } catch (URISyntaxException e) {
+ throw new IOException("Failed ripping " + this.url, e);
}
waitForThreads();
}
- private void ripSingleImage(URL url) throws IOException {
+ private void ripSingleImage(URL url) throws IOException, URISyntaxException {
String strUrl = url.toExternalForm();
- Document document = getDocument(strUrl);
- Matcher m = getEmbeddedJsonMatcher(document);
- if (m.matches()) {
- JSONObject json = new JSONObject(m.group(1)).getJSONObject("image");
- addURLToDownload(extractImageUrlFromJson(json), "");
- }
+ var gid = getGID(url);
+ var json = getSingleImageData(String.format("https://api.imgur.com/post/v1/media/%s?include=media,adconfig,account", gid));
+ var media = json.getJSONArray("media");
+ if (media.length()==0) {
+ throw new IOException(String.format("Failed to fetch image for url %s", strUrl));
+ }
+ if (media.length()>1) {
+ LOGGER.warn(String.format("Got multiple images for url %s", strUrl));
+ }
+ addURLToDownload(extractImageUrlFromJson((JSONObject)media.get(0)), "");
}
- private void ripAlbum(URL url) throws IOException {
+ private void ripAlbum(URL url) throws IOException, URISyntaxException {
ripAlbum(url, "");
}
- private void ripAlbum(URL url, String subdirectory) throws IOException {
- int index = 0;
+ private void ripAlbum(URL url, String subdirectory) throws IOException, URISyntaxException {
+ int index;
this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
index = 0;
ImgurAlbum album = getImgurAlbum(url);
for (ImgurImage imgurImage : album.images) {
stopCheck();
- String saveAs = workingDir.getCanonicalPath();
- if (!saveAs.endsWith(File.separator)) {
- saveAs += File.separator;
- }
+ Path saveAs = workingDir.toPath();
if (subdirectory != null && !subdirectory.equals("")) {
- saveAs += subdirectory;
+ saveAs = saveAs.resolve(subdirectory);
}
- if (!saveAs.endsWith(File.separator)) {
- saveAs += File.separator;
- }
- File subdirFile = new File(saveAs);
- if (!subdirFile.exists()) {
- subdirFile.mkdirs();
+ if (!Files.exists(saveAs)) {
+ Files.createDirectory(saveAs);
}
index += 1;
+ var imgPath = imgurImage.getSaveAs().replaceAll("\\?\\d", "");
if (Utils.getConfigBoolean("download.save_order", true)) {
- saveAs += String.format("%03d_", index);
- }
- saveAs += imgurImage.getSaveAs();
- saveAs = saveAs.replaceAll("\\?\\d", "");
- addURLToDownload(imgurImage.url, new File(saveAs));
- }
- }
-
- public static ImgurAlbum getImgurSeries(URL url) throws IOException {
- Pattern p = Pattern.compile("^.*imgur\\.com/([a-zA-Z0-9,]*).*$");
- Matcher m = p.matcher(url.toExternalForm());
- ImgurAlbum album = new ImgurAlbum(url);
- if (m.matches()) {
- String[] imageIds = m.group(1).split(",");
- for (String imageId : imageIds) {
- // TODO: Fetch image with ID imageId
- LOGGER.debug("Fetching image info for ID " + imageId);
- try {
- JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
- if (!json.has("image")) {
- continue;
- }
- JSONObject image = json.getJSONObject("image");
- if (!image.has("links")) {
- continue;
- }
- JSONObject links = image.getJSONObject("links");
- if (!links.has("original")) {
- continue;
- }
- String original = links.getString("original");
- ImgurImage theImage = new ImgurImage(new URL(original));
- album.addImage(theImage);
- } catch (Exception e) {
- LOGGER.error("Got exception while fetching imgur ID " + imageId, e);
- }
+ saveAs = saveAs.resolve(String.format("%03d_%s", index, imgPath));
+ } else {
+ saveAs = saveAs.resolve(imgPath);
}
+ addURLToDownload(imgurImage.url, saveAs);
}
- return album;
}
- public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
+ public static ImgurAlbum getImgurAlbum(URL url) throws IOException, URISyntaxException {
String strUrl = url.toExternalForm();
if (!strUrl.contains(",")) {
strUrl += "/all";
@@ -275,13 +255,11 @@ public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
LOGGER.info(" Retrieving " + strUrl);
Document doc = getAlbumData("https://api.imgur.com/3/album/" + strUrl.split("/a/")[1]);
// Try to use embedded JSON to retrieve images
- LOGGER.info(Jsoup.clean(doc.body().toString(), Whitelist.none()));
-
try {
- JSONObject json = new JSONObject(Jsoup.clean(doc.body().toString(), Whitelist.none()));
+ JSONObject json = new JSONObject(Jsoup.clean(doc.body().toString(), Safelist.none()));
JSONArray jsonImages = json.getJSONObject("data").getJSONArray("images");
return createImgurAlbumFromJsonArray(url, jsonImages);
- } catch (JSONException e) {
+ } catch (JSONException | URISyntaxException e) {
LOGGER.debug("Error while parsing JSON at " + url + ", continuing", e);
}
@@ -309,54 +287,48 @@ public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
image = "http:" + thumb.select("img").attr("src");
} else {
// Unable to find image in this div
- LOGGER.error("[!] Unable to find image in div: " + thumb.toString());
+ LOGGER.error("[!] Unable to find image in div: " + thumb);
continue;
}
if (image.endsWith(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) {
image = image.replace(".gif", ".mp4");
}
- ImgurImage imgurImage = new ImgurImage(new URL(image));
+ ImgurImage imgurImage = new ImgurImage(new URI(image).toURL());
imgurAlbum.addImage(imgurImage);
}
return imgurAlbum;
}
- private static Matcher getEmbeddedJsonMatcher(Document doc) {
- Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL);
- return p.matcher(doc.body().html());
- }
-
- private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException {
+ private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException, URISyntaxException {
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
int imagesLength = jsonImages.length();
for (int i = 0; i < imagesLength; i++) {
JSONObject ob = jsonImages.getJSONObject(i);
- imgurAlbum.addImage(new ImgurImage( new URL(ob.getString("link"))));
+ imgurAlbum.addImage(new ImgurImage( new URI(ob.getString("link")).toURL()));
}
return imgurAlbum;
}
- private static ImgurImage createImgurImageFromJson(JSONObject json) throws MalformedURLException {
- return new ImgurImage(extractImageUrlFromJson(json));
- }
-
- private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException {
+ private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException, URISyntaxException {
String ext = json.getString("ext");
+ if (!ext.startsWith(".")) {
+ ext = "." + ext;
+ }
if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) {
ext = ".mp4";
}
- return new URL(
- "http://i.imgur.com/"
- + json.getString("hash")
- + ext);
+ return new URI(
+ "https://i.imgur.com/"
+ + json.getString("id")
+ + ext).toURL();
}
- private static Document getDocument(String strUrl) throws IOException {
- return Jsoup.connect(strUrl)
+ private static JSONObject getSingleImageData(String strUrl) throws IOException {
+ return Http.url(strUrl)
.userAgent(USER_AGENT)
.timeout(10 * 1000)
- .maxBodySize(0)
- .get();
+ .header("Authorization", "Client-ID " + Utils.getConfigString("imgur.client_id", "546c25a59c58ad7"))
+ .getJSON();
}
private static Document getAlbumData(String strUrl) throws IOException {
@@ -369,35 +341,71 @@ private static Document getAlbumData(String strUrl) throws IOException {
.get();
}
+ private static JSONObject getUserData(String userUrl) throws IOException {
+ return Http.url(userUrl)
+ .userAgent(USER_AGENT)
+ .timeout(10 * 1000)
+ .header("Authorization", "Client-ID " + Utils.getConfigString("imgur.client_id", "546c25a59c58ad7"))
+ .getJSON();
+ }
+
/**
* Rips all albums in an imgur user's account.
* @param url
- * URL to imgur user account (http://username.imgur.com)
- * @throws IOException
+ * URL to imgur user account (http://username.imgur.com | https://imgur.com/user/username)
*/
- private void ripUserAccount(URL url) throws IOException {
+ private void ripUserAccount(URL url) throws IOException, URISyntaxException {
+ int cPage = -1, cImage = 0;
+ String apiUrl = "https://api.imgur.com/3/account/%s/submissions/%d/newest?album_previews=1";
+ // Strip 'user_' from username
+ var username = getGID(url).replace("user_", "");
LOGGER.info("Retrieving " + url);
sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
- Document doc = Http.url(url).get();
- for (Element album : doc.select("div.cover a")) {
- stopCheck();
- if (!album.hasAttr("href")
- || !album.attr("href").contains("imgur.com/a/")) {
- continue;
+
+ while (true) {
+ cPage += 1;
+ var pageUrl = String.format(apiUrl, username, cPage);
+ var json = getUserData(pageUrl);
+ var success = json.getBoolean("success");
+ var status = json.getInt("status");
+ if (!success || status!=200) {
+ throw new IOException(String.format("Unexpected status code %d for url %s and page %d", status, url, cPage));
}
- String albumID = album.attr("href").substring(album.attr("href").lastIndexOf('/') + 1);
- URL albumURL = new URL("http:" + album.attr("href") + "/noscript");
- try {
- ripAlbum(albumURL, albumID);
- Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000);
- } catch (Exception e) {
- LOGGER.error("Error while ripping album: " + e.getMessage(), e);
+ var data = json.getJSONArray("data");
+ if (data.isEmpty()) {
+ // Data array is empty for pages beyond the last page
+ break;
+ }
+ for (int i = 0; i < data.length(); i++) {
+ cImage += 1;
+ String prefixOrSubdir = "";
+ if (Utils.getConfigBoolean("download.save_order", true)) {
+ prefixOrSubdir = String.format("%03d_", cImage);
+ }
+ var d = (JSONObject)data.get(i);
+ var l = d.getString("link");
+ if (d.getBoolean("is_album")) {
+ // For album links with multiple images create a prefixed folder with album id
+ prefixOrSubdir += d.getString("id");
+ ripAlbum(new URI(l).toURL(), prefixOrSubdir);
+ try {
+ Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000L);
+ } catch (InterruptedException e) {
+ LOGGER.error(String.format("Error! Interrupted ripping album %s for user account %s", l, username), e);
+ }
+ } else {
+ // For direct links
+ if (d.has("mp4") && Utils.getConfigBoolean("prefer.mp4", false)) {
+ l = d.getString("mp4");
+ }
+ addURLToDownload(new URI(l).toURL(), prefixOrSubdir);
+ }
}
}
}
- private void ripUserImages(URL url) throws IOException {
+ private void ripUserImages(URL url) {
int page = 0; int imagesFound = 0; int imagesTotal = 0;
String jsonUrl = url.toExternalForm().replace("/all", "/ajax/images");
if (jsonUrl.contains("#")) {
@@ -417,12 +425,12 @@ private void ripUserImages(URL url) throws IOException {
for (int i = 0; i < images.length(); i++) {
imagesFound++;
JSONObject image = images.getJSONObject(i);
- String imageUrl = "http://i.imgur.com/" + image.getString("hash") + image.getString("ext");
+ String imageUrl = "https://i.imgur.com/" + image.getString("hash") + image.getString("ext");
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", imagesFound);
}
- addURLToDownload(new URL(imageUrl), prefix);
+ addURLToDownload(new URI(imageUrl).toURL(), prefix);
}
if (imagesFound >= imagesTotal) {
break;
@@ -435,7 +443,7 @@ private void ripUserImages(URL url) throws IOException {
}
}
- private void ripSubreddit(URL url) throws IOException {
+ private void ripSubreddit(URL url) throws IOException, URISyntaxException {
int page = 0;
while (true) {
stopCheck();
@@ -455,7 +463,7 @@ private void ripSubreddit(URL url) throws IOException {
if (image.contains("b.")) {
image = image.replace("b.", ".");
}
- URL imageURL = new URL(image);
+ URL imageURL = new URI(image).toURL();
addURLToDownload(imageURL);
}
if (imgs.isEmpty()) {
@@ -477,29 +485,30 @@ public String getHost() {
}
@Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p = null;
- Matcher m = null;
+ public String getGID(URL url) throws MalformedURLException, URISyntaxException {
+ Pattern p;
+ Matcher m;
- p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery)/([a-zA-Z0-9]{5,}).*$");
+ p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/gallery/(?:(?:[a-zA-Z0-9]*/)?.*-)?([a-zA-Z0-9]+)$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album or gallery
albumType = ALBUM_TYPE.ALBUM;
String gid = m.group(m.groupCount());
- this.url = new URL("http://imgur.com/a/" + gid);
+ this.url = new URI("https://imgur.com/a/" + gid).toURL();
return gid;
}
- p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
+ // Match urls with path /a
+ p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/(?:a|t)/(?:(?:[a-zA-Z0-9]*/)?.*-)?([a-zA-Z0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album or gallery
albumType = ALBUM_TYPE.ALBUM;
String gid = m.group(m.groupCount());
- this.url = new URL("http://imgur.com/a/" + gid);
+ this.url = new URI("https://imgur.com/a/" + gid).toURL();
return gid;
}
- p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/?$");
+ p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{4,})\\.imgur\\.com/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Root imgur account
@@ -510,6 +519,14 @@ public String getGID(URL url) throws MalformedURLException {
albumType = ALBUM_TYPE.USER;
return "user_" + gid;
}
+ // Pattern for new imgur user url https://imgur.com/user/username
+ p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/user/([a-zA-Z0-9]+).*$");
+ m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ String gid = m.group(1);
+ albumType = ALBUM_TYPE.USER;
+ return "user_" + gid;
+ }
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/all.*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
@@ -529,13 +546,13 @@ public String getGID(URL url) throws MalformedURLException {
if (m.matches()) {
// Imgur subreddit aggregator
albumType = ALBUM_TYPE.SUBREDDIT;
- String album = m.group(2);
+ StringBuilder album = new StringBuilder(m.group(2));
for (int i = 3; i <= m.groupCount(); i++) {
if (m.group(i) != null) {
- album += "_" + m.group(i).replace("/", "");
+ album.append("_").append(m.group(i).replace("/", ""));
}
}
- return album;
+ return album.toString();
}
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/r/(\\w+)/([a-zA-Z0-9,]{5,}).*$");
m = p.matcher(url.toExternalForm());
@@ -544,7 +561,7 @@ public String getGID(URL url) throws MalformedURLException {
albumType = ALBUM_TYPE.ALBUM;
String subreddit = m.group(m.groupCount() - 1);
String gid = m.group(m.groupCount());
- this.url = new URL("http://imgur.com/r/" + subreddit + "/" + gid);
+ this.url = new URI("https://imgur.com/r/" + subreddit + "/" + gid).toURL();
return "r_" + subreddit + "_" + gid;
}
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9]{5,})$");
@@ -554,29 +571,14 @@ public String getGID(URL url) throws MalformedURLException {
albumType = ALBUM_TYPE.SINGLE_IMAGE;
return m.group(m.groupCount());
}
- p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- // Series of imgur images
- albumType = ALBUM_TYPE.SERIES_OF_IMAGES;
- String gid = m.group(m.groupCount());
- if (!gid.contains(",")) {
- throw new MalformedURLException("Imgur image doesn't contain commas");
- }
- return gid.replaceAll(",", "-");
- }
throw new MalformedURLException("Unsupported imgur URL format: " + url.toExternalForm());
}
- public ALBUM_TYPE getAlbumType() {
- return albumType;
- }
-
public static class ImgurImage {
String title = "";
String description = "";
- String extension = "";
- public URL url = null;
+ String extension;
+ public URL url;
ImgurImage(URL url) {
this.url = url;
@@ -586,14 +588,7 @@ public static class ImgurImage {
this.extension = this.extension.substring(0, this.extension.indexOf("?"));
}
}
- ImgurImage(URL url, String title) {
- this(url);
- this.title = title;
- }
- public ImgurImage(URL url, String title, String description) {
- this(url, title);
- this.description = description;
- }
+
String getSaveAs() {
String saveAs = this.title;
String u = url.toExternalForm();
@@ -613,7 +608,7 @@ String getSaveAs() {
public static class ImgurAlbum {
String title = null;
- public URL url = null;
+ public URL url;
public List images = new ArrayList<>();
ImgurAlbum(URL url) {
this.url = url;
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
index e7af19bcf..84fad5055 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
@@ -55,12 +55,6 @@ public String getGID(URL url) throws MalformedURLException {
"jabarchives.com/main/view/albumname - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/JagodibujaRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/JagodibujaRipper.java
index d5df1fe5e..2f2d5c336 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/JagodibujaRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/JagodibujaRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -40,12 +42,6 @@ public String getGID(URL url) throws MalformedURLException {
throw new MalformedURLException("Expected jagodibuja.com gallery formats hwww.jagodibuja.com/Comic name/ got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
@@ -62,8 +58,8 @@ public List getURLsFromPage(Document doc) {
Element elem = comicPage.select("span.full-size-link > a").first();
LOGGER.info("Got link " + elem.attr("href"));
try {
- addURLToDownload(new URL(elem.attr("href")), "");
- } catch (MalformedURLException e) {
+ addURLToDownload(new URI(elem.attr("href")).toURL(), "");
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.warn("Malformed URL");
e.printStackTrace();
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/Jpg3Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/Jpg3Ripper.java
new file mode 100644
index 000000000..c79e02bc4
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/Jpg3Ripper.java
@@ -0,0 +1,70 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+public class Jpg3Ripper extends AbstractHTMLRipper {
+
+ public Jpg3Ripper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getDomain() {
+ return "jpg3.su";
+ }
+
+ @Override
+ public String getHost() {
+ return "jpg3";
+ }
+
+ @Override
+ public List getURLsFromPage(Document page) {
+ List urls = new ArrayList<>();
+
+ for (Element el : page.select(".image-container > img")) {
+ urls.add(el.attr("src").replaceAll("\\.md", ""));
+ }
+
+ return urls;
+ }
+
+ @Override
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
+ String u = url.toExternalForm();
+        u = u.replaceAll("https?://jpg3\\.su/a/([^/]+)/?.*", "https://jpg3.su/a/$1");
+ LOGGER.debug("Changed URL from " + url + " to " + u);
+ return new URI(u).toURL();
+ }
+
+ @Override
+ public Document getNextPage(Document page) throws IOException, URISyntaxException {
+ String href = page.select("[data-pagination='next']").attr("href");
+ if (!href.isEmpty()) {
+ return Http.url(href).get();
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ return url.toString().split("/")[url.toString().split("/").length - 1];
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/KingcomixRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/KingcomixRipper.java
index 4876237e4..bb8194bcb 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/KingcomixRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/KingcomixRipper.java
@@ -41,13 +41,6 @@ public String getGID(URL url) throws MalformedURLException {
"kingcomix.com/COMIX - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java
index 8986fd91b..408310a7a 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java
@@ -1,234 +1,236 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ripper.DownloadThreadPool;
-import com.rarchives.ripme.utils.Http;
-
-
-
-/**
- * @author Tushar
- *
- */
-public class ListalRipper extends AbstractHTMLRipper {
-
- private Pattern p1 = Pattern.compile("https:\\/\\/www.listal.com\\/list\\/([a-zA-Z0-9-]+)");
- private Pattern p2 =
- Pattern.compile("https:\\/\\/www.listal.com\\/((?:(?:[a-zA-Z0-9-]+)\\/?)+)");
- private String listId = null; // listId to get more images via POST.
- private String postUrl = "https://www.listal.com/item-list/"; //to load more images.
- private UrlType urlType = UrlType.UNKNOWN;
-
- private DownloadThreadPool listalThreadPool = new DownloadThreadPool("listalThreadPool");
-
- public ListalRipper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public String getDomain() {
- return "listal.com";
- }
-
- @Override
- public String getHost() {
- return "listal";
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- Document doc = Http.url(url).get();
- if (urlType == UrlType.LIST) {
- listId = doc.select("#customlistitems").first().attr("data-listid"); // Used for list types.
- }
- return doc;
- }
-
- @Override
- public List getURLsFromPage(Document page) {
- if (urlType == UrlType.LIST) {
- // for url of type LIST, https://www.listal.com/list/my-list
- return getURLsForListType(page);
- } else if (urlType == UrlType.FOLDER) {
- // for url of type FOLDER, https://www.listal.com/jim-carrey/pictures
- return getURLsForFolderType(page);
- }
- return null;
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- listalThreadPool.addThread(new ListalImageDownloadThread(url, index));
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Matcher m1 = p1.matcher(url.toExternalForm());
- if (m1.matches()) {
- // Return the text contained between () in the regex
- urlType = UrlType.LIST;
- return m1.group(1);
- }
-
- Matcher m2 = p2.matcher(url.toExternalForm());
- if (m2.matches()) {
- // Return only gid from capturing group of type listal.com/tvOrSomething/dexter/pictures
- urlType = UrlType.FOLDER;
- return getFolderTypeGid(m2.group(1));
- }
-
- throw new MalformedURLException("Expected listal.com URL format: "
- + "listal.com/list/my-list-name - got " + url + " instead.");
- }
-
- @Override
- public Document getNextPage(Document page) throws IOException {
- Document nextPage = super.getNextPage(page);
- switch (urlType) {
- case LIST:
- if (!page.select(".loadmoreitems").isEmpty()) {
- // All items are not loaded.
- // Load remaining items using postUrl.
-
- String offSet = page.select(".loadmoreitems").last().attr("data-offset");
- Map postParams = new HashMap<>();
- postParams.put("listid", listId);
- postParams.put("offset", offSet);
- try {
- nextPage = Http.url(postUrl).data(postParams).retries(3).post();
- } catch (IOException e1) {
- LOGGER.error("Failed to load more images after " + offSet, e1);
- throw e1;
- }
- }
- break;
-
- case FOLDER:
- Elements pageLinks = page.select(".pages a");
- if (!pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next")) {
- String nextUrl = pageLinks.last().attr("abs:href");
- nextPage = Http.url(nextUrl).retries(3).get();
- }
- break;
-
- case UNKNOWN:
- default:
- }
- return nextPage;
- }
-
-
- @Override
- public DownloadThreadPool getThreadPool() {
- return listalThreadPool;
- }
-
- /**
- * Returns the image urls for UrlType LIST.
- */
- private List getURLsForListType(Document page) {
- List list = new ArrayList<>();
- for (Element e : page.select(".pure-g a[href*=viewimage]")) {
- //list.add("https://www.listal.com" + e.attr("href") + "h");
- list.add(e.attr("abs:href") + "h");
- }
-
- return list;
- }
-
- /**
- * Returns the image urls for UrlType FOLDER.
- */
- private List getURLsForFolderType(Document page) {
- List list = new ArrayList<>();
- for (Element e : page.select("#browseimagescontainer .imagewrap-outer a")) {
- list.add(e.attr("abs:href") + "h");
- }
- return list;
- }
-
- /**
- * Returns the gid for url type listal.com/tvOrSomething/dexter/pictures
- */
- public String getFolderTypeGid(String group) throws MalformedURLException {
- String[] folders = group.split("/");
- try {
- if (folders.length == 2 && folders[1].equals("pictures")) {
- // Url is probably for an actor.
- return folders[0];
- }
-
- if (folders.length == 3 && folders[2].equals("pictures")) {
- // Url if for a folder(like movies, tv etc).
- Document doc = Http.url(url).get();
- return doc.select(".itemheadingmedium").first().text();
- }
-
- } catch (Exception e) {
- LOGGER.error(e);
- }
- throw new MalformedURLException("Unable to fetch the gid for given url.");
- }
-
- private class ListalImageDownloadThread extends Thread {
-
- private URL url;
- private int index;
-
- public ListalImageDownloadThread(URL url, int index) {
- super();
- this.url = url;
- this.index = index;
- }
-
- @Override
- public void run() {
- getImage();
- }
-
- public void getImage() {
- try {
- Document doc = Http.url(url).get();
-
- String imageUrl = doc.getElementsByClass("pure-img").attr("src");
- if (imageUrl != "") {
- addURLToDownload(new URL(imageUrl), getPrefix(index), "", null, null,
- getImageName());
- } else {
- LOGGER.error("Couldnt find image from url: " + url);
- }
- } catch (IOException e) {
- LOGGER.error("[!] Exception while downloading image: " + url, e);
- }
- }
-
- public String getImageName() {
- // Returns the image number of the link if possible.
- String name = this.url.toExternalForm();
- try {
- name = name.substring(name.lastIndexOf("/") + 1);
- } catch (Exception e) {
- LOGGER.info("Failed to get name for the image.");
- name = null;
- }
- // Listal stores images as .jpg
- return name + ".jpg";
- }
- }
-
- private static enum UrlType {
- LIST, FOLDER, UNKNOWN
- }
-}
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ripper.DownloadThreadPool;
+import com.rarchives.ripme.utils.Http;
+
+
+
+/**
+ * @author Tushar
+ *
+ */
+public class ListalRipper extends AbstractHTMLRipper {
+
+ private Pattern p1 = Pattern.compile("https:\\/\\/www.listal.com\\/list\\/([a-zA-Z0-9-]+)");
+ private Pattern p2 =
+ Pattern.compile("https:\\/\\/www.listal.com\\/((?:(?:[a-zA-Z0-9-_%]+)\\/?)+)");
+ private String listId = null; // listId to get more images via POST.
+ private String postUrl = "https://www.listal.com/item-list/"; //to load more images.
+ private UrlType urlType = UrlType.UNKNOWN;
+
+ private DownloadThreadPool listalThreadPool = new DownloadThreadPool("listalThreadPool");
+
+ public ListalRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getDomain() {
+ return "listal.com";
+ }
+
+ @Override
+ public String getHost() {
+ return "listal";
+ }
+
+ @Override
+ public Document getFirstPage() throws IOException {
+ Document doc = Http.url(url).get();
+ if (urlType == UrlType.LIST) {
+ listId = doc.select("#customlistitems").first().attr("data-listid"); // Used for list types.
+ }
+ return doc;
+ }
+
+ @Override
+ public List getURLsFromPage(Document page) {
+ if (urlType == UrlType.LIST) {
+ // for url of type LIST, https://www.listal.com/list/my-list
+ return getURLsForListType(page);
+ } else if (urlType == UrlType.FOLDER) {
+ // for url of type FOLDER, https://www.listal.com/jim-carrey/pictures
+ return getURLsForFolderType(page);
+ }
+ return null;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ listalThreadPool.addThread(new ListalImageDownloadThread(url, index));
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Matcher m1 = p1.matcher(url.toExternalForm());
+ if (m1.matches()) {
+ // Return the text contained between () in the regex
+ urlType = UrlType.LIST;
+ return m1.group(1);
+ }
+
+ Matcher m2 = p2.matcher(url.toExternalForm());
+ if (m2.matches()) {
+ // Return only gid from capturing group of type listal.com/tvOrSomething/dexter/pictures
+ urlType = UrlType.FOLDER;
+ return getFolderTypeGid(m2.group(1));
+ }
+
+ throw new MalformedURLException("Expected listal.com URL format: "
+ + "listal.com/list/my-list-name - got " + url + " instead.");
+ }
+
+ @Override
+ public Document getNextPage(Document page) throws IOException, URISyntaxException {
+ Document nextPage = super.getNextPage(page);
+ switch (urlType) {
+ case LIST:
+ if (!page.select(".loadmoreitems").isEmpty()) {
+ // All items are not loaded.
+ // Load remaining items using postUrl.
+
+ String offSet = page.select(".loadmoreitems").last().attr("data-offset");
+ Map postParams = new HashMap<>();
+ postParams.put("listid", listId);
+ postParams.put("offset", offSet);
+ try {
+ nextPage = Http.url(postUrl).data(postParams).retries(3).post();
+ } catch (IOException e1) {
+ LOGGER.error("Failed to load more images after " + offSet, e1);
+ throw e1;
+ }
+ }
+ break;
+
+ case FOLDER:
+ Elements pageLinks = page.select(".pages a");
+ if (!pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next")) {
+ String nextUrl = pageLinks.last().attr("abs:href");
+ nextPage = Http.url(nextUrl).retries(3).get();
+ }
+ break;
+
+ case UNKNOWN:
+ default:
+ }
+ return nextPage;
+ }
+
+
+ @Override
+ public DownloadThreadPool getThreadPool() {
+ return listalThreadPool;
+ }
+
+ /**
+ * Returns the image urls for UrlType LIST.
+ */
+ private List getURLsForListType(Document page) {
+ List list = new ArrayList<>();
+ for (Element e : page.select(".pure-g a[href*=viewimage]")) {
+ //list.add("https://www.listal.com" + e.attr("href") + "h");
+ list.add(e.attr("abs:href") + "h");
+ }
+
+ return list;
+ }
+
+ /**
+ * Returns the image urls for UrlType FOLDER.
+ */
+ private List getURLsForFolderType(Document page) {
+ List list = new ArrayList<>();
+ for (Element e : page.select("#browseimagescontainer .imagewrap-outer a")) {
+ list.add(e.attr("abs:href") + "h");
+ }
+ return list;
+ }
+
+ /**
+ * Returns the gid for url type listal.com/tvOrSomething/dexter/pictures
+ */
+ public String getFolderTypeGid(String group) throws MalformedURLException {
+ String[] folders = group.split("/");
+ try {
+ if (folders.length == 2 && folders[1].equals("pictures")) {
+ // Url is probably for an actor.
+ return folders[0];
+ }
+
+ if (folders.length == 3 && folders[2].equals("pictures")) {
+ // Url if for a folder(like movies, tv etc).
+ Document doc = Http.url(url).get();
+ return doc.select(".itemheadingmedium").first().text();
+ }
+
+ } catch (Exception e) {
+ LOGGER.error(e);
+ }
+ throw new MalformedURLException("Unable to fetch the gid for given url.");
+ }
+
+ private class ListalImageDownloadThread implements Runnable {
+
+ private final URL url;
+ private final int index;
+
+ public ListalImageDownloadThread(URL url, int index) {
+ super();
+ this.url = url;
+ this.index = index;
+ }
+
+ @Override
+ public void run() {
+ getImage();
+ }
+
+ public void getImage() {
+ try {
+ Document doc = Http.url(url).get();
+
+ String imageUrl = doc.getElementsByClass("pure-img").attr("src");
+            if (!imageUrl.isEmpty()) {
+ addURLToDownload(new URI(imageUrl).toURL(), getPrefix(index), "", null, null,
+ getImageName());
+ } else {
+ LOGGER.error("Couldnt find image from url: " + url);
+ }
+ } catch (IOException | URISyntaxException e) {
+ LOGGER.error("[!] Exception while downloading image: " + url, e);
+ }
+ }
+
+ public String getImageName() {
+ // Returns the image number of the link if possible.
+ String name = this.url.toExternalForm();
+ try {
+ name = name.substring(name.lastIndexOf("/") + 1);
+ } catch (Exception e) {
+ LOGGER.info("Failed to get name for the image.");
+ name = null;
+ }
+ // Listal stores images as .jpg
+ return name + ".jpg";
+ }
+ }
+
+ private static enum UrlType {
+ LIST, FOLDER, UNKNOWN
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
index 7eabfc6f0..de97c533b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
@@ -1,26 +1,26 @@
package com.rarchives.ripme.ripper.rippers;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Connection;
+import org.jsoup.nodes.Document;
+
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ripper.DownloadThreadPool;
-import com.rarchives.ripme.utils.Http;
-
public class LusciousRipper extends AbstractHTMLRipper {
- private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
+ private static String albumid;
- private static final Pattern P = Pattern.compile("^https?:\\/\\/(?:members\\.|old\\.|www\\.)?luscious.net\\/albums\\/([-_.0-9a-zA-Z]+)\\/?");
- private DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");
+    private static final Pattern P = Pattern.compile("^https?://(?:members\\.|legacy\\.|www\\.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+)/?");
public LusciousRipper(URL url) throws IOException {
super(url);
@@ -37,40 +37,48 @@ public String getHost() {
}
@Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- Document page = Http.url(url).get();
- LOGGER.info("First page is " + url);
- return page;
- }
-
- @Override
- public List getURLsFromPage(Document page) {
+ public List getURLsFromPage(Document page) { // gets urls for all pages through the api
List urls = new ArrayList<>();
- Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
- for (Element e : urlElements) {
- urls.add(e.attr("abs:href"));
- }
+ int totalPages = 1;
+
+ for (int i = 1; i <= totalPages; i++) {
+ String APIStringWOVariables = "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=%2520query%2520PictureListInsideAlbum%28%2524input%253A%2520PictureListInput%21%29%2520%257B%2520picture%2520%257B%2520list%28input%253A%2520%2524input%29%2520%257B%2520info%2520%257B%2520...FacetCollectionInfo%2520%257D%2520items%2520%257B%2520__typename%2520id%2520title%2520description%2520created%2520like_status%2520number_of_comments%2520number_of_favorites%2520moderation_status%2520width%2520height%2520resolution%2520aspect_ratio%2520url_to_original%2520url_to_video%2520is_animated%2520position%2520permissions%2520url%2520tags%2520%257B%2520category%2520text%2520url%2520%257D%2520thumbnails%2520%257B%2520width%2520height%2520size%2520url%2520%257D%2520%257D%2520%257D%2520%257D%2520%257D%2520fragment%2520FacetCollectionInfo%2520on%2520FacetCollectionInfo%2520%257B%2520page%2520has_next_page%2520has_previous_page%2520total_items%2520total_pages%2520items_per_page%2520url_complete%2520%257D%2520&variables=";
+ Connection con = Http.url(APIStringWOVariables + encodeVariablesPartOfURL(i, albumid)).method(Connection.Method.GET).retries(5).connection();
+ con.ignoreHttpErrors(true);
+ con.ignoreContentType(true);
+ con.userAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0");
+ Connection.Response res;
+ try {
+ res = con.execute();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ String body = res.body();
- return urls;
- }
+ JSONObject jsonObject = new JSONObject(body);
- @Override
- public Document getNextPage(Document doc) throws IOException {
- // luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
- // Simply GET the nextPageUrl also works. Therefore, we do this...
- Element nextPageElement = doc.select("div#next_page > div > a").first();
- if (nextPageElement == null) {
- throw new IOException("No next page found.");
+ JSONObject data = jsonObject.getJSONObject("data");
+ JSONObject picture = data.getJSONObject("picture");
+ JSONObject list = picture.getJSONObject("list");
+ JSONArray items = list.getJSONArray("items");
+ JSONObject info = list.getJSONObject("info");
+ totalPages = info.getInt("total_pages");
+
+ for (int j = 0; j < items.length(); j++) {
+ JSONObject item = items.getJSONObject(j);
+ String urlToOriginal = item.getString("url_to_original");
+ urls.add(urlToOriginal);
+ }
}
- return Http.url(nextPageElement.attr("abs:href")).get();
+ return urls;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Matcher m = P.matcher(url.toExternalForm());
if (m.matches()) {
+ albumid = m.group(1).split("_")[m.group(1).split("_").length - 1];
return m.group(1);
}
throw new MalformedURLException("Expected luscious.net URL format: "
@@ -78,79 +86,17 @@ public String getGID(URL url) throws MalformedURLException {
}
@Override
- public void downloadURL(URL url, int index) {
- lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
+ protected void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
}
- @Override
- public DownloadThreadPool getThreadPool() {
- return lusciousThreadPool;
- }
-
- @Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- // Sanitizes the url removing GET parameters and convert to old api url.
- // "https://old.luscious.net/albums/albumname"
+ public static String encodeVariablesPartOfURL(int page, String albumId) {
try {
- Matcher m = P.matcher(url.toString());
- if (m.matches()) {
- String sanitizedUrl = m.group();
- sanitizedUrl = sanitizedUrl.replaceFirst(
- "^https?:\\/\\/(?:members\\.|old\\.|www\\.)?luscious.net",
- "https://old.luscious.net");
- return new URL(sanitizedUrl);
- }
-
- throw new Exception("ERROR: Unable to sanitize url.");
- } catch (Exception e) {
- LOGGER.info("Error sanitizing the url.");
- LOGGER.error(e);
- return super.sanitizeURL(url);
- }
- }
-
- @Override
- public String normalizeUrl(String url) {
- try {
- return url.toString().replaceFirst(
- "^https?:\\/\\/(?:members\\.|old\\.)?luscious.net", "https://www.luscious.net");
- } catch (Exception e) {
- LOGGER.info("Error normalizing the url.");
- LOGGER.error(e);
- return super.normalizeUrl(url);
- }
- }
-
- public class LusciousDownloadThread extends Thread {
- private URL url;
- private int index;
+ String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}";
- public LusciousDownloadThread(URL url, int index) {
- this.url = url;
- this.index = index;
+ return URLEncoder.encode(json, "UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new IllegalStateException("Could not encode variables");
}
-
- @Override
- public void run() {
- try {
- Document page = Http.url(url).retries(RETRY_COUNT).get();
-
- String downloadUrl = page.select(".icon-download").attr("abs:href");
- if (downloadUrl.equals("")) {
- // This is here for pages with mp4s instead of images.
- downloadUrl = page.select("div > video > source").attr("src");
- if (!downloadUrl.equals("")) {
- throw new IOException("Could not find download url for image or video.");
- }
- }
-
- //If a valid download url was found.
- addURLToDownload(new URL(downloadUrl), getPrefix(index));
-
- } catch (IOException e) {
- LOGGER.error("Error downloadiong url " + url, e);
- }
- }
-
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java
index ea8c45306..8c6c92271 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java
@@ -1,40 +1,42 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
-import com.rarchives.ripme.ui.History;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.Utils;
-import org.apache.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONObject;
-import org.jsoup.Connection;
-import org.jsoup.nodes.Document;
-import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MangadexRipper extends AbstractJSONRipper {
- private String chapterApiEndPoint = "https://mangadex.org/api/chapter/";
- private String mangaApiEndPoint = "https://mangadex.org/api/manga/";
+ private final String chapterApiEndPoint = "https://mangadex.org/api/chapter/";
+ private final String mangaApiEndPoint = "https://mangadex.org/api/manga/";
private boolean isSingleChapter;
- private String getImageUrl(String chapterHash, String imageName, String server) {
- return server + chapterHash + "/" + imageName;
- }
public MangadexRipper(URL url) throws IOException {
super(url);
}
+ private String getImageUrl(String chapterHash, String imageName, String server) {
+ return server + chapterHash + "/" + imageName;
+ }
+
@Override
public String getHost() {
return "mangadex";
}
+
@Override
public String getDomain() {
return "mangadex.org";
@@ -50,14 +52,12 @@ public String getGID(URL url) throws MalformedURLException {
String capID = getChapterID(url.toExternalForm());
String mangaID = getMangaID(url.toExternalForm());
if (capID != null) {
- isSingleChapter=true;
+ isSingleChapter = true;
return capID;
+ } else if (mangaID != null) {
+ isSingleChapter = false;
+ return mangaID;
}
- else
- if(mangaID!=null){
- isSingleChapter=false;
- return mangaID;
- }
throw new MalformedURLException("Unable to get chapter ID from" + url);
}
@@ -69,10 +69,11 @@ private String getChapterID(String url) {
}
return null;
}
- private String getMangaID(String url){
+
+ private String getMangaID(String url) {
Pattern p = Pattern.compile("https://mangadex.org/title/([\\d]+)/(.+)");
Matcher m = p.matcher(url);
- if(m.matches()){
+ if (m.matches()) {
return m.group(1);
}
return null;
@@ -80,20 +81,19 @@ private String getMangaID(String url){
@Override
- public JSONObject getFirstPage() throws IOException {
+ public JSONObject getFirstPage() throws IOException, URISyntaxException {
// Get the chapter ID
String chapterID = getChapterID(url.toExternalForm());
String mangaID = getMangaID(url.toExternalForm());
- if(mangaID!=null){
- return Http.url(new URL(mangaApiEndPoint+mangaID)).getJSON();
- }
- else
- return Http.url(new URL(chapterApiEndPoint + chapterID)).getJSON();
+ if (mangaID != null) {
+ return Http.url(new URI(mangaApiEndPoint + mangaID).toURL()).getJSON();
+ } else
+ return Http.url(new URI(chapterApiEndPoint + chapterID).toURL()).getJSON();
}
@Override
protected List getURLsFromJSON(JSONObject json) {
- if(isSingleChapter){
+ if (isSingleChapter) {
List assetURLs = new ArrayList<>();
JSONArray currentObject;
String chapterHash;
@@ -111,12 +111,12 @@ protected List getURLsFromJSON(JSONObject json) {
JSONObject chaptersJSON = (JSONObject) json.get("chapter");
JSONObject temp;
Iterator keys = chaptersJSON.keys();
- HashMap chapterIDs = new HashMap<>();
+ HashMap chapterIDs = new HashMap<>();
while (keys.hasNext()) {
- String keyValue = (String) keys.next();
- temp=(JSONObject)chaptersJSON.get(keyValue);
- if(temp.getString("lang_name").equals("English")) {
- chapterIDs.put(temp.getDouble("chapter"),keyValue);
+ String keyValue = keys.next();
+ temp = (JSONObject) chaptersJSON.get(keyValue);
+ if (temp.getString("lang_name").equals("English")) {
+ chapterIDs.put(temp.getDouble("chapter"), keyValue);
}
}
@@ -126,17 +126,16 @@ protected List getURLsFromJSON(JSONObject json) {
String chapterHash;
// Server is the cdn hosting the images.
String server;
- JSONObject chapterJSON=null;
- TreeMap treeMap = new TreeMap<>(chapterIDs);
- Iterator it = treeMap.keySet().iterator();
- while(it.hasNext()) {
- double key =(double) it.next();
+ JSONObject chapterJSON = null;
+ TreeMap treeMap = new TreeMap<>(chapterIDs);
+ for (Double aDouble : treeMap.keySet()) {
+ double key = (double) aDouble;
try {
- chapterJSON = Http.url(new URL(chapterApiEndPoint + treeMap.get(key))).getJSON();
- } catch (IOException e) {
+ chapterJSON = Http.url(new URI(chapterApiEndPoint + treeMap.get(key)).toURL()).getJSON();
+ } catch (IOException | URISyntaxException e) {
e.printStackTrace();
}
- sendUpdate(RipStatusMessage.STATUS.LOADING_RESOURCE,"chapter "+key);
+ sendUpdate(RipStatusMessage.STATUS.LOADING_RESOURCE, "chapter " + key);
chapterHash = chapterJSON.getString("hash");
server = chapterJSON.getString("server");
for (int i = 0; i < chapterJSON.getJSONArray("page_array").length(); i++) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ManganeloRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ManganeloRipper.java
index f4325aa12..c5f6b1429 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ManganeloRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ManganeloRipper.java
@@ -48,12 +48,6 @@ public String getGID(URL url) throws MalformedURLException {
"/manganelo.com/manga/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
Element elem = doc.select("div.btn-navigation-chap > a.back").first();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
index 8bdd2b2fb..2c83ce7ed 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
@@ -21,12 +21,12 @@ public MeituriRipper(URL url) throws IOException {
@Override
public String getHost() {
- return "meituri";
+ return "tujigu";
}
@Override
public String getDomain() {
- return "meituri.com";
+ return "tujigu.com";
}
// To use in getting URLs
@@ -35,23 +35,18 @@ public String getDomain() {
@Override
public String getGID(URL url) throws MalformedURLException {
// without escape
- // ^https?://[w.]*meituri\.com/a/([0-9]+)/([0-9]+\.html)*$
- // https://www.meituri.com/a/14449/
- // also matches https://www.meituri.com/a/14449/3.html etc.
+ // ^https?://[w.]*tujigu\.com/a/([0-9]+)/([0-9]+\.html)*$
+ // https://www.tujigu.com/a/14449/
+ // also matches https://www.tujigu.com/a/14449/3.html etc.
// group 1 is 14449
- Pattern p = Pattern.compile("^https?://[w.]*meituri\\.com/a/([0-9]+)/([0-9]+\\.html)*$");
+ Pattern p = Pattern.compile("^https?://[w.]*tujigu\\.com/a/([0-9]+)/([0-9]+\\.html)*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
albumID = m.group(1);
return m.group(1);
}
throw new MalformedURLException(
- "Expected meituri.com URL format: " + "meituri.com/a/albumid/ - got " + url + "instead");
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
+ "Expected tujigu.com URL format: " + "tujigu.com/a/albumid/ - got " + url + "instead");
}
@Override
@@ -71,7 +66,7 @@ public List getURLsFromPage(Document doc) {
}
// Base URL: http://ii.hywly.com/a/1/albumid/imgnum.jpg
- String baseURL = "http://ii.hywly.com/a/1/" + albumID + "/";
+ String baseURL = "https://tjg.hywly.com/a/1/" + albumID + "/";
// Loop through and add images to the URL list
for (int i = 1; i <= numOfImages; i++) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java
index 0b513b377..c2d6ed47d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java
@@ -41,11 +41,6 @@ public String getGID(URL url) throws MalformedURLException {
throw new MalformedURLException("Expected URL format: http://www.modelx.org/[category (one or more)]/xxxxx got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document page) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java
index 598cf5d4f..d2af02a15 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -13,7 +15,6 @@
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.select.Elements;
@@ -59,20 +60,21 @@ protected Document getFirstPage() throws IOException {
if (!notHome) {
StringBuilder newPath = new StringBuilder(path);
newPath.insert(2, "M");
- firstURL = new URL(this.url, "https://" + DOMAIN + newPath);
+ firstURL = URI.create("https://" + DOMAIN + newPath).toURL();
LOGGER.info("Changed URL to " + firstURL);
}
return Http.url(firstURL).referrer("https://motherless.com").get();
}
@Override
- public Document getNextPage(Document doc) throws IOException {
+ public Document getNextPage(Document doc) throws IOException, URISyntaxException {
+
Elements nextPageLink = doc.head().select("link[rel=next]");
if (nextPageLink.isEmpty()) {
throw new IOException("Last page reached");
} else {
String referrerLink = doc.head().select("link[rel=canonical]").first().attr("href");
- URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
+ URL nextURL = this.url.toURI().resolve(nextPageLink.first().attr("href")).toURL();
return Http.url(nextURL).referrer(referrerLink).get();
}
}
@@ -109,7 +111,7 @@ protected List getURLsFromPage(Document page) {
@Override
protected void downloadURL(URL url, int index) {
// Create thread for finding image at "url" page
- MotherlessImageThread mit = new MotherlessImageThread(url, index);
+ MotherlessImageRunnable mit = new MotherlessImageRunnable(url, index);
motherlessThreadPool.addThread(mit);
try {
Thread.sleep(IMAGE_SLEEP_TIME);
@@ -148,15 +150,19 @@ public String getGID(URL url) throws MalformedURLException {
throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url);
}
-
+ @Override
+ protected DownloadThreadPool getThreadPool() {
+ return motherlessThreadPool;
+ }
+
/**
* Helper class to find and download images found on "image" pages
*/
- private class MotherlessImageThread extends Thread {
- private URL url;
- private int index;
+ private class MotherlessImageRunnable implements Runnable {
+ private final URL url;
+ private final int index;
- MotherlessImageThread(URL url, int index) {
+ MotherlessImageRunnable(URL url, int index) {
super();
this.url = url;
this.index = index;
@@ -180,11 +186,11 @@ public void run() {
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
- addURLToDownload(new URL(file), prefix);
+ addURLToDownload(new URI(file).toURL(), prefix);
} else {
LOGGER.warn("[!] could not find '__fileurl' at " + url);
}
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java
new file mode 100644
index 000000000..642c6417e
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java
@@ -0,0 +1,223 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+
+public class MrCongRipper extends AbstractHTMLRipper {
+
+ private Document currDoc;
+ private int lastPageNum;
+ private int currPageNum;
+ private boolean tagPage = false;
+
+ public MrCongRipper(URL url) throws IOException {
+ super(url);
+ currPageNum = 1;
+ }
+
+ @Override
+ public String getHost() {
+ return "mrcong";
+ }
+
+ @Override
+ public String getDomain() {
+ return "mrcong.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ System.out.println(url.toExternalForm());
+ Pattern p = Pattern.compile("^https?://mrcong\\.com/(\\S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$");
+        Pattern p2 = Pattern.compile("^https?://mrcong\\.com/tag/(\\S*)/$"); // tag listing pages, e.g. https://mrcong.com/tag/NAME/
+ Matcher m = p.matcher(url.toExternalForm());
+ Matcher m2 = p2.matcher(url.toExternalForm()); //6-10-21
+ if (m.matches()) {
+ return m.group(1);
+ }
+ else if(m2.matches()) { //Added 6-10-21
+ tagPage = true;
+ System.out.println("tagPage = TRUE");
+ return m2.group(1);
+ }
+
+ throw new MalformedURLException("Expected mrcong.com URL format: "
+ + "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead");
+ }
+
+ @Override
+ public Document getFirstPage() throws IOException { //returns the root gallery page regardless of actual page number
+ // "url" is an instance field of the superclass
+ String rootUrlStr;
+ URL rootUrl;
+
+ if(!tagPage) {
+ rootUrlStr = url.toExternalForm().replaceAll("(|/|/[0-9]+/?)$", "/");
+ } else { //6-10-21
+ rootUrlStr = url.toExternalForm().replaceAll("(page/[0-9]+/)$", "page/1/");
+ }
+
+ rootUrl = URI.create(rootUrlStr).toURL();
+ url = rootUrl;
+ currPageNum = 1;
+ currDoc = Http.url(url).get();
+ getMaxPageNumber(currDoc);
+ return currDoc;
+ }
+
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ int pageNum = currPageNum;
+ String urlStr;
+ if(!tagPage) {
+ if (pageNum == 1 && lastPageNum > 1) {
+ urlStr = url.toExternalForm().concat((pageNum + 1) + "");
+ System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
+ } else if (pageNum < lastPageNum) {
+ urlStr = url.toExternalForm().replaceAll("(/([0-9]*)/?)$", ("/" + (pageNum + 1) + "/"));
+ System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
+ } else {
+                // past the last gallery page: signal end-of-album to the ripper via the IOException below
+ throw (new IOException("Error: Page number provided goes past last valid page number\n"));
+ }
+ } else { //6-10-21
+            // first page of a multi-page tag listing: next URL is built by appending "page/2/"
+ if (pageNum == 1 && lastPageNum > 1) { //6-10-21
+ urlStr = url.toExternalForm().concat("page/" + (pageNum + 1) + "");
+ System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
+ } else if (pageNum < lastPageNum) {
+ urlStr = url.toExternalForm().replaceAll("(page/([0-9]*)/?)$", ("page/" + (pageNum + 1) + "/"));
+ System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
+ } else {
+ //System.out.printf("Error: Page number provided goes past last valid page number\n");
+ System.out.print("Error: There is no next page!\n");
+ return null;
+ //throw (new IOException("Error: Page number provided goes past last valid page number\n"));
+ }
+ }
+
+ url = URI.create(urlStr).toURL();
+ currDoc = Http.url(url).get();
+        currPageNum ++; // advance past the page just fetched
+ return currDoc;
+ }
+
+ private int getMaxPageNumber(Document doc) {
+ if(!tagPage) {
+ try {
+ lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); //gets the last possible page for the gallery
+ } catch(Exception e) {
+ return 1;
+ }
+ } else {
+ try {
+ lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); //gets the last possible page for the gallery
+ System.out.println("The last page found for " + url + " was " + lastPageNum);
+ } catch(Exception e) {
+ return 1;
+ }
+ }
+
+ return lastPageNum;
+ }
+
+ private int getCurrentPageNum(Document doc) {
+        int currPage; // page number parsed from the pagination widget of the given document
+
+ if(!tagPage) {
+ currPage = Integer.parseInt(doc.select("div.page-link > span").first().text());
+ } else {
+ currPage = Integer.parseInt(doc.select("div.pagination > span").first().text());
+ }
+
+ System.out.println("The current page was found to be: " + currPage);
+
+ return currPage;
+ }
+
+ @Override
+ public List getURLsFromPage(Document doc) { //gets the urls of the images
+ List result = new ArrayList<>();
+
+ if(!tagPage) {
+ for (Element el : doc.select("p > img")) {
+ String imageSource = el.attr("src");
+ result.add(imageSource);
+ }
+
+ System.out.println("\n1.)Printing List: " + result + "\n");
+ } else { //6-10-21
+ //List gallery_set_list = new ArrayList<>();
+
+ for (Element el : doc.select("h2 > a")) {
+ String pageSource = el.attr("href");
+ if(!pageSource.equals("https://mrcong.com/")) {
+ result.add(pageSource);
+ System.out.println("\n" + pageSource + " has been added to the list.");
+ }
+ }
+
+ /*for (String el2 : gallery_set_list) {
+ try {
+ URL temp_urL = URI.create(el2).toURL();
+ MrCongRipper mcr = new MrCongRipper(temp_urL);
+ System.out.println("URL being ripped: " + mcr.url.toString());
+ result.addAll(mcr.getURLsFromPage(mcr.getFirstPage()));
+
+ Document nextPg = mcr.getNextPage(mcr.currDoc);
+ while(nextPg != null) {
+ result.addAll(mcr.getURLsFromPage(nextPg));
+ nextPg = mcr.getNextPage(mcr.currDoc);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ }*/
+
+ System.out.println("\n2.)Printing List: " + result + "\n");
+ }
+
+ return result;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+        // tag pages enumerate sub-galleries and rip each one recursively instead of downloading directly
+
+ if(!tagPage) {
+ addURLToDownload(url, getPrefix(index));
+ } else {
+ try {
+ List ls = this.getURLsFromPage(this.currDoc);
+ Document np = this.getNextPage(this.currDoc);
+
+ while(np != null) { //Creates a list of all sets to download
+ ls.addAll(this.getURLsFromPage(np));
+ np = this.getNextPage(np);
+ }
+
+ for(String urlStr : ls) {
+ MrCongRipper mcr = new MrCongRipper(URI.create(urlStr).toURL());
+ mcr.setup();
+ mcr.rip();
+ }
+
+ } catch (IOException | URISyntaxException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MultpornRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MultpornRipper.java
new file mode 100644
index 000000000..cdc873f2d
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MultpornRipper.java
@@ -0,0 +1,71 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class MultpornRipper extends AbstractHTMLRipper {
+
+ public MultpornRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ protected String getDomain() {
+ return "multporn.net";
+ }
+
+ @Override
+ public String getHost() {
+ return "multporn";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException, URISyntaxException {
+ Pattern p = Pattern.compile("^https?://multporn\\.net/node/(\\d+)/.*$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+
+ try {
+ String nodeHref = Http.url(url).get().select(".simple-mode-switcher").attr("href");
+ p = Pattern.compile("/node/(\\d+)/.*");
+ m = p.matcher(nodeHref);
+ if (m.matches()) {
+ this.url = new URI("https://multporn.net" + nodeHref).toURL();
+ return m.group(1);
+ }
+ }catch (Exception ignored){};
+
+ throw new MalformedURLException("Expected multporn.net URL format: " +
+ "multporn.net/comics/comicid / multporn.net/node/id/* - got " + url + " instead");
+ }
+
+ @Override
+ protected List getURLsFromPage(Document page) {
+ List imageURLs = new ArrayList<>();
+ Elements thumbs = page.select(".mfp-gallery-image .mfp-item");
+ for (Element el : thumbs) {
+ imageURLs.add(el.attr("href"));
+ }
+ return imageURLs;
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java
index 453826a39..deedfb888 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java
@@ -4,6 +4,7 @@
import com.rarchives.ripme.utils.Http;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -13,8 +14,6 @@
import org.jsoup.nodes.Element;
public class MyhentaicomicsRipper extends AbstractHTMLRipper {
- private static boolean isTag;
-
public MyhentaicomicsRipper(URL url) throws IOException {
super(url);
}
@@ -69,7 +68,6 @@ public boolean pageContainsAlbums(URL url) {
Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+?=:]*)?$");
Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) {
- isTag = true;
return true;
}
return false;
@@ -85,9 +83,8 @@ public List getAlbumsToQueue(Document doc) {
}
@Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
+ public Document getFirstPage() throws IOException, URISyntaxException {
+ return super.getFirstPage();
}
@Override
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaigalleryRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaigalleryRipper.java
index d8422942c..c9f4c0bd9 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaigalleryRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaigalleryRipper.java
@@ -40,12 +40,6 @@ public String getGID(URL url) throws MalformedURLException {
+ "myhentaigallery.com/gallery/thumbnails/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MyreadingmangaRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MyreadingmangaRipper.java
index 20a3cf2d9..30fab521d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MyreadingmangaRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MyreadingmangaRipper.java
@@ -41,12 +41,6 @@ public String getGID(URL url) throws MalformedURLException {
+ "myreadingmanga.info/title - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NatalieMuRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NatalieMuRipper.java
index 952b434e8..8cf24fd8e 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NatalieMuRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NatalieMuRipper.java
@@ -79,11 +79,6 @@ public String getDomain() {
return this.url.getHost();
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(this.url).get();
- }
-
@Override
public List getURLsFromPage(Document page) {
List imageURLs = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
index b3ededc4b..a7be157a3 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
@@ -53,7 +53,7 @@ public String getGID(URL url) throws MalformedURLException {
@Override
protected Document getFirstPage() throws IOException {
- return Http.url("https://" + this.username + ".newgrounds.com/art").get();
+ return Http.url("https://" + this.username + ".newgrounds.com/art").timeout(10*1000).get();
}
@Override
@@ -71,7 +71,7 @@ protected List getURLsFromPage(Document page) {
List imageURLs = new ArrayList<>();
String documentHTMLString = page.toString().replaceAll(""", "");
- String findStr = "newgrounds.com\\/art\\/view\\/" + this.username;
+ String findStr = "newgrounds.com/art/view/" + this.username;
int lastIndex = 0;
// Index where findStr is found; each occasion contains the link to an image
@@ -95,7 +95,7 @@ protected List getURLsFromPage(Document page) {
if(i == indices.size() - 1){
s = documentHTMLString.substring(indices.get(i) + 2);
} else{
- s = documentHTMLString.substring(indices.get(i) + 2, indices.get(i + 1));
+ s = documentHTMLString.substring(indices.get(i) + 1, indices.get(i + 1));
}
s = s.replaceAll("\n", "").replaceAll("\t", "")
@@ -106,13 +106,14 @@ protected List getURLsFromPage(Document page) {
if (m.lookingAt()) {
String testURL = m.group(3) + "_" + this.username + "_" + m.group(1);
+ testURL = testURL.replace("_full", "");
// Open new document to get full sized image
try {
Document imagePage = Http.url(inLink + m.group(1)).get();
for(String extensions: this.ALLOWED_EXTENSIONS){
if(imagePage.toString().contains(testURL + "." + extensions)){
- imageUrl += m.group(2) + "/" + m.group(3) + "_" + this.username + "_" + m.group(1) + "." + extensions;
+ imageUrl += m.group(2) + "/" + m.group(3).replace("_full","") + "_" + this.username + "_" + m.group(1) + "." + extensions;
imageURLs.add(imageUrl);
break;
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java
index 86079edc3..35a1f8add 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -29,8 +31,6 @@ public class NfsfwRipper extends AbstractHTMLRipper {
"https?://[wm.]*nfsfw.com/gallery/v/[^/]+/(.+)$"
);
- // cached first page
- private Document fstPage;
// threads pool for downloading images from image pages
private DownloadThreadPool nfsfwThreadPool;
@@ -49,13 +49,6 @@ public String getHost() {
return HOST;
}
- @Override
- protected Document getFirstPage() throws IOException {
- // cache the first page
- this.fstPage = Http.url(url).get();
- return fstPage;
- }
-
@Override
public Document getNextPage(Document page) throws IOException {
String nextURL = null;
@@ -113,13 +106,13 @@ protected void downloadURL(URL url, int index) {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
// always start on the first page of an album
// (strip the options after the '?')
String u = url.toExternalForm();
if (u.contains("?")) {
u = u.substring(0, u.indexOf("?"));
- return new URL(u);
+ return new URI(u).toURL();
} else {
return url;
}
@@ -157,9 +150,15 @@ public boolean hasQueueSupport() {
@Override
public boolean pageContainsAlbums(URL url) {
- List imageURLs = getImagePageURLs(fstPage);
- List subalbumURLs = getSubalbumURLs(fstPage);
- return imageURLs.isEmpty() && !subalbumURLs.isEmpty();
+ try {
+ final var fstPage = getCachedFirstPage();
+ List imageURLs = getImagePageURLs(fstPage);
+ List subalbumURLs = getSubalbumURLs(fstPage);
+ return imageURLs.isEmpty() && !subalbumURLs.isEmpty();
+ } catch (IOException | URISyntaxException e) {
+ LOGGER.error("Unable to load " + url, e);
+ return false;
+ }
}
@Override
@@ -196,10 +195,10 @@ private List getSubalbumURLs(Document page){
/**
* Helper class to find and download images found on "image" pages
*/
- private class NfsfwImageThread extends Thread {
- private URL url;
- private String subdir;
- private int index;
+ private class NfsfwImageThread implements Runnable {
+ private final URL url;
+ private final String subdir;
+ private final int index;
NfsfwImageThread(URL url, String subdir, int index) {
super();
@@ -223,8 +222,8 @@ public void run() {
if (file.startsWith("/")) {
file = "http://nfsfw.com" + file;
}
- addURLToDownload(new URL(file), getPrefix(index), this.subdir);
- } catch (IOException e) {
+ addURLToDownload(new URI(file).toURL(), getPrefix(index), this.subdir);
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java
index 49fc1d8a3..fe50f1f16 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java
@@ -126,7 +126,7 @@ public List getURLsFromPage(Document page) {
List imageURLs = new ArrayList<>();
Elements thumbs = page.select("a.gallerythumb > img");
for (Element el : thumbs) {
- imageURLs.add(el.attr("data-src").replaceAll("t\\.n", "i.n").replaceAll("t\\.", "."));
+ imageURLs.add(el.attr("data-src").replaceAll("://t", "://i").replaceAll("t\\.", "."));
}
return imageURLs;
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NsfwXxxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NsfwXxxRipper.java
new file mode 100644
index 000000000..7e26faa2a
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NsfwXxxRipper.java
@@ -0,0 +1,135 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractJSONRipper;
+import com.rarchives.ripme.utils.Http;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+public class NsfwXxxRipper extends AbstractJSONRipper {
+
+ public NsfwXxxRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ protected String getDomain() {
+ return "nsfw.xxx";
+ }
+
+ @Override
+ public String getHost() {
+ return "nsfw_xxx";
+ }
+
+
+ @Override
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
+ String u = url.toExternalForm();
+ // https://nsfw.xxx/user/kelly-kat/foo -> https://nsfw.xxx/user/kelly-kat
+ // https://nsfw.xxx/user/kelly-kat -> https://nsfw.xxx/user/kelly-kat
+ // keep up to and including the username
+ u = u.replaceAll("https?://nsfw.xxx/user/([^/]+)/?.*", "https://nsfw.xxx/user/$1");
+ if (!u.contains("nsfw.xxx/user")) {
+ throw new MalformedURLException("Invalid URL: " + url);
+ }
+
+ return new URI(u).toURL();
+ }
+
+ String getUser() throws MalformedURLException {
+ return getGID(url);
+ }
+
+ URL getPage(int page) throws MalformedURLException, URISyntaxException {
+ return new URI("https://nsfw.xxx/slide-page/" + page + "?nsfw%5B%5D=0&types%5B%5D=image&types%5B%5D=video&types%5B%5D=gallery&slider=1&jsload=1&user=" + getUser()).toURL();
+ }
+
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p = Pattern.compile("https://nsfw.xxx/user/([^/]+)/?$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected URL format: " +
+ "nsfw.xxx/user/USER - got " + url + " instead");
+ }
+
+
+ int currentPage = 1;
+
+ @Override
+ protected JSONObject getFirstPage() throws IOException, URISyntaxException {
+ return Http.url(getPage(1)).getJSON();
+ }
+
+ List descriptions = new ArrayList<>();
+
+ @Override
+ protected JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException {
+ currentPage++;
+ JSONObject nextPage = Http.url(getPage(doc.getInt("page") + 1)).getJSON();
+ JSONArray items = nextPage.getJSONArray("items");
+ if (items.isEmpty()) {
+ throw new IOException("No more pages");
+ }
+ return nextPage;
+ }
+
+ class ApiEntry {
+ String srcUrl;
+ String author;
+ String title;
+
+ public ApiEntry(String srcUrl, String author, String title) {
+ this.srcUrl = srcUrl;
+ this.author = author;
+ this.title = title;
+ }
+ }
+
+ @Override
+ protected List getURLsFromJSON(JSONObject json) {
+ JSONArray items = json.getJSONArray("items");
+ List data = IntStream
+ .range(0, items.length())
+ .mapToObj(items::getJSONObject)
+ .map(o -> {
+ String srcUrl;
+ if(o.has("src")) {
+ srcUrl = o.getString("src");
+ } else {
+ // video source
+ Pattern videoHtmlSrcPattern = Pattern.compile("src=\"([^\"]+)\"");
+ Matcher matches = videoHtmlSrcPattern.matcher(o.getString("html"));
+ matches.find();
+ srcUrl = StringEscapeUtils.unescapeHtml(matches.group(1));
+ }
+
+ return new ApiEntry(srcUrl, o.getString("author"), o.getString("title"));
+ })
+ .toList();
+
+ data.forEach(e -> descriptions.add(e.title));
+ return data.stream().map(e -> e.srcUrl).collect(Collectors.toList());
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index) + descriptions.get(index - 1) + "_" , "", "", null);
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java
index 3300da500..ea145aad3 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java
@@ -16,8 +16,6 @@
import com.rarchives.ripme.utils.Http;
public class NudeGalsRipper extends AbstractHTMLRipper {
- // Current HTML document
- private Document albumDoc = null;
public NudeGalsRipper(URL url) throws IOException {
super(url);
@@ -50,14 +48,6 @@ public String getGID(URL url) throws MalformedURLException {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = Http.url(url).get();
- }
- return albumDoc;
- }
-
@Override
public List getURLsFromPage(Document doc) {
List imageURLs = new ArrayList<>();
@@ -77,4 +67,4 @@ public void downloadURL(URL url, int index) {
// Send referrer when downloading images
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java
index a51833972..e03d3bdcd 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java
@@ -46,12 +46,6 @@ public String getAlbumTitle(URL url) throws MalformedURLException {
return getDomain();
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("div#nav > a > div#nx").first() == null) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
index d2421f373..39d56b83e 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
@@ -3,25 +3,28 @@
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
-import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.log4j.Logger;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class PahealRipper extends AbstractHTMLRipper {
- private static final Logger logger = Logger.getLogger(PahealRipper.class);
+ private static final Logger logger = LogManager.getLogger(PahealRipper.class);
private static Map cookies = null;
private static Pattern gidPattern = null;
@@ -56,7 +59,7 @@ public Document getFirstPage() throws IOException {
@Override
public Document getNextPage(Document page) throws IOException {
for (Element e : page.select("#paginator a")) {
- if (e.text().toLowerCase().equals("next")) {
+ if (e.text().equalsIgnoreCase("next")) {
return Http.url(e.absUrl("href")).cookies(getCookies()).get();
}
}
@@ -88,12 +91,12 @@ public void downloadURL(URL url, int index) {
name = name.substring(0, name.length() - ext.length());
}
- File outFile = new File(workingDir.getCanonicalPath()
- + File.separator
+ Path outFile = Paths.get(workingDir
+ + "/"
+ Utils.filesystemSafe(new URI(name).getPath())
+ ext);
addURLToDownload(url, outFile);
- } catch (IOException | URISyntaxException ex) {
+ } catch (URISyntaxException ex) {
logger.error("Error while downloading URL " + url, ex);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
index 680d2c09c..097fe2c05 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -82,7 +84,7 @@ public String getHost() {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
LOGGER.info(url);
String u = url.toExternalForm();
if (u.contains("?")) {
@@ -93,11 +95,11 @@ public URL sanitizeURL(URL url) throws MalformedURLException {
// append trailing slash
u = u + "/";
}
- return new URL(u);
+ return new URI(u).toURL();
}
@Override
- public String getGID(URL url) throws MalformedURLException {
+ public String getGID(URL url) throws MalformedURLException, URISyntaxException {
Matcher m;
URL sanitized = sanitizeURL(url);
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PichunterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PichunterRipper.java
index e6c5d110e..bdb5f528c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PichunterRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PichunterRipper.java
@@ -63,12 +63,6 @@ private boolean isPhotoSet(URL url) {
return m.matches();
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
// We use comic-nav-next to the find the next page
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PicstatioRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PicstatioRipper.java
index 1bd103b54..65d43d397 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PicstatioRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PicstatioRipper.java
@@ -51,12 +51,6 @@ public String getGID(URL url) throws MalformedURLException {
"www.picstatio.com//ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("a.next_page") != null) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixRipper.java
index b45796848..f021269f1 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixRipper.java
@@ -41,12 +41,6 @@ public String getGID(URL url) throws MalformedURLException {
"porncomix.info/comic - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixinfoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixinfoRipper.java
index 241ad5d7e..8aef59a62 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixinfoRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixinfoRipper.java
@@ -41,12 +41,6 @@ public String getGID(URL url) throws MalformedURLException {
"porncomixinfo.net/chapter/CHAP/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
index 197bdcbd9..a2ce4a196 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
@@ -1,9 +1,11 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
+import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@@ -47,12 +49,12 @@ protected Document getFirstPage() throws IOException {
}
@Override
- public Document getNextPage(Document page) throws IOException {
+ public Document getNextPage(Document page) throws IOException, URISyntaxException {
Elements nextPageLink = page.select("li.page_next > a");
if (nextPageLink.isEmpty()){
throw new IOException("No more pages");
} else {
- URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
+ URL nextURL = this.url.toURI().resolve(nextPageLink.first().attr("href")).toURL();
return Http.url(nextURL).get();
}
}
@@ -74,7 +76,7 @@ protected List getURLsFromPage(Document page) {
@Override
protected void downloadURL(URL url, int index) {
- PornhubImageThread t = new PornhubImageThread(url, index, this.workingDir);
+ PornhubImageThread t = new PornhubImageThread(url, index, this.workingDir.toPath());
pornhubThreadPool.addThread(t);
try {
Thread.sleep(IMAGE_SLEEP_TIME);
@@ -83,13 +85,13 @@ protected void downloadURL(URL url, int index) {
}
}
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
// always start on the first page of an album
// (strip the options after the '?')
String u = url.toExternalForm();
if (u.contains("?")) {
u = u.substring(0, u.indexOf("?"));
- return new URL(u);
+ return new URI(u).toURL();
} else {
return url;
}
@@ -126,11 +128,11 @@ public boolean canRip(URL url) {
*
* Handles case when site has IP-banned the user.
*/
- private class PornhubImageThread extends Thread {
- private URL url;
- private int index;
+ private class PornhubImageThread implements Runnable {
+ private final URL url;
+ private final int index;
- PornhubImageThread(URL url, int index, File workingDir) {
+ PornhubImageThread(URL url, int index, Path workingDir) {
super();
this.url = url;
this.index = index;
@@ -159,10 +161,10 @@ private void fetchImage() {
prefix = String.format("%03d_", index);
}
- URL imgurl = new URL(url, imgsrc);
+ URL imgurl = url.toURI().resolve(imgsrc).toURL();
addURLToDownload(imgurl, prefix);
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PornpicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PornpicsRipper.java
index b779c480a..799f7294d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PornpicsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PornpicsRipper.java
@@ -41,12 +41,6 @@ public String getGID(URL url) throws MalformedURLException {
"www.pornpics.com/galleries/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
index 09569fc74..dcfa14e77 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
@@ -1,14 +1,22 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
import java.io.IOException;
+import java.io.OutputStream;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage;
+import j2html.TagCreator;
+import j2html.tags.ContainerTag;
+import j2html.tags.specialized.DivTag;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
@@ -19,6 +27,9 @@
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
+import org.jsoup.Jsoup;
+
+import static j2html.TagCreator.*;
public class RedditRipper extends AlbumRipper {
@@ -47,19 +58,19 @@ public boolean canRip(URL url) {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String u = url.toExternalForm();
// Strip '/u/' from URL
u = u.replaceAll("reddit\\.com/u/", "reddit.com/user/");
- return new URL(u);
+ return new URI(u).toURL();
}
- private URL getJsonURL(URL url) throws MalformedURLException {
+ private URL getJsonURL(URL url) throws MalformedURLException, URISyntaxException {
// Convert gallery to post link and append ".json"
Pattern p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/gallery/([a-zA-Z0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
- return new URL("https://reddit.com/" +m.group(m.groupCount())+ ".json");
+ return new URI("https://reddit.com/" +m.group(m.groupCount())+ ".json").toURL();
}
// Append ".json" to URL in appropriate location.
@@ -67,28 +78,32 @@ private URL getJsonURL(URL url) throws MalformedURLException {
if (url.getQuery() != null) {
result += "?" + url.getQuery();
}
- return new URL(result);
+ return new URI(result).toURL();
}
@Override
public void rip() throws IOException {
- URL jsonURL = getJsonURL(this.url);
- while (true) {
- if (shouldAddURL()) {
- sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_COMPLETE_HISTORY, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
- break;
- }
- jsonURL = getAndParseAndReturnNext(jsonURL);
- if (jsonURL == null || isThisATest() || isStopped()) {
- break;
+ try {
+ URL jsonURL = getJsonURL(this.url);
+ while (true) {
+ if (shouldAddURL()) {
+ sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_COMPLETE_HISTORY, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
+ break;
+ }
+ jsonURL = getAndParseAndReturnNext(jsonURL);
+ if (jsonURL == null || isThisATest() || isStopped()) {
+ break;
+ }
}
+ } catch (URISyntaxException e) {
+ throw new IOException(e.getMessage(), e);
}
waitForThreads();
}
- private URL getAndParseAndReturnNext(URL url) throws IOException {
+ private URL getAndParseAndReturnNext(URL url) throws IOException, URISyntaxException {
JSONArray jsonArray = getJsonArrayFromURL(url), children;
JSONObject json, data;
URL nextURL = null;
@@ -103,7 +118,19 @@ private URL getAndParseAndReturnNext(URL url) throws IOException {
}
children = data.getJSONArray("children");
for (int j = 0; j < children.length(); j++) {
- parseJsonChild(children.getJSONObject(j));
+ try {
+ parseJsonChild(children.getJSONObject(j));
+
+ if (children.getJSONObject(j).getString("kind").equals("t3") &&
+ children.getJSONObject(j).getJSONObject("data").getBoolean("is_self")
+ ) {
+ URL selfPostURL = new URI(children.getJSONObject(j).getJSONObject("data").getString("url")).toURL();
+ LOGGER.debug("Fetching self post: " + selfPostURL.toExternalForm());
+ saveText(getJsonArrayFromURL(getJsonURL(selfPostURL)));
+ }
+ } catch (Exception e) {
+ LOGGER.debug("at index " + i + ", for this data: " + data.toString() + e);
+ }
}
if (data.has("after") && !data.isNull("after")) {
String nextURLString = Utils.stripURLParameter(url.toExternalForm(), "after");
@@ -113,7 +140,7 @@ private URL getAndParseAndReturnNext(URL url) throws IOException {
else {
nextURLString = nextURLString.concat("?after=" + data.getString("after"));
}
- nextURL = new URL(nextURLString);
+ nextURL = new URI(nextURLString).toURL();
}
}
@@ -225,8 +252,123 @@ private void handleBody(String body, String id, String title) {
}
}
+ private void saveText(JSONArray jsonArray) throws JSONException {
+ Path saveFileAs;
+
+ JSONObject selfPost = jsonArray.getJSONObject(0).getJSONObject("data")
+ .getJSONArray("children").getJSONObject(0).getJSONObject("data");
+ JSONArray comments = jsonArray.getJSONObject(1).getJSONObject("data")
+ .getJSONArray("children");
+
+ if (selfPost.getString("selftext").equals("")) { return; }
+
+ final String title = selfPost.getString("title");
+ final String id = selfPost.getString("id");
+ final String author = selfPost.getString("author");
+ final String creationDate = new Date((long) selfPost.getInt("created") * 1000).toString();
+ final String subreddit = selfPost.getString("subreddit");
+ final String selfText = selfPost.getString("selftext_html");
+ final String permalink = selfPost.getString("url");
+
+ String html = TagCreator.html(
+ head(
+ title(title),
+ style(rawHtml(HTML_STYLING))
+ ),
+ body(
+ div(
+ h1(title),
+ a(subreddit).withHref("https://www.reddit.com/r/" + subreddit),
+ a("Original").withHref(permalink),
+ br()
+ ).withClass("thing"),
+ div(
+ div(
+ span(
+ a(author).withHref("https://www.reddit.com/u/" + author)
+ ).withClass("author op")
+ ).withClass("thing oppost")
+ .withText(creationDate)
+ .with(rawHtml(Jsoup.parse(selfText).text()))
+ ).withClass("flex")
+ ).with(getComments(comments, author)),
+ script(rawHtml(HTML_SCRIPT))
+ ).renderFormatted();
+
+ try {
+ saveFileAs = Utils.getPath(workingDir
+ + "/"
+ + id + "_" + Utils.filesystemSafe(title)
+ + ".html");
+ try (OutputStream out = Files.newOutputStream(saveFileAs)) {
+ out.write(html.getBytes());
+ }
+ } catch (IOException e) {
+ LOGGER.error("[!] Error creating save file path for description '" + url + "':", e);
+ return;
+ }
+
+ LOGGER.debug("Downloading " + url + "'s self post to " + saveFileAs);
+ super.retrievingSource(permalink);
+ if (!Files.exists(saveFileAs.getParent())) {
+ LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
+ try {
+ Files.createDirectory(saveFileAs.getParent());
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ private ContainerTag getComments(JSONArray comments, String author) {
+ ContainerTag commentsDiv = div().withId("comments");
+
+ for (int i = 0; i < comments.length(); i++) {
+ JSONObject data = comments.getJSONObject(i).getJSONObject("data");
+
+ try {
+ ContainerTag commentDiv =
+ div(
+ span(data.getString("author")).withClasses("author", iff(data.getString("author").equals(author), "op")),
+ a(new Date((long) data.getInt("created") * 1000).toString()).withHref("#" + data.getString("name"))
+ ).withClass("thing comment").withId(data.getString("name"))
+ .with(rawHtml(Jsoup.parse(data.getString("body_html")).text()));
+ getNestedComments(data, commentDiv, author);
+ commentsDiv.with(commentDiv);
+ } catch (Exception e) {
+ LOGGER.debug("at index " + i + ", for this data: " + data.toString() + e);
+ }
+ }
+ return commentsDiv;
+ }
+
+ private ContainerTag getNestedComments(JSONObject data, ContainerTag parentDiv, String author) {
+ if (data.has("replies") && data.get("replies") instanceof JSONObject) {
+ JSONArray commentChildren = data.getJSONObject("replies").getJSONObject("data").getJSONArray("children");
+ for (int i = 0; i < commentChildren.length(); i++) {
+ JSONObject nestedComment = commentChildren
+ .getJSONObject(i).getJSONObject("data");
+
+ String nestedCommentAuthor = nestedComment.optString("author");
+ if (!nestedCommentAuthor.isBlank()) {
+ ContainerTag childDiv =
+ div(
+ div(
+ span(nestedCommentAuthor).withClasses("author", iff(nestedCommentAuthor.equals(author), "op")),
+ a(new Date((long) nestedComment.getInt("created") * 1000).toString()).withHref("#" + nestedComment.getString("name"))
+ ).withClass("comment").withId(nestedComment.getString("name"))
+ .with(rawHtml(Jsoup.parse(nestedComment.getString("body_html")).text()))
+ ).withClass("child");
+
+ parentDiv.with(getNestedComments(nestedComment, childDiv, author));
+ }
+ }
+ }
+ return parentDiv;
+ }
+
private URL parseRedditVideoMPD(String vidURL) {
- org.jsoup.nodes.Document doc = null;
+ org.jsoup.nodes.Document doc;
try {
doc = Http.url(vidURL + "/DASHPlaylist.mpd").ignoreContentType().get();
int largestHeight = 0;
@@ -242,8 +384,8 @@ private URL parseRedditVideoMPD(String vidURL) {
baseURL = doc.select("MPD > Period > AdaptationSet > Representation[height=" + height + "]").select("BaseURL").text();
}
}
- return new URL(vidURL + "/" + baseURL);
- } catch (IOException e) {
+ return new URI(vidURL + "/" + baseURL).toURL();
+ } catch (IOException | URISyntaxException e) {
e.printStackTrace();
}
return null;
@@ -253,8 +395,8 @@ private URL parseRedditVideoMPD(String vidURL) {
private void handleURL(String theUrl, String id, String title) {
URL originalURL;
try {
- originalURL = new URL(theUrl);
- } catch (MalformedURLException e) {
+ originalURL = new URI(theUrl).toURL();
+ } catch (MalformedURLException | URISyntaxException e) {
return;
}
String subdirectory = "";
@@ -274,21 +416,21 @@ private void handleURL(String theUrl, String id, String title) {
Matcher m = p.matcher(url);
if (m.matches()) {
// It's from reddituploads. Assume .jpg extension.
- String savePath = this.workingDir + File.separator;
- savePath += id + "-" + m.group(1) + title + ".jpg";
- addURLToDownload(urls.get(0), new File(savePath));
+ String savePath = this.workingDir + "/";
+ savePath += id + "-" + m.group(1) + Utils.filesystemSafe(title) + ".jpg";
+ addURLToDownload(urls.get(0), Utils.getPath(savePath));
}
if (url.contains("v.redd.it")) {
- String savePath = this.workingDir + File.separator;
- savePath += id + "-" + url.split("/")[3] + title + ".mp4";
+ String savePath = this.workingDir + "/";
+ savePath += id + "-" + url.split("/")[3] + Utils.filesystemSafe(title) + ".mp4";
URL urlToDownload = parseRedditVideoMPD(urls.get(0).toExternalForm());
if (urlToDownload != null) {
LOGGER.info("url: " + urlToDownload + " file: " + savePath);
- addURLToDownload(urlToDownload, new File(savePath));
+ addURLToDownload(urlToDownload, Utils.getPath(savePath));
}
}
else {
- addURLToDownload(urls.get(0), id + title, "", theUrl, null);
+ addURLToDownload(urls.get(0), Utils.filesystemSafe(id + title), "", theUrl, null);
}
} else if (urls.size() > 1) {
for (int i = 0; i < urls.size(); i++) {
@@ -307,7 +449,6 @@ private void handleGallery(JSONArray data, JSONObject metadata, String id, Strin
if (Utils.getConfigBoolean("reddit.use_sub_dirs", true)) {
if (Utils.getConfigBoolean("album_titles.save", true)) {
subdirectory = title;
- title = "-" + title + "-";
}
}
for (int i = 0; i < data.length(); i++) {
@@ -320,12 +461,12 @@ private void handleGallery(JSONArray data, JSONObject metadata, String id, Strin
try {
URL mediaURL;
if (!media.getJSONObject("s").isNull("gif")) {
- mediaURL = new URL(media.getJSONObject("s").getString("gif").replaceAll("&", "&"));
+ mediaURL = new URI(media.getJSONObject("s").getString("gif").replaceAll("&", "&")).toURL();
} else {
- mediaURL = new URL(media.getJSONObject("s").getString("u").replaceAll("&", "&"));
+ mediaURL = new URI(media.getJSONObject("s").getString("u").replaceAll("&", "&")).toURL();
}
addURLToDownload(mediaURL, prefix, subdirectory);
- } catch (MalformedURLException | JSONException e) {
+ } catch (MalformedURLException | JSONException | URISyntaxException e) {
LOGGER.error("[!] Unable to parse gallery JSON:\ngallery_data:\n" + data +"\nmedia_metadata:\n" + metadata);
}
}
@@ -369,4 +510,7 @@ public String getGID(URL url) throws MalformedURLException {
throw new MalformedURLException("Only accepts user pages, subreddits, post, or gallery can't understand " + url);
}
+ private static final String HTML_STYLING = " .author { font-weight: bold; } .op { color: blue; } .comment { border: 0px; margin: 0 0 25px; padding-left: 5px; } .child { margin: 2px 0 0 20px; border-left: 2px dashed #AAF; } .collapsed { background: darkgrey; margin-bottom: 0; } .collapsed > div { display: none; } .md { max-width: 840px; padding-right: 1em; } h1 { margin: 0; } body { position: relative; background-color: #eeeeec; color: #00000a; font-weight: 400; font-style: normal; font-variant: normal; font-family: Helvetica,Arial,sans-serif; line-height: 1.4 } blockquote { margin: 5px 5px 5px 15px; padding: 1px 1px 1px 15px; max-width: 60em; border: 1px solid #ccc; border-width: 0 0 0 1px; } pre { white-space: pre-wrap; } img, video { max-width: 60vw; max-height: 90vh; object-fit: contain; } .thing { overflow: hidden; margin: 0 5px 3px 40px; border: 1px solid #e0e0e0; background-color: #fcfcfb; } :target > .md { border: 5px solid blue; } .post { margin-bottom: 20px; margin-top: 20px; } .gold { background: goldenrod; } .silver { background: silver; } .platinum { background: aqua; } .deleted { background: #faa; } .md.deleted { background: inherit; border: 5px solid #faa; } .oppost { background-color: #EEF; } blockquote > p { margin: 0; } #related { max-height: 20em; overflow-y: scroll; background-color: #F4FFF4; } #related h3 { position: sticky; top: 0; background-color: white; } .flex { display: flex; flex-flow: wrap; flex-direction: row-reverse; justify-content: flex-end; } ";
+ private static final String HTML_SCRIPT = "document.addEventListener('mousedown', function(e) { var t = e.target; if (t.className == 'author') { t = t.parentElement; } if (t.classList.contains('comment')) { t.classList.toggle('collapsed'); e.preventDefault(); e.stopPropagation(); return false; } });";
+
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java
index 17105ee45..e82db4b28 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java
@@ -1,35 +1,57 @@
package com.rarchives.ripme.ripper.rippers;
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
-import org.json.JSONArray;
+
import org.json.JSONObject;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
+import java.net.URLDecoder;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-public class RedgifsRipper extends AbstractHTMLRipper {
+import org.apache.http.client.utils.URIBuilder;
+
+import com.rarchives.ripme.ripper.AbstractJSONRipper;
+
+public class RedgifsRipper extends AbstractJSONRipper {
private static final String HOST = "redgifs.com";
private static final String HOST_2 = "gifdeliverynetwork.com";
- String username = "";
- String cursor = "";
- String count = "100";
+ private static final String GIFS_DETAIL_ENDPOINT = "https://api.redgifs.com/v2/gifs/%s";
+ private static final String USERS_SEARCH_ENDPOINT = "https://api.redgifs.com/v2/users/%s/search";
+ private static final String GALLERY_ENDPOINT = "https://api.redgifs.com/v2/gallery/%s";
+ private static final String SEARCH_ENDPOINT = "https://api.redgifs.com/v2/search/%s";
+ private static final String TAGS_ENDPOINT = "https://api.redgifs.com/v2/gifs/search";
+ private static final String TEMPORARY_AUTH_ENDPOINT = "https://api.redgifs.com/v2/auth/temporary";
+ private static final Pattern PROFILE_PATTERN = Pattern.compile("^https?://[a-zA-Z0-9.]*redgifs\\.com/users/([a-zA-Z0-9_.-]+).*$");
+ private static final Pattern SEARCH_PATTERN = Pattern.compile("^https?:\\/\\/[a-zA-Z0-9.]*redgifs\\.com\\/search(?:\\/[a-zA-Z]+)?\\?.*?query=([a-zA-Z0-9-_+%]+).*$");
+ private static final Pattern TAGS_PATTERN = Pattern.compile("^https?:\\/\\/[a-zA-Z0-9.]*redgifs\\.com\\/gifs\\/([a-zA-Z0-9_.,-]+).*$");
+ private static final Pattern SINGLETON_PATTERN = Pattern.compile("^https?://[a-zA-Z0-9.]*redgifs\\.com/watch/([a-zA-Z0-9_-]+).*$");
+
+ /**
+ * Keep a single auth token for the complete lifecycle of the app.
+ * This should prevent fetching of multiple tokens.
+ */
+ private static String authToken = "";
- String searchText = "";
- int searchCount = 150;
- int searchStart = 0;
+ String username = "";
+ int count = 40;
+ int currentPage = 1;
+ int maxPages = 1;
- public RedgifsRipper(URL url) throws IOException {
- super(new URL(url.toExternalForm().replace("thumbs.", "")));
+ public RedgifsRipper(URL url) throws IOException, URISyntaxException {
+ super(new URI(url.toExternalForm().replace("thumbs.", "")).toURL());
}
@Override
@@ -46,41 +68,57 @@ public boolean canRip(URL url) {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String sUrl = url.toExternalForm();
sUrl = sUrl.replace("/gifs/detail", "");
sUrl = sUrl.replace("/amp", "");
sUrl = sUrl.replace("gifdeliverynetwork.com", "redgifs.com/watch");
- return new URL(sUrl);
+ return new URI(sUrl).toURL();
}
public Matcher isProfile() {
- Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/users/([a-zA-Z0-9_-]+).*$");
- return p.matcher(url.toExternalForm());
+ return PROFILE_PATTERN.matcher(url.toExternalForm());
}
public Matcher isSearch() {
- Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/gifs/browse/([a-zA-Z0-9_-]+).*$");
- return p.matcher(url.toExternalForm());
+ return SEARCH_PATTERN.matcher(url.toExternalForm());
+ }
+
+ public Matcher isTags() {
+ return TAGS_PATTERN.matcher(url.toExternalForm());
}
public Matcher isSingleton() {
- Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/watch/([a-zA-Z0-9_-]+).*$");
- return p.matcher(url.toExternalForm());
+ return SINGLETON_PATTERN.matcher(url.toExternalForm());
}
@Override
- public Document getFirstPage() throws IOException {
- if (!isProfile().matches() && !isSearch().matches()) {
- return Http.url(url).get();
- } else if (isSearch().matches()) {
- searchText = getGID(url).replace("-", " ");
- return Http.url(
- new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get();
- } else {
- username = getGID(url);
- return Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
- .ignoreContentType().get();
+ public JSONObject getFirstPage() throws IOException {
+ try {
+ if (authToken == null || authToken.isBlank()) {
+ fetchAuthToken();
+ }
+
+ if (isSingleton().matches()) {
+ maxPages = 1;
+ String gifDetailsURL = String.format(GIFS_DETAIL_ENDPOINT, getGID(url));
+ return Http.url(gifDetailsURL).header("Authorization", "Bearer " + authToken).getJSON();
+ } else if (isSearch().matches() || isTags().matches()) {
+ var json = Http.url(getSearchOrTagsURL()).header("Authorization", "Bearer " + authToken).getJSON();
+ maxPages = json.getInt("pages");
+ return json;
+ } else {
+ username = getGID(url);
+ var uri = new URIBuilder(String.format(USERS_SEARCH_ENDPOINT, username));
+ uri.addParameter("order", "new");
+ uri.addParameter("count", Integer.toString(count));
+ uri.addParameter("page", Integer.toString(currentPage));
+ var json = Http.url(uri.build().toURL()).header("Authorization", "Bearer " + authToken).getJSON();
+ maxPages = json.getInt("pages");
+ return json;
+ }
+ } catch (URISyntaxException e) {
+ throw new IOException("Failed to build first page url", e);
}
}
@@ -91,14 +129,35 @@ public void downloadURL(URL url, int index) {
@Override
public String getGID(URL url) throws MalformedURLException {
-
Matcher m = isProfile();
if (m.matches()) {
return m.group(1);
}
m = isSearch();
if (m.matches()) {
- return m.group(1);
+ var sText = m.group(1);
+ if (sText == null || sText.isBlank()) {
+ throw new MalformedURLException(String.format("Expected redgifs.com/search?query=searchtext\n Got %s", url));
+ }
+ sText = URLDecoder.decode(sText, StandardCharsets.UTF_8);
+ sText = sText.replaceAll("[^A-Za-z0-9_-]", "-");
+ return sText;
+ }
+ m = isTags();
+ if (m.matches()) {
+ var sText = m.group(1);
+ if (sText == null || sText.isBlank()) {
+ throw new MalformedURLException(String.format("Expected redgifs.com/gifs/searchtags\n Got %s", url));
+ }
+ sText = URLDecoder.decode(sText, StandardCharsets.UTF_8);
+ var list = Arrays.asList(sText.split(","));
+ if (list.size() > 1) {
+ LOGGER.warn("Url with multiple tags found. \nThey will be sorted alphabetically for folder name.");
+ }
+ Collections.sort(list);
+ var gid = list.stream().reduce("", (acc, val) -> acc.concat("_" + val));
+ gid = gid.replaceAll("[^A-Za-z0-9_-]", "-");
+ return gid;
}
m = isSingleton();
if (m.matches()) {
@@ -106,96 +165,206 @@ public String getGID(URL url) throws MalformedURLException {
}
throw new MalformedURLException(
"Expected redgifs.com format: "
- + "redgifs.com/id or "
- + "thumbs.redgifs.com/id.gif"
+ + "redgifs.com/watch/id or "
+ + "redgifs.com/users/id or "
+ + "redgifs.com/gifs/id or "
+ + "redgifs.com/search?query=text"
+ " Got: " + url);
}
- private String stripHTMLTags(String t) {
- t = t.replaceAll("\n" +
- " \n" +
- " ", "");
- t = t.replaceAll("\n" +
- "", "");
- t = t.replaceAll("\n", "");
- t = t.replaceAll("=\"\"", "");
- return t;
- }
-
@Override
- public Document getNextPage(Document doc) throws IOException {
- if (isSearch().matches()) {
- Document d = Http.url(
- new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText
- + "&count=" + searchCount + "&start=" + searchCount*++searchStart))
- .ignoreContentType().get();
- return (hasURLs(d).isEmpty()) ? null : d;
+ public JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException {
+ if (currentPage == maxPages || isSingleton().matches()) {
+ return null;
+ }
+ currentPage++;
+ if (isSearch().matches() || isTags().matches()) {
+ var json = Http.url(getSearchOrTagsURL()).header("Authorization", "Bearer " + authToken).getJSON();
+ // Handle rare maxPages change during a rip
+ maxPages = json.getInt("pages");
+ return json;
+ } else if (isProfile().matches()) {
+ var uri = new URIBuilder(String.format(USERS_SEARCH_ENDPOINT, getGID(url)));
+ uri.addParameter("order", "new");
+ uri.addParameter("count", Integer.toString(count));
+ uri.addParameter("page", Integer.toString(currentPage));
+ var json = Http.url(uri.build().toURL()).header("Authorization", "Bearer " + authToken).getJSON();
+ // Handle rare maxPages change during a rip
+ maxPages = json.getInt("pages");
+ return json;
} else {
- if (cursor.equals("")) {
- return null;
- } else {
- Document d = Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
- return (hasURLs(d).isEmpty()) ? null : d;
- }
+ return null;
}
}
@Override
- public List getURLsFromPage(Document doc) {
+ public List getURLsFromJSON(JSONObject json) {
List result = new ArrayList<>();
- if (isProfile().matches() || isSearch().matches()) {
- result = hasURLs(doc);
- } else {
- Elements videos = doc.select("script");
- for (Element el : videos) {
- String json = el.html();
- if (json.startsWith("{")) {
- JSONObject page = new JSONObject(json);
- result.add(page.getJSONObject("video").getString("contentUrl"));
+ if (isProfile().matches() || isSearch().matches() || isTags().matches()) {
+ var gifs = json.getJSONArray("gifs");
+ for (var gif : gifs) {
+ if (((JSONObject)gif).isNull("gallery")) {
+ var hdURL = ((JSONObject)gif).getJSONObject("urls").getString("hd");
+ result.add(hdURL);
+ } else {
+ var galleryID = ((JSONObject)gif).getString("gallery");
+ var gifID = ((JSONObject)gif).getString("id");
+ result.addAll(getURLsForGallery(galleryID, gifID));
}
}
+ } else {
+ var gif = json.getJSONObject("gif");
+ if (gif.isNull("gallery")) {
+ String hdURL = gif.getJSONObject("urls").getString("hd");
+ result.add(hdURL);
+ } else {
+ var galleryID = gif.getString("gallery");
+ var gifID = gif.getString("id");
+ result.addAll(getURLsForGallery(galleryID, gifID));
+ }
}
return result;
}
- /**
- * Helper method for retrieving URLs.
- * @param doc Document of the URL page to look through
- * @return List of URLs to download
+
+ /**
+ * Get all images for a gif url with multiple images
+ * @param galleryID gallery id
+ * @param gifID gif id with multiple images for logging
+ * @return List
*/
- public List hasURLs(Document doc) {
- List result = new ArrayList<>();
- JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
- JSONArray content = page.getJSONArray("gfycats");
- for (int i = 0; i < content.length(); i++) {
- result.add(content.getJSONObject(i).getString("mp4Url"));
+ private static List getURLsForGallery(String galleryID, String gifID) {
+ List list = new ArrayList<>();
+ if (galleryID == null || galleryID.isBlank()) {
+ return list;
}
- cursor = page.getString("cursor");
- return result;
+ try {
+ var json = Http.url(String.format(GALLERY_ENDPOINT, galleryID)).header("Authorization", "Bearer " + authToken).getJSON();
+ for (var gif : json.getJSONArray("gifs")) {
+ var hdURL = ((JSONObject)gif).getJSONObject("urls").getString("hd");
+ list.add(hdURL);
+ }
+ } catch (IOException e) {
+ LOGGER.error(String.format("Error fetching gallery %s for gif %s", galleryID, gifID), e);
+ }
+ return list;
}
-
/**
- * Helper method for retrieving video URLs.
- * @param url URL to gfycat page
+ * Static helper method for retrieving video URLs for usage in RipUtils.
+ * Most of the code is lifted from getFirstPage and getURLsFromJSON
+ * @param url URL to redgif page
* @return URL to video
* @throws IOException
*/
- public static String getVideoURL(URL url) throws IOException {
+ public static String getVideoURL(URL url) throws IOException, URISyntaxException {
LOGGER.info("Retrieving " + url.toExternalForm());
+ var m = SINGLETON_PATTERN.matcher(url.toExternalForm());
+ if (!m.matches()){
+ throw new IOException(String.format("Cannot fetch redgif url %s", url.toExternalForm()));
+ }
+ if (authToken == null || authToken.isBlank()){
+ fetchAuthToken();
+ }
+ var gid = m.group(1).split("-")[0];
+ var gifDetailsURL = String.format(GIFS_DETAIL_ENDPOINT, gid);
+ var json = Http.url(gifDetailsURL).header("Authorization", "Bearer " + authToken).getJSON();
+ var gif = json.getJSONObject("gif");
+ if (!gif.isNull("gallery")){
+ // TODO check how to handle a image gallery
+ throw new IOException(String.format("Multiple images found for url %s", url));
+ }
+ return gif.getJSONObject("urls").getString("hd");
+ }
+
- //Sanitize the URL first
- url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
+ /**
+ * Fetch a temporary auth token for the rip
+ * @throws IOException
+ */
+ private static void fetchAuthToken() throws IOException{
+ var json = Http.url(TEMPORARY_AUTH_ENDPOINT).getJSON();
+ var token = json.getString("token");
+ authToken = token;
+ LOGGER.info("Incase of redgif 401 errors, please restart the app to refresh the auth token");
+ }
+
+ /**
+ * Map browser url query params to search or tags endpoint query params and return the complete url.
+ *
+ * Search text for search url comes from the query params, whereas search text for tags url comes from the path.
+ *
+ * Tab type for search url comes from the path, whereas tab type for tags url comes from query params.
+ * @return Search or tags endpoint url
+ */
+ private URL getSearchOrTagsURL() throws IOException, URISyntaxException {
+ URIBuilder uri;
+ Map endpointQueryParams = new HashMap<>();
+ var browserURLQueryParams = new URIBuilder(url.toString()).getQueryParams();
+ for (var qp : browserURLQueryParams) {
+ var name = qp.getName();
+ var value = qp.getValue();
+ switch (name) {
+ case "query":
+ endpointQueryParams.put("query", URLDecoder.decode(value, StandardCharsets.UTF_8));
+ break;
+ case "tab":
+ switch (value) {
+ case "gifs" -> endpointQueryParams.put("type", "g");
+ case "images" -> endpointQueryParams.put("type", "i");
+ default -> LOGGER.warn(String.format("Unsupported tab for tags url %s", value));
+ }
+ break;
+ case "verified":
+ if (value != null && value.equals("1")) {
+ if (isTags().matches()) {
+ endpointQueryParams.put("verified", "y");
+ } else {
+ endpointQueryParams.put("verified", "yes");
+ }
+ }
+ break;
+ case "order":
+ endpointQueryParams.put("order", value);
+ break;
+ case "viewMode":
+ break;
+ default:
+ LOGGER.warn(String.format("Unexpected query param %s for search url. Skipping.", name));
+ }
+ }
- Document doc = Http.url(url).get();
- Elements videos = doc.select("script");
- for (Element el : videos) {
- String json = el.html();
- if (json.startsWith("{")) {
- JSONObject page = new JSONObject(json);
- return page.getJSONObject("video").getString("contentUrl");
+ // Build the search or tags url and add missing query params if any
+ if (isTags().matches()) {
+ var subpaths = url.getPath().split("/");
+ if (subpaths.length != 0) {
+ endpointQueryParams.put("search_text", subpaths[subpaths.length-1]);
+ } else {
+ throw new IOException("Failed to get search tags for url");
+ }
+ // Check if it is the main tags page with all gifs, images, creator etc
+ if (!endpointQueryParams.containsKey("type")) {
+ LOGGER.warn("No tab selected, defaulting to gifs");
+ endpointQueryParams.put("type", "g");
}
+ uri = new URIBuilder(TAGS_ENDPOINT);
+ } else {
+ var tabType = "gifs";
+ var subpaths = url.getPath().split("/");
+ if (subpaths.length != 0) {
+ switch (subpaths[subpaths.length-1]) {
+ case "gifs" -> tabType = "gifs";
+ case "images" -> tabType = "images";
+ case "search" -> LOGGER.warn("No tab selected, defaulting to gifs");
+ default -> LOGGER.warn(String.format("Unsupported search tab %s, defaulting to gifs", subpaths[subpaths.length-1]));
+ }
+ }
+ uri = new URIBuilder(String.format(SEARCH_ENDPOINT, tabType));
}
- throw new IOException();
- }
+ endpointQueryParams.put("page", Integer.toString(currentPage));
+ endpointQueryParams.put("count", Integer.toString(count));
+ endpointQueryParams.forEach((k, v) -> uri.addParameter(k, v));
+
+ return uri.build().toURL();
+ }
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/Rule34Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/Rule34Ripper.java
index 681738fa0..c7245739e 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/Rule34Ripper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/Rule34Ripper.java
@@ -2,6 +2,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -51,13 +53,13 @@ public String getGID(URL url) throws MalformedURLException {
"rule34.xxx/index.php?page=post&s=list&tags=TAG - got " + url + " instead");
}
- public URL getAPIUrl() throws MalformedURLException {
- URL urlToReturn = new URL("https://rule34.xxx/index.php?page=dapi&s=post&q=index&limit=100&tags=" + getGID(url));
+ public URL getAPIUrl() throws MalformedURLException, URISyntaxException {
+ URL urlToReturn = new URI("https://rule34.xxx/index.php?page=dapi&s=post&q=index&limit=100&tags=" + getGID(url)).toURL();
return urlToReturn;
}
@Override
- public Document getFirstPage() throws IOException {
+ public Document getFirstPage() throws IOException, URISyntaxException {
apiUrl = getAPIUrl().toExternalForm();
// "url" is an instance field of the superclass
return Http.url(getAPIUrl()).get();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RulePornRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RulePornRipper.java
index c9c487a77..be33c945f 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/RulePornRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RulePornRipper.java
@@ -40,11 +40,6 @@ public String getGID(URL url) throws MalformedURLException {
"Expected ruleporn.com URL format: " + "ruleporn.com/NAME - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java
index 7e0c1c46e..2df6ab2c4 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java
@@ -1,293 +1,293 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.net.*;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.java_websocket.client.WebSocketClient;
-
-import org.apache.http.NameValuePair;
-import org.apache.http.client.utils.URLEncodedUtils;
-import org.java_websocket.handshake.ServerHandshake;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-import com.rarchives.ripme.ripper.AbstractJSONRipper;
-
-public class ScrolllerRipper extends AbstractJSONRipper {
-
- public ScrolllerRipper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public String getHost() {
- return "scrolller";
- }
- @Override
- public String getDomain() {
- return "scrolller.com";
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- // Typical URL is: https://scrolller.com/r/subreddit
- // Parameters like "filter" and "sort" can be passed (ex: https://scrolller.com/r/subreddit?filter=xxx&sort=yyyy)
- Pattern p = Pattern.compile("^https?://scrolller\\.com/r/([a-zA-Z0-9]+).*?$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
- throw new MalformedURLException("Expected scrolller.com URL format: " +
- "scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead");
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index));
- }
-
-
- private JSONObject prepareQuery(String iterator, String gid, String sortByString) throws IOException, URISyntaxException {
-
- String QUERY_NOSORT = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }";
- String QUERY_SORT = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }";
-
- String filterString = convertFilterString(getParameter(this.url,"filter"));
-
- JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)).put("sortBy", sortByString.toUpperCase());
- JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", sortByString.equals("") ? QUERY_NOSORT : QUERY_SORT);
-
- if (iterator != null) {
- // Iterator is not present on the first page
- variablesObject.put("iterator", iterator);
- }
- if (!filterString.equals("NOFILTER")) {
- variablesObject.put("filter", filterString);
- }
-
- return sortByString.equals("") ? getPosts(finalQueryObject) : getPostsSorted(finalQueryObject);
-
- }
-
-
- public String convertFilterString(String filterParameter) {
- // Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query
- // I could basically remove the last "s" and call toUpperCase instead of this switch statement but this looks easier to read.
- switch (filterParameter.toLowerCase()) {
- case "pictures":
- return "PICTURE";
- case "videos":
- return "VIDEO";
- case "albums":
- return "ALBUM";
- case "":
- return "NOFILTER";
- default:
- LOGGER.error(String.format("Invalid filter %s using no filter",filterParameter));
- return "";
- }
- }
-
- public String getParameter(URL url, String parameter) throws MalformedURLException {
- // Gets passed parameters from the URL
- String toReplace = String.format("https://scrolller.com/r/%s?",getGID(url));
- List args= URLEncodedUtils.parse(url.toExternalForm(), Charset.defaultCharset());
- for (NameValuePair arg:args) {
- // First parameter contains part of the url so we have to remove it
- // Ex: for the url https://scrolller.com/r/CatsStandingUp?filter=xxxx&sort=yyyy
- // 1) arg.getName() => https://scrolller.com/r/CatsStandingUp?filter
- // 2) arg.getName() => sort
-
- if (arg.getName().replace(toReplace,"").toLowerCase().equals((parameter))) {
- return arg.getValue();
- }
- }
- return "";
- }
-
- private JSONObject getPosts(JSONObject data) {
- // The actual GraphQL query call
-
- try {
- String url = "https://api.scrolller.com/api/v2/graphql";
-
- URL obj = new URL(url);
- HttpURLConnection conn = (HttpURLConnection) obj.openConnection();
- conn.setReadTimeout(5000);
- conn.addRequestProperty("Accept-Language", "en-US,en;q=0.8");
- conn.addRequestProperty("User-Agent", "Mozilla");
- conn.addRequestProperty("Referer", "scrolller.com");
-
- conn.setDoOutput(true);
-
- OutputStreamWriter w = new OutputStreamWriter(conn.getOutputStream(), "UTF-8");
-
- w.write(data.toString());
- w.close();
-
- BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
- String inputLine;
- StringBuffer jsonString = new StringBuffer();
-
- while ((inputLine = in.readLine()) != null) {
- jsonString.append(inputLine);
- }
-
- in.close();
- conn.disconnect();
-
- return new JSONObject(jsonString.toString());
-
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- return new JSONObject("{}");
- }
-
- private JSONObject getPostsSorted(JSONObject data) throws MalformedURLException {
-
- // The actual GraphQL query call (if sort parameter is present)
- try {
-
- ArrayList postsJsonStrings = new ArrayList<>();
-
- WebSocketClient wsc = new WebSocketClient(new URI("wss://api.scrolller.com/api/v2/graphql")) {
- @Override
- public void onOpen(ServerHandshake serverHandshake) {
- // As soon as the WebSocket connects send our query
- this.send(data.toString());
- }
-
- @Override
- public void onMessage(String s) {
- postsJsonStrings.add(s);
- if (new JSONObject(s).getJSONObject("data").getJSONObject("fetchSubreddit").has("iterator")) {
- this.close();
- }
- }
-
- @Override
- public void onClose(int i, String s, boolean b) {
- }
-
- @Override
- public void onError(Exception e) {
- LOGGER.error(String.format("WebSocket error, server reported %s", e.getMessage()));
- }
- };
- wsc.connect();
-
- while (!wsc.isClosed()) {
- // Posts list is not over until the connection closes.
- }
-
- JSONObject finalObject = new JSONObject();
- JSONArray posts = new JSONArray();
-
- // Iterator is the last object in the post list, let's duplicate it in his own object for clarity.
- finalObject.put("iterator", new JSONObject(postsJsonStrings.get(postsJsonStrings.size()-1)));
-
- for (String postString : postsJsonStrings) {
- posts.put(new JSONObject(postString));
- }
- finalObject.put("posts", posts);
-
- if (finalObject.getJSONArray("posts").length() == 1 && !finalObject.getJSONArray("posts").getJSONObject(0).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) {
- // Only iterator, no posts.
- return null;
- }
-
- return finalObject;
-
-
- } catch (URISyntaxException ue) {
- // Nothing to catch, it's an hardcoded URI.
- }
-
- return null;
- }
-
-
- @Override
- protected List getURLsFromJSON(JSONObject json) throws JSONException {
-
- boolean sortRequested = json.has("posts");
-
- int bestArea = 0;
- String bestUrl = "";
- List list = new ArrayList<>();
-
- JSONArray itemsList = sortRequested ? json.getJSONArray("posts") : json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items");
-
- for (Object item : itemsList) {
-
- if (sortRequested && !((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) {
- continue;
- }
-
- JSONArray sourcesTMP = sortRequested ? ((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").getJSONArray("mediaSources") : ((JSONObject) item).getJSONArray("mediaSources");
- for (Object sourceTMP : sourcesTMP)
- {
- int widthTMP = ((JSONObject) sourceTMP).getInt("width");
- int heightTMP = ((JSONObject) sourceTMP).getInt("height");
- int areaTMP = widthTMP * heightTMP;
-
- if (areaTMP > bestArea) {
- bestArea = widthTMP;
- bestUrl = ((JSONObject) sourceTMP).getString("url");
- }
- }
- list.add(bestUrl);
- bestUrl = "";
- bestArea = 0;
- }
-
- return list;
- }
-
- @Override
- protected JSONObject getFirstPage() throws IOException {
- try {
- return prepareQuery(null, this.getGID(url), getParameter(url,"sort"));
- } catch (URISyntaxException e) {
- LOGGER.error(String.format("Error obtaining first page: %s", e.getMessage()));
- return null;
- }
- }
-
- @Override
- public JSONObject getNextPage(JSONObject source) throws IOException {
- // Every call the the API contains an "iterator" string that we need to pass to the API to get the next page
- // Checking if iterator is null is not working for some reason, hence why the weird "iterator.toString().equals("null")"
-
- Object iterator = null;
- if (source.has("iterator")) {
- // Sort requested, custom JSON.
- iterator = source.getJSONObject("iterator").getJSONObject("data").getJSONObject("fetchSubreddit").get("iterator");
- } else {
- iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator");
- }
-
- if (!iterator.toString().equals("null")) {
- // Need to change page.
- try {
- return prepareQuery(iterator.toString(), this.getGID(url), getParameter(url,"sort"));
- } catch (URISyntaxException e) {
- LOGGER.error(String.format("Error changing page: %s", e.getMessage()));
- return null;
- }
- } else {
- return null;
- }
- }
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.net.*;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.java_websocket.client.WebSocketClient;
+
+import org.apache.http.NameValuePair;
+import org.apache.http.client.utils.URLEncodedUtils;
+import org.java_websocket.handshake.ServerHandshake;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+import com.rarchives.ripme.ripper.AbstractJSONRipper;
+
+public class ScrolllerRipper extends AbstractJSONRipper {
+
+ public ScrolllerRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "scrolller";
+ }
+ @Override
+ public String getDomain() {
+ return "scrolller.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ // Typical URL is: https://scrolller.com/r/subreddit
+ // Parameters like "filter" and "sort" can be passed (ex: https://scrolller.com/r/subreddit?filter=xxx&sort=yyyy)
+ Pattern p = Pattern.compile("^https?://scrolller\\.com/r/([a-zA-Z0-9]+).*?$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected scrolller.com URL format: " +
+ "scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead");
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+
+
+ private JSONObject prepareQuery(String iterator, String gid, String sortByString) throws IOException, URISyntaxException {
+
+ String QUERY_NOSORT = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }";
+ String QUERY_SORT = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }";
+
+ String filterString = convertFilterString(getParameter(this.url,"filter"));
+
+ JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)).put("sortBy", sortByString.toUpperCase());
+ JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", sortByString.equals("") ? QUERY_NOSORT : QUERY_SORT);
+
+ if (iterator != null) {
+ // Iterator is not present on the first page
+ variablesObject.put("iterator", iterator);
+ }
+ if (!filterString.equals("NOFILTER")) {
+ variablesObject.put("filter", filterString);
+ }
+
+ return sortByString.equals("") ? getPosts(finalQueryObject) : getPostsSorted(finalQueryObject);
+
+ }
+
+
+ public String convertFilterString(String filterParameter) {
+ // Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query
+ // I could basically remove the last "s" and call toUpperCase instead of this switch statement but this looks easier to read.
+ switch (filterParameter.toLowerCase()) {
+ case "pictures":
+ return "PICTURE";
+ case "videos":
+ return "VIDEO";
+ case "albums":
+ return "ALBUM";
+ case "":
+ return "NOFILTER";
+ default:
+ LOGGER.error(String.format("Invalid filter %s using no filter",filterParameter));
+ return "";
+ }
+ }
+
+ public String getParameter(URL url, String parameter) throws MalformedURLException {
+ // Gets passed parameters from the URL
+ String toReplace = String.format("https://scrolller.com/r/%s?",getGID(url));
+ List args= URLEncodedUtils.parse(url.toExternalForm(), Charset.defaultCharset());
+ for (NameValuePair arg:args) {
+ // First parameter contains part of the url so we have to remove it
+ // Ex: for the url https://scrolller.com/r/CatsStandingUp?filter=xxxx&sort=yyyy
+ // 1) arg.getName() => https://scrolller.com/r/CatsStandingUp?filter
+ // 2) arg.getName() => sort
+
+ if (arg.getName().replace(toReplace,"").toLowerCase().equals((parameter))) {
+ return arg.getValue();
+ }
+ }
+ return "";
+ }
+
+ private JSONObject getPosts(JSONObject data) {
+ // The actual GraphQL query call
+
+ try {
+ String url = "https://api.scrolller.com/api/v2/graphql";
+
+ URL obj = new URI(url).toURL();
+ HttpURLConnection conn = (HttpURLConnection) obj.openConnection();
+ conn.setReadTimeout(5000);
+ conn.addRequestProperty("Accept-Language", "en-US,en;q=0.8");
+ conn.addRequestProperty("User-Agent", "Mozilla");
+ conn.addRequestProperty("Referer", "scrolller.com");
+
+ conn.setDoOutput(true);
+
+ OutputStreamWriter w = new OutputStreamWriter(conn.getOutputStream(), "UTF-8");
+
+ w.write(data.toString());
+ w.close();
+
+ BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
+ String inputLine;
+ StringBuffer jsonString = new StringBuffer();
+
+ while ((inputLine = in.readLine()) != null) {
+ jsonString.append(inputLine);
+ }
+
+ in.close();
+ conn.disconnect();
+
+ return new JSONObject(jsonString.toString());
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ return new JSONObject("{}");
+ }
+
+ private JSONObject getPostsSorted(JSONObject data) throws MalformedURLException {
+
+ // The actual GraphQL query call (if sort parameter is present)
+ try {
+
+ ArrayList postsJsonStrings = new ArrayList<>();
+
+ WebSocketClient wsc = new WebSocketClient(new URI("wss://api.scrolller.com/api/v2/graphql")) {
+ @Override
+ public void onOpen(ServerHandshake serverHandshake) {
+ // As soon as the WebSocket connects send our query
+ this.send(data.toString());
+ }
+
+ @Override
+ public void onMessage(String s) {
+ postsJsonStrings.add(s);
+ if (new JSONObject(s).getJSONObject("data").getJSONObject("fetchSubreddit").has("iterator")) {
+ this.close();
+ }
+ }
+
+ @Override
+ public void onClose(int i, String s, boolean b) {
+ }
+
+ @Override
+ public void onError(Exception e) {
+ LOGGER.error(String.format("WebSocket error, server reported %s", e.getMessage()));
+ }
+ };
+ wsc.connect();
+
+ while (!wsc.isClosed()) {
+ // Posts list is not over until the connection closes.
+ }
+
+ JSONObject finalObject = new JSONObject();
+ JSONArray posts = new JSONArray();
+
+ // Iterator is the last object in the post list, let's duplicate it in its own object for clarity.
+ finalObject.put("iterator", new JSONObject(postsJsonStrings.get(postsJsonStrings.size()-1)));
+
+ for (String postString : postsJsonStrings) {
+ posts.put(new JSONObject(postString));
+ }
+ finalObject.put("posts", posts);
+
+ if (finalObject.getJSONArray("posts").length() == 1 && !finalObject.getJSONArray("posts").getJSONObject(0).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) {
+ // Only iterator, no posts.
+ return null;
+ }
+
+ return finalObject;
+
+
+ } catch (URISyntaxException ue) {
+ // Nothing to catch, it's a hardcoded URI.
+ }
+
+ return null;
+ }
+
+
+ @Override
+ protected List