forked from RipMeApp/ripme
-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Kemono Party Ripper and test case. (#163)
- Loading branch information
1 parent
ba29da0
commit c9e5685
Showing
2 changed files
with
288 additions
and
0 deletions.
There are no files selected for viewing
249 changes: 249 additions & 0 deletions
249
src/main/java/com/rarchives/ripme/ripper/rippers/KemonoPartyRipper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,249 @@ | ||
package com.rarchives.ripme.ripper.rippers; | ||
|
||
import com.rarchives.ripme.ripper.AbstractJSONRipper; | ||
import com.rarchives.ripme.utils.Http; | ||
import com.rarchives.ripme.utils.Utils; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.json.JSONArray; | ||
import org.json.JSONException; | ||
import org.json.JSONObject; | ||
|
||
import java.io.IOException; | ||
import java.net.MalformedURLException; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* <a href="https://kemono.su/api/schema">See this link for the API schema</a>. | ||
*/ | ||
public class KemonoPartyRipper extends AbstractJSONRipper { | ||
private static final Logger LOGGER = LogManager.getLogger(KemonoPartyRipper.class); | ||
private static final String IMG_URL_BASE = "https://c3.kemono.su/data"; | ||
private static final String VID_URL_BASE = "https://c1.kemono.su/data"; | ||
private static final Pattern IMG_PATTERN = Pattern.compile("^.*\\.(jpg|jpeg|png|gif|apng|webp|tif|tiff)$", Pattern.CASE_INSENSITIVE); | ||
private static final Pattern VID_PATTERN = Pattern.compile("^.*\\.(webm|mp4|m4v)$", Pattern.CASE_INSENSITIVE); | ||
|
||
// just so we can return a JSONObject from getFirstPage | ||
private static final String KEY_WRAPPER_JSON_ARRAY = "array"; | ||
|
||
private static final String KEY_FILE = "file"; | ||
private static final String KEY_PATH = "path"; | ||
private static final String KEY_ATTACHMENTS = "attachments"; | ||
|
||
// One of "onlyfans" or "fansly", but might have others in future? | ||
private final String service; | ||
|
||
// Username of the page to be ripped | ||
private final String user; | ||
private int internalFileLimit; | ||
|
||
/** | ||
* This method is used to initialize a KemonoPartyRipper object with the given URL. | ||
* | ||
* @param url The URL from which to rip the content. | ||
* @throws IOException If an I/O error occurs while initializing the ripper. | ||
*/ | ||
public KemonoPartyRipper(URL url) throws IOException { | ||
super(url); | ||
List<String> pathElements = Arrays.stream(url.getPath().split("/")) | ||
.filter(element -> !element.isBlank()) | ||
.toList(); | ||
|
||
service = pathElements.get(0); | ||
user = pathElements.get(2); | ||
|
||
if (service == null || user == null || service.isBlank() || user.isBlank()) { | ||
LOGGER.warn("service=" + service + ", user=" + user); | ||
throw new MalformedURLException("Invalid kemono.party URL: " + url); | ||
} | ||
LOGGER.debug("Parsed service=" + service + " and user=" + user + " from " + url); | ||
} | ||
|
||
@Override | ||
protected String getDomain() { | ||
return "kemono.party"; | ||
} | ||
|
||
@Override | ||
public String getHost() { | ||
return "kemono.party"; | ||
} | ||
|
||
@Override | ||
public boolean canRip(URL url) { | ||
String host = url.getHost(); | ||
return host.endsWith("kemono.party") || host.endsWith("kemono.su"); | ||
} | ||
|
||
/** | ||
* Retrieves the GID (Group ID) for a given URL. | ||
* | ||
* @param url The URL for which to retrieve the GID. | ||
* @return The GID as a String. | ||
*/ | ||
@Override | ||
public String getGID(URL url) { | ||
return Utils.filesystemSafe(String.format("%s_%s", service, user)); | ||
} | ||
|
||
/** | ||
* Retrieves the first page of data from the API. | ||
* | ||
* @return The first page of data as a JSONObject. | ||
* @throws IOException If an I/O error occurs. | ||
*/ | ||
@Override | ||
protected JSONObject getFirstPage() throws IOException { | ||
String apiUrl = String.format("https://kemono.su/api/v1/%s/user/%s", service, user); | ||
String jsonArrayString = Http.url(apiUrl) | ||
.ignoreContentType() | ||
.response() | ||
.body(); | ||
JSONArray jsonArray = new JSONArray(jsonArrayString); | ||
internalFileLimit += 50; | ||
// Ideally we'd just return the JSONArray from here, but we have to wrap it in a JSONObject | ||
JSONObject wrapperObject = new JSONObject(); | ||
wrapperObject.put(KEY_WRAPPER_JSON_ARRAY, jsonArray); | ||
return wrapperObject; | ||
} | ||
|
||
/** | ||
* Retrieves the title of an album from the given URL. | ||
* | ||
* @param url The URL of the album. | ||
* @return The title of the album. | ||
* @throws MalformedURLException If the URL is malformed. | ||
*/ | ||
@Override | ||
public String getAlbumTitle(URL url) throws MalformedURLException { | ||
String title; | ||
try { | ||
//Gets artist name | ||
title = getHost() + "_" + getGID(url) + "_" + Http.url(url).get().select("meta[name=artist_name][content]").attr("content"); | ||
}catch (Exception e){ | ||
LOGGER.info("Failed to get album title, using id."); | ||
title = getGID(url); | ||
} | ||
return title; | ||
} | ||
|
||
/** | ||
* Retrieves the next page of data from the API. Kemono uses a 50 request limit, so you must offset by 50 each time. | ||
* | ||
* @param doc The current page data as a JSONObject. | ||
* @return The next page data as a JSONObject. | ||
* @throws IOException If an I/O error occurs. | ||
* @throws URISyntaxException If the URI syntax is invalid. | ||
*/ | ||
@Override | ||
protected JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException { | ||
String apiUrl = String.format("https://kemono.su/api/v1/%s/user/%s?o=%s", service, user, internalFileLimit); | ||
String jsonArrayString = Http.url(apiUrl) | ||
.ignoreContentType() | ||
.response() | ||
.body(); | ||
JSONArray jsonArray = new JSONArray(jsonArrayString); | ||
JSONObject wrapperObject = new JSONObject(); | ||
wrapperObject.put(KEY_WRAPPER_JSON_ARRAY, jsonArray); | ||
internalFileLimit += 50; | ||
if(jsonArray.isEmpty()){ | ||
return null; | ||
} | ||
return wrapperObject; | ||
} | ||
|
||
/** | ||
* Retrieves the URLs of files from a given JSON object and adds them to a list. | ||
* | ||
* @param json The JSON object containing information about the files. | ||
* @return A list of URLs of the files. | ||
*/ | ||
@Override | ||
protected List<String> getURLsFromJSON(JSONObject json) { | ||
// extract the array from our wrapper JSONObject | ||
JSONArray posts = json.getJSONArray(KEY_WRAPPER_JSON_ARRAY); | ||
ArrayList<String> urls = new ArrayList<>(); | ||
for (int i = 0; i < posts.length(); i++) { | ||
JSONObject post = posts.getJSONObject(i); | ||
pullFileUrl(post, urls); | ||
pullAttachmentUrls(post, urls); | ||
} | ||
LOGGER.debug("Pulled " + urls.size() + " URLs from " + posts.length() + " posts"); | ||
return urls; | ||
} | ||
|
||
@Override | ||
protected void downloadURL(URL url, int index) { | ||
addURLToDownload(url, getPrefix(index)); | ||
} | ||
|
||
/** | ||
* Retrieves the URL of a file from a given JSONObject and adds it to the provided list. | ||
* | ||
* @param post The JSONObject containing information about the file. | ||
* @param results The list to which the URL of the file will be added. | ||
*/ | ||
private void pullFileUrl(JSONObject post, ArrayList<String> results) { | ||
try { | ||
JSONObject file = post.getJSONObject(KEY_FILE); | ||
String path = file.getString(KEY_PATH); | ||
if (isImage(path)) { | ||
String url = IMG_URL_BASE + path; | ||
results.add(url); | ||
} else if (isVideo(path)) { | ||
String url = VID_URL_BASE + path; | ||
results.add(url); | ||
} else { | ||
LOGGER.error("Unknown extension for kemono.su path: " + path); | ||
} | ||
} catch (JSONException e) { | ||
/* No-op */ | ||
} | ||
} | ||
|
||
/** | ||
* Retrieves the URLs of attachments from a given JSONObject and adds them to the provided list. | ||
* | ||
* @param post The JSONObject containing information about the post. | ||
* @param results The list to which the URLs of the attachments will be added. | ||
*/ | ||
private void pullAttachmentUrls(JSONObject post, ArrayList<String> results) { | ||
try { | ||
JSONArray attachments = post.getJSONArray(KEY_ATTACHMENTS); | ||
for (int i = 0; i < attachments.length(); i++) { | ||
JSONObject attachment = attachments.getJSONObject(i); | ||
pullFileUrl(attachment, results); | ||
} | ||
} catch (JSONException e) { | ||
/* No-op */ | ||
} | ||
} | ||
|
||
/** | ||
* Checks if the given path represents an image file. | ||
* | ||
* @param path The path of the file to be checked. | ||
* @return True if the file is an image, false otherwise. | ||
*/ | ||
private boolean isImage(String path) { | ||
Matcher matcher = IMG_PATTERN.matcher(path); | ||
return matcher.matches(); | ||
} | ||
|
||
/** | ||
* Checks if the given path represents a video file. | ||
* | ||
* @param path The path of the file to be checked. | ||
* @return True if the file is a video, false otherwise. | ||
*/ | ||
private boolean isVideo(String path) { | ||
Matcher matcher = VID_PATTERN.matcher(path); | ||
return matcher.matches(); | ||
} | ||
} |
39 changes: 39 additions & 0 deletions
39
src/test/java/com/rarchives/ripme/tst/ripper/rippers/KemonoPartyRipperTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package com.rarchives.ripme.tst.ripper.rippers; | ||
|
||
import com.rarchives.ripme.ripper.rippers.KemonoPartyRipper; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import java.io.IOException; | ||
import java.net.URI; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
||
public class KemonoPartyRipperTest extends RippersTest { | ||
@Test | ||
public void testRip() throws IOException, URISyntaxException { | ||
URL url = new URI("https://kemono.su/patreon/user/7874509").toURL(); | ||
KemonoPartyRipper ripper = new KemonoPartyRipper(url); | ||
testRipper(ripper); | ||
} | ||
|
||
@Test | ||
public void testUrlParsing() throws IOException, URISyntaxException { | ||
String expectedGid = "patreon_7874509"; | ||
String[] urls = new String[]{ | ||
"https://kemono.su/patreon/user/7874509", // normal url | ||
"http://kemono.su/patreon/user/7874509", // http, not https | ||
"https://kemono.su/patreon/user/7874509/", // with slash at the end | ||
"https://kemono.su/patreon/user/7874509?whatever=abc", // with url params | ||
"https://kemono.party/patreon/user/7874509", // alternate domain | ||
}; | ||
for (String stringUrl : urls) { | ||
URL url = new URI(stringUrl).toURL(); | ||
KemonoPartyRipper ripper = new KemonoPartyRipper(url); | ||
assertTrue(ripper.canRip(url)); | ||
assertEquals(expectedGid, ripper.getGID(url)); | ||
} | ||
} | ||
} |