Skip to content

Commit

Permalink
Kemono Party Ripper and test case. (#163)
Browse files Browse the repository at this point in the history
  • Loading branch information
Undid-Iridium authored and soloturn committed Dec 9, 2023
1 parent 7aa7989 commit addaec0
Show file tree
Hide file tree
Showing 2 changed files with 288 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Ripper for creator pages on kemono.su / kemono.party.
 *
 * <p>Accepts URLs of the form {@code https://kemono.su/<service>/user/<id>} and walks the
 * creator's posts through the JSON API, one page of posts at a time, collecting the image and
 * video files referenced by each post and by its attachments.
 *
 * <p><a href="https://kemono.su/api/schema">See this link for the API schema</a>.
 */
public class KemonoPartyRipper extends AbstractJSONRipper {
    private static final Logger LOGGER = LogManager.getLogger(KemonoPartyRipper.class);
    private static final String IMG_URL_BASE = "https://c3.kemono.su/data";
    private static final String VID_URL_BASE = "https://c1.kemono.su/data";
    private static final Pattern IMG_PATTERN = Pattern.compile("^.*\\.(jpg|jpeg|png|gif|apng|webp|tif|tiff)$", Pattern.CASE_INSENSITIVE);
    private static final Pattern VID_PATTERN = Pattern.compile("^.*\\.(webm|mp4|m4v)$", Pattern.CASE_INSENSITIVE);

    // just so we can return a JSONObject from getFirstPage
    private static final String KEY_WRAPPER_JSON_ARRAY = "array";

    private static final String KEY_FILE = "file";
    private static final String KEY_PATH = "path";
    private static final String KEY_ATTACHMENTS = "attachments";

    // Second path segment of a creator URL: /<service>/user/<id>
    private static final String PATH_SEGMENT_USER = "user";

    // The API is queried in steps of this many posts (see getNextPage)
    private static final int POSTS_PER_PAGE = 50;

    // First path segment of the URL, e.g. "patreon"
    private final String service;

    // Id of the creator whose page is ripped (third path segment of the URL)
    private final String user;

    // Offset ("o" query parameter) to use for the next page request
    private int nextPageOffset;

    /**
     * Initializes a KemonoPartyRipper for the given creator URL.
     *
     * @param url The URL from which to rip the content; its path must look like
     *            {@code /<service>/user/<id>}.
     * @throws MalformedURLException If the path does not have the expected layout.
     * @throws IOException           If an I/O error occurs while initializing the ripper.
     */
    public KemonoPartyRipper(URL url) throws IOException {
        super(url);
        List<String> pathElements = Arrays.stream(url.getPath().split("/"))
                .filter(element -> !element.isBlank())
                .toList();

        // Validate the layout before indexing, so a short or foreign path is reported as a
        // malformed URL instead of surfacing as an IndexOutOfBoundsException.
        if (pathElements.size() < 3 || !PATH_SEGMENT_USER.equals(pathElements.get(1))) {
            LOGGER.warn("Unexpected path layout: {}", url.getPath());
            throw new MalformedURLException("Invalid kemono.party URL: " + url);
        }

        service = pathElements.get(0);
        user = pathElements.get(2);
        LOGGER.debug("Parsed service={} and user={} from {}", service, user, url);
    }

    @Override
    protected String getDomain() {
        return "kemono.party";
    }

    @Override
    public String getHost() {
        return "kemono.party";
    }

    /**
     * Tells whether the URL points at one of the supported hosts (kemono.party or kemono.su,
     * including their subdomains).
     *
     * @param url The URL to check.
     * @return True if this ripper handles the URL's host.
     */
    @Override
    public boolean canRip(URL url) {
        String host = url.getHost();
        return isHostWithin(host, "kemono.party") || isHostWithin(host, "kemono.su");
    }

    /**
     * Checks that {@code host} is {@code domain} itself or a subdomain of it. A plain
     * {@code endsWith(domain)} would also accept unrelated hosts such as "notkemono.su".
     */
    private static boolean isHostWithin(String host, String domain) {
        if (host == null) {
            return false;
        }
        if (host.equalsIgnoreCase(domain)) {
            return true;
        }
        String suffix = "." + domain;
        // regionMatches returns false (rather than throwing) when host is shorter than suffix
        return host.regionMatches(true, host.length() - suffix.length(), suffix, 0, suffix.length());
    }

    /**
     * Builds the GID (Group ID) for this rip from the parsed service and user.
     *
     * @param url Unused; the values parsed in the constructor are used instead.
     * @return The GID as a filesystem-safe String of the form {@code <service>_<user>}.
     */
    @Override
    public String getGID(URL url) {
        return Utils.filesystemSafe(String.format("%s_%s", service, user));
    }

    /**
     * Retrieves the first page of posts from the API.
     *
     * @return The first page of posts, wrapped in a JSONObject.
     * @throws IOException If the request fails or the response is not a JSON array.
     */
    @Override
    protected JSONObject getFirstPage() throws IOException {
        JSONArray posts = fetchPosts(0);
        // Assign rather than increment, so a repeated call cannot skew the paging offset.
        nextPageOffset = POSTS_PER_PAGE;
        return wrapPosts(posts);
    }

    /**
     * Retrieves the title of the album from the creator page.
     *
     * @param url The URL of the album.
     * @return {@code <host>_<gid>_<artist name>}, or just the GID if the page cannot be read.
     * @throws MalformedURLException If the URL is malformed.
     */
    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
        String gid = getGID(url);
        try {
            // Gets artist name
            String artistName = Http.url(url).get().select("meta[name=artist_name][content]").attr("content");
            return getHost() + "_" + gid + "_" + artistName;
        } catch (Exception e) {
            // Best effort only: any failure falls back to the id, but keep the cause in the log.
            LOGGER.info("Failed to get album title, using id.", e);
            return gid;
        }
    }

    /**
     * Retrieves the next page of posts from the API. Kemono uses a 50 request limit, so the
     * offset is advanced by 50 each time.
     *
     * @param doc The current page data as a JSONObject (unused).
     * @return The next page of posts wrapped in a JSONObject, or null when there are no more posts.
     * @throws IOException        If the request fails or the response is not a JSON array.
     * @throws URISyntaxException Declared by the overridden method; not thrown here.
     */
    @Override
    protected JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException {
        JSONArray posts = fetchPosts(nextPageOffset);
        nextPageOffset += POSTS_PER_PAGE;
        if (posts.isEmpty()) {
            return null;
        }
        return wrapPosts(posts);
    }

    /**
     * Requests the creator's posts starting at the given offset.
     *
     * @param offset Number of posts to skip; 0 requests the first page without an offset parameter.
     * @return The posts returned by the API.
     * @throws IOException If the request fails or the body cannot be parsed as a JSON array.
     */
    private JSONArray fetchPosts(int offset) throws IOException {
        String apiUrl = String.format("https://kemono.su/api/v1/%s/user/%s", service, user);
        if (offset > 0) {
            apiUrl += "?o=" + offset;
        }
        String body = Http.url(apiUrl)
                .ignoreContentType()
                .response()
                .body();
        try {
            return new JSONArray(body);
        } catch (JSONException e) {
            // e.g. an HTML error page; report it through the declared exception type
            throw new IOException("Response from " + apiUrl + " is not a JSON array", e);
        }
    }

    /**
     * Wraps the posts in a JSONObject. Ideally the JSONArray would be returned directly, but the
     * page methods have to return a JSONObject.
     */
    private static JSONObject wrapPosts(JSONArray posts) {
        JSONObject wrapperObject = new JSONObject();
        wrapperObject.put(KEY_WRAPPER_JSON_ARRAY, posts);
        return wrapperObject;
    }

    /**
     * Collects the file URLs of every post on a page.
     *
     * @param json The wrapper object produced by getFirstPage / getNextPage.
     * @return A list of URLs of the files.
     */
    @Override
    protected List<String> getURLsFromJSON(JSONObject json) {
        // extract the array from our wrapper JSONObject
        JSONArray posts = json.getJSONArray(KEY_WRAPPER_JSON_ARRAY);
        List<String> urls = new ArrayList<>();
        for (int i = 0; i < posts.length(); i++) {
            JSONObject post = posts.optJSONObject(i);
            if (post == null) {
                // not a JSON object; nothing to extract
                continue;
            }
            pullFileUrl(post, urls);
            pullAttachmentUrls(post, urls);
        }
        LOGGER.debug("Pulled {} URLs from {} posts", urls.size(), posts.length());
        return urls;
    }

    @Override
    protected void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }

    /**
     * Adds the URL of a post's main file, if it has one, to the provided list.
     *
     * @param post    The JSONObject that may contain a "file" object.
     * @param results The list to which the URL of the file will be added.
     */
    private void pullFileUrl(JSONObject post, List<String> results) {
        addMediaUrl(post.optJSONObject(KEY_FILE), results);
    }

    /**
     * Adds the URLs of a post's attachments to the provided list.
     *
     * @param post    The JSONObject containing information about the post.
     * @param results The list to which the URLs of the attachments will be added.
     */
    private void pullAttachmentUrls(JSONObject post, List<String> results) {
        JSONArray attachments = post.optJSONArray(KEY_ATTACHMENTS);
        if (attachments == null) {
            return;
        }
        for (int i = 0; i < attachments.length(); i++) {
            JSONObject attachment = attachments.optJSONObject(i);
            if (attachment == null) {
                // skip the malformed entry but keep processing the rest
                continue;
            }
            if (attachment.has(KEY_PATH)) {
                // An attachment carries its "path" directly (see the API schema); looking for a
                // nested "file" object here would silently drop every attachment.
                addMediaUrl(attachment, results);
            } else {
                // Still accept the nested {"file": {"path": ...}} shape.
                pullFileUrl(attachment, results);
            }
        }
    }

    /**
     * Turns a file entry into a download URL and adds it to the list. Entries that are missing,
     * have no path, or have an unrecognized extension are skipped.
     *
     * @param entry   A JSON object expected to hold a "path" value; may be null.
     * @param results The list to which the URL will be added.
     */
    private void addMediaUrl(JSONObject entry, List<String> results) {
        if (entry == null) {
            return;
        }
        String path = entry.optString(KEY_PATH, "");
        if (path.isBlank()) {
            return;
        }
        if (isImage(path)) {
            results.add(IMG_URL_BASE + path);
        } else if (isVideo(path)) {
            results.add(VID_URL_BASE + path);
        } else {
            LOGGER.error("Unknown extension for kemono.su path: {}", path);
        }
    }

    /**
     * Checks if the given path represents an image file.
     *
     * @param path The path of the file to be checked.
     * @return True if the file is an image, false otherwise.
     */
    private boolean isImage(String path) {
        return IMG_PATTERN.matcher(path).matches();
    }

    /**
     * Checks if the given path represents a video file.
     *
     * @param path The path of the file to be checked.
     * @return True if the file is a video, false otherwise.
     */
    private boolean isVideo(String path) {
        return VID_PATTERN.matcher(path).matches();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package com.rarchives.ripme.tst.ripper.rippers;

import com.rarchives.ripme.ripper.rippers.KemonoPartyRipper;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Tests for {@link KemonoPartyRipper}: one full rip and one check of URL parsing.
 */
public class KemonoPartyRipperTest extends RippersTest {
    /**
     * Runs a complete rip against a known creator page.
     */
    @Test
    public void testRip() throws IOException, URISyntaxException {
        testRipper(new KemonoPartyRipper(toUrl("https://kemono.su/patreon/user/7874509")));
    }

    /**
     * Verifies that every accepted spelling of the creator URL is rippable and maps to the same GID.
     */
    @Test
    public void testUrlParsing() throws IOException, URISyntaxException {
        final String expectedGid = "patreon_7874509";
        final String[] candidates = {
                "https://kemono.su/patreon/user/7874509", // normal url
                "http://kemono.su/patreon/user/7874509", // http, not https
                "https://kemono.su/patreon/user/7874509/", // with slash at the end
                "https://kemono.su/patreon/user/7874509?whatever=abc", // with url params
                "https://kemono.party/patreon/user/7874509", // alternate domain
        };
        for (int i = 0; i < candidates.length; i++) {
            URL parsed = toUrl(candidates[i]);
            KemonoPartyRipper ripper = new KemonoPartyRipper(parsed);
            assertTrue(ripper.canRip(parsed));
            assertEquals(expectedGid, ripper.getGID(parsed));
        }
    }

    /**
     * Converts a string to a URL by way of URI, as both tests need.
     */
    private static URL toUrl(String spec) throws IOException, URISyntaxException {
        return new URI(spec).toURL();
    }
}

0 comments on commit addaec0

Please sign in to comment.