1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-04-21 04:11:50 +02:00

Implement Fapwiz Ripper (#2086)

* Implement FapwizRipper
This commit is contained in:
metaprime 2025-02-11 09:15:26 -08:00 committed by GitHub
parent 647de8f3e2
commit e41eb25bf1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 316 additions and 0 deletions

View File

@ -0,0 +1,154 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
/**
 * Ripper for fapwiz.com.
 *
 * <p>Three kinds of URL are recognised:
 * <ul>
 *   <li>category pages: {@code fapwiz.com/category/CATEGORY}</li>
 *   <li>user pages: {@code fapwiz.com/USER}</li>
 *   <li>post pages: {@code fapwiz.com/USER/POST}</li>
 * </ul>
 *
 * <p>A category URL also satisfies {@link #POST_PATTERN} (with "category" in
 * the user position), so the patterns must always be tested in the order
 * category, user, post, and the first match wins.
 */
public class FapwizRipper extends AbstractHTMLRipper {

    private static final Logger logger = LogManager.getLogger(FapwizRipper.class);

    /** Delay applied before queueing each download, in milliseconds. */
    private static final int DOWNLOAD_DELAY_MS = 2000;

    private static final Pattern CATEGORY_PATTERN = Pattern
            .compile("https?://fapwiz\\.com/category/([a-zA-Z0-9_-]+)/?$");

    private static final Pattern USER_PATTERN = Pattern
            .compile("https?://fapwiz\\.com/([a-zA-Z0-9_-]+)/?$");

    // Note that the last part of the pattern can contain unicode emoji which
    // get encoded as %-encoded UTF-8 bytes in the URL, so we allow % characters.
    private static final Pattern POST_PATTERN = Pattern
            .compile("https?://fapwiz\\.com/([a-zA-Z0-9_-]+)/([a-zA-Z0-9_%-]+)/?$");

    public FapwizRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "fapwiz";
    }

    @Override
    public String getDomain() {
        return "fapwiz.com";
    }

    /**
     * Builds the album identifier for a fapwiz URL.
     *
     * @param url the URL to classify
     * @return {@code category_NAME}, {@code user_NAME} or {@code post_USER_POST}
     * @throws MalformedURLException if the URL matches none of the supported formats
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        String externalForm = url.toExternalForm();

        Matcher m = CATEGORY_PATTERN.matcher(externalForm);
        if (m.matches()) {
            return "category_" + m.group(1);
        }

        m = USER_PATTERN.matcher(externalForm);
        if (m.matches()) {
            return "user_" + m.group(1);
        }

        m = POST_PATTERN.matcher(externalForm);
        if (m.matches()) {
            return "post_" + m.group(1) + "_" + m.group(2);
        }

        throw new MalformedURLException("Expected fapwiz URL format: " +
                "fapwiz.com/USER or fapwiz.com/USER/POST or " +
                "fapwiz.com/category/CATEGORY - got " + url + " instead");
    }

    /**
     * Collects video URLs from a listing page by rewriting thumbnail image
     * URLs into the corresponding {@code .mp4} URLs.
     *
     * @param doc     the listing page
     * @param results list the discovered URLs are appended to
     */
    void processUserOrCategoryPage(Document doc, List<String> results) {
        // The category page looks a lot like the structure of a user page,
        // so this method is written to be compatible with both.
        doc.select(".post-items-holder img").forEach(e -> {
            String imgSrc = e.attr("src");

            // An image without a src gives us nothing to derive a video URL from.
            if (imgSrc.isEmpty()) {
                return;
            }

            // Skip the user profile picture thumbnail insets
            if (imgSrc.endsWith("-thumbnail-icon.jpg")) {
                return;
            }

            // Replace -thumbnail.jpg with .mp4
            String videoSrc = imgSrc.replace("-thumbnail.jpg", ".mp4");
            results.add(videoSrc);
        });
    }

    void processCategoryPage(Document doc, List<String> results) {
        logger.info("Processing category page: {}", url);
        processUserOrCategoryPage(doc, results);
    }

    void processUserPage(Document doc, List<String> results) {
        logger.info("Processing user page: {}", url);
        processUserOrCategoryPage(doc, results);
    }

    /**
     * Collects the {@code src} of every {@code <source>} inside a
     * {@code <video>} element on a post page.
     *
     * @param doc     the post page
     * @param results list the discovered URLs are appended to
     */
    void processPostPage(Document doc, List<String> results) {
        logger.info("Processing post page: {}", url);
        doc.select("video source").forEach(video -> {
            String src = video.attr("src");
            // Skip <source> elements that carry no URL.
            if (!src.isEmpty()) {
                results.add(src);
            }
        });
    }

    /**
     * Extracts the downloadable URLs from a page, choosing the extraction
     * strategy from the kind of URL this ripper was created with.
     *
     * @param doc the page to scan
     * @return the URLs found; empty if the ripper URL matches no known format
     */
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> results = new ArrayList<>();
        String externalForm = url.toExternalForm();

        // The branches must be mutually exclusive and in this order: a
        // category URL also matches POST_PATTERN, and would otherwise be
        // processed a second time as if it were a post page.
        if (CATEGORY_PATTERN.matcher(externalForm).matches()) {
            processCategoryPage(doc, results);
        } else if (USER_PATTERN.matcher(externalForm).matches()) {
            processUserPage(doc, results);
        } else if (POST_PATTERN.matcher(externalForm).matches()) {
            processPostPage(doc, results);
        }

        return results;
    }

    private Document getDocument(String url, int retries) throws IOException {
        return Http.url(url).userAgent(USER_AGENT).retries(retries).get();
    }

    private Document getDocument(String url) throws IOException {
        return getDocument(url, 1);
    }

    /**
     * Fetches the page linked by the {@code a.next} element of the given page.
     *
     * @param page the current page
     * @return the next page
     * @throws IOException if there is no usable next-page link, or fetching it fails
     */
    @Override
    public Document getNextPage(Document page) throws IOException {
        logger.info("Getting next page for url: {}", url);

        Elements next = page.select("a.next");
        if (next.isEmpty()) {
            logger.info("No more pages");
            throw new IOException("No more pages.");
        }

        // Prefer the absolute form so a relative link still resolves; this
        // yields "" when the document has no base URI, in which case fall
        // back to the raw attribute value.
        String href = next.attr("abs:href");
        if (href.isEmpty()) {
            href = next.attr("href");
        }

        // A next link without a target cannot be followed.
        if (href.isEmpty()) {
            logger.info("No more pages");
            throw new IOException("No more pages.");
        }

        logger.info("Found next page: {}", href);
        return getDocument(href);
    }

    @Override
    public void downloadURL(URL url, int index) {
        sleep(DOWNLOAD_DELAY_MS);
        addURLToDownload(url, getPrefix(index));
    }
}

View File

@ -0,0 +1,162 @@
package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ripper.rippers.FapwizRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
/**
 * Tests for {@link FapwizRipper}: pagination, ripping of post pages, and
 * GID generation for post and user URLs.
 */
public class FapwizRipperTest extends RippersTest {

    @Test
    @Tag("flaky") // It seems like fetching the document within the test can be flaky.
    public void testGetNextPage_NoNextPage() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/alison-esha/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();

        // If nothing is thrown, the test fails because there *was* a next
        // page even though there shouldn't be.
        Assertions.assertThrows(IOException.class, () -> ripper.getNextPage(firstPage));
    }

    @Test
    @Tag("flaky") // It seems like fetching the document within the test can be flaky.
    public void testGetNextPage_HasNextPage() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/miaipanema/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();

        // An IOException here means we should have found a next page but
        // didn't; letting it propagate fails the test and keeps the cause.
        Document doc = ripper.getNextPage(firstPage);
        Assertions.assertNotNull(doc);
    }

    @Test
    public void testRipPost() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    @Test
    public void testRipPostWithNumbersInUsername1() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/desperate_bug_7776/lets-be-friends-that-secretly-fuck-thanks/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    @Test
    public void testRipPostWithEmojiInShortUrl() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/miaipanema/my-grip-needs-a-name-%f0%9f%a4%ad%f0%9f%91%87%f0%9f%8f%bc/")
                .toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    @Test
    public void testRipPostWithEmojiInLongUrlAtEnd() throws IOException, URISyntaxException {
        URL url = new URI(
                "https://fapwiz.com/bimeat1998/just-imagine-youre-out-with-your-girl-and-your-buddies-and-then-she-makes-this-move-%f0%9f%98%8d/")
                .toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    @Test
    public void testRipPostWithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
        URL url = new URI(
                "https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
                .toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        testRipper(ripper);
    }

    // TODO Test rip user
    // TODO Test rip category

    // Test Post pages GetGID
    @Test
    public void testPostGetGID1_Simple() throws IOException, URISyntaxException {
        URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("post_petiteasiantravels_riding-at-9-months-pregnant", ripper.getGID(url));
    }

    @Test
    public void testPostGetGID2_WithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
        URL url = new URI(
                "https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
                .toURL();
        FapwizRipper ripper = new FapwizRipper(url);

        // In this case the filesystem safe version of the GID is
        // "post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-"
        // but the GID doesn't truncate and doesn't remove non-filesystem-safe
        // characters.
        String gid = ripper.getGID(url);
        Assertions.assertEquals(
                "post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96",
                gid);

        // Test directory name on disk (filesystem safe sanitized as the ripper will
        // do).
        String directoryName = Utils.filesystemSafe(ripper.getHost() + "_" + gid);
        Assertions.assertEquals(
                "fapwiz_post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-",
                directoryName);
    }

    // Test User pages GetGID
    @Test
    public void testUserGetGID1_Simple() throws IOException, URISyntaxException {
        // Test a "simple" username that is all letters.
        URL url = new URI("https://fapwiz.com/petiteasiantravels/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("user_petiteasiantravels", ripper.getGID(url));
    }

    @Test
    public void testUserGetGID2_Numbers() throws IOException, URISyntaxException {
        // Test a more complex username that contains numbers.
        URL url = new URI("https://fapwiz.com/bimeat1998/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("user_bimeat1998", ripper.getGID(url));
    }

    @Test
    public void testUserGetGID3_HyphensAndNumbers() throws IOException, URISyntaxException {
        // Test a more complex username that contains hyphens and numbers.
        URL url = new URI("https://fapwiz.com/used-airport-4076/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("user_used-airport-4076", ripper.getGID(url));
    }

    @Test
    public void testUserGetGID4_Underscores() throws IOException, URISyntaxException {
        // Test a more complex username that contains underscores.
        URL url = new URI("https://fapwiz.com/desperate_bug_7776/").toURL();
        FapwizRipper ripper = new FapwizRipper(url);
        Assertions.assertEquals("user_desperate_bug_7776", ripper.getGID(url));
    }
}