mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-04-21 04:11:50 +02:00
parent
647de8f3e2
commit
e41eb25bf1
@ -0,0 +1,154 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class FapwizRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(FapwizRipper.class);
|
||||
|
||||
private static final Pattern CATEGORY_PATTERN = Pattern.compile("https?://fapwiz.com/category/([a-zA-Z0-9_-]+)/?$");
|
||||
|
||||
private static final Pattern USER_PATTERN = Pattern.compile("https?://fapwiz.com/([a-zA-Z0-9_-]+)/?$");
|
||||
|
||||
// Note that the last part of the pattern can contain unicode emoji which
|
||||
// get encoded as %-encoded UTF-8 bytes in the URL, so we allow % characters.
|
||||
private static final Pattern POST_PATTERN = Pattern
|
||||
.compile("https?://fapwiz.com/([a-zA-Z0-9_-]+)/([a-zA-Z0-9_%-]+)/?$");
|
||||
|
||||
public FapwizRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "fapwiz";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "fapwiz.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Matcher m;
|
||||
|
||||
m = CATEGORY_PATTERN.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return "category_" + m.group(1);
|
||||
}
|
||||
|
||||
m = USER_PATTERN.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return "user_" + m.group(1);
|
||||
}
|
||||
|
||||
m = POST_PATTERN.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return "post_" + m.group(1) + "_" + m.group(2);
|
||||
}
|
||||
|
||||
throw new MalformedURLException("Expected fapwiz URL format: " +
|
||||
"fapwiz.com/USER or fapwiz.com/USER/POST or " +
|
||||
"fapwiz.com/CATEGORY - got " + url + " instead");
|
||||
}
|
||||
|
||||
void processUserOrCategoryPage(Document doc, List<String> results) {
|
||||
// The category page looks a lot like the structure of a user page,
|
||||
// so processUserPage is written to be compatible with both.
|
||||
doc.select(".post-items-holder img").forEach(e -> {
|
||||
String imgSrc = e.attr("src");
|
||||
|
||||
// Skip the user profile picture thumbnail insets
|
||||
if (imgSrc.endsWith("-thumbnail-icon.jpg")) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Replace -thumbnail.jpg with .mp4
|
||||
String videoSrc = imgSrc.replace("-thumbnail.jpg", ".mp4");
|
||||
results.add(videoSrc);
|
||||
});
|
||||
}
|
||||
|
||||
void processCategoryPage(Document doc, List<String> results) {
|
||||
logger.info("Processing category page: " + url);
|
||||
processUserOrCategoryPage(doc, results);
|
||||
}
|
||||
|
||||
void processUserPage(Document doc, List<String> results) {
|
||||
logger.info("Processing user page: " + url);
|
||||
processUserOrCategoryPage(doc, results);
|
||||
}
|
||||
|
||||
void processPostPage(Document doc, List<String> results) {
|
||||
logger.info("Processing post page: " + url);
|
||||
doc.select("video source").forEach(video -> {
|
||||
results.add(video.attr("src"));
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> results = new ArrayList<>();
|
||||
Matcher m;
|
||||
|
||||
m = CATEGORY_PATTERN.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
processCategoryPage(doc, results);
|
||||
}
|
||||
|
||||
m = USER_PATTERN.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
processUserPage(doc, results);
|
||||
}
|
||||
|
||||
m = POST_PATTERN.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
processPostPage(doc, results);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private Document getDocument(String url, int retries) throws IOException {
|
||||
return Http.url(url).userAgent(USER_AGENT).retries(retries).get();
|
||||
}
|
||||
|
||||
private Document getDocument(String url) throws IOException {
|
||||
return getDocument(url, 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document page) throws IOException {
|
||||
logger.info("Getting next page for url: " + url);
|
||||
Elements next = page.select("a.next");
|
||||
if (!next.isEmpty()) {
|
||||
String href = next.attr("href");
|
||||
logger.info("Found next page: " + href);
|
||||
return getDocument(href);
|
||||
} else {
|
||||
logger.info("No more pages");
|
||||
throw new IOException("No more pages.");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
sleep(2000);
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
}
|
@ -0,0 +1,162 @@
|
||||
package com.rarchives.ripme.tst.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Tag;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractRipper;
|
||||
import com.rarchives.ripme.ripper.rippers.FapwizRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public class FapwizRipperTest extends RippersTest {
|
||||
@Test
|
||||
@Tag("flaky") // It seems like fetching the document within the test can be flaky.
|
||||
public void testGetNextPage_NoNextPage() throws IOException, URISyntaxException {
|
||||
URL url = new URI("https://fapwiz.com/alison-esha/").toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
|
||||
Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();
|
||||
try {
|
||||
ripper.getNextPage(firstPage);
|
||||
// If we don't throw, we failed the text because there *was* a next
|
||||
// page even though there shouldn't be.
|
||||
Assertions.fail();
|
||||
} catch (IOException exception) {
|
||||
Assertions.assertTrue(true);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Tag("flaky") // It seems like fetching the document within the test can be flaky.
|
||||
public void testGetNextPage_HasNextPage() throws IOException, URISyntaxException {
|
||||
URL url = new URI("https://fapwiz.com/miaipanema/").toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
|
||||
Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();
|
||||
try {
|
||||
Document doc = ripper.getNextPage(firstPage);
|
||||
Assertions.assertNotNull(doc);
|
||||
} catch (IOException exception) {
|
||||
// We should have found a next page but didn't.
|
||||
Assertions.fail();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRipPost() throws IOException, URISyntaxException {
|
||||
URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRipPostWithNumbersInUsername1() throws IOException, URISyntaxException {
|
||||
URL url = new URI("https://fapwiz.com/desperate_bug_7776/lets-be-friends-that-secretly-fuck-thanks/").toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRipPostWithEmojiInShortUrl() throws IOException, URISyntaxException {
|
||||
URL url = new URI("https://fapwiz.com/miaipanema/my-grip-needs-a-name-%f0%9f%a4%ad%f0%9f%91%87%f0%9f%8f%bc/")
|
||||
.toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRipPostWithEmojiInLongUrlAtEnd() throws IOException, URISyntaxException {
|
||||
URL url = new URI(
|
||||
"https://fapwiz.com/bimeat1998/just-imagine-youre-out-with-your-girl-and-your-buddies-and-then-she-makes-this-move-%f0%9f%98%8d/")
|
||||
.toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRipPostWithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
|
||||
URL url = new URI(
|
||||
"https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
|
||||
.toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
// TODO Test rip user
|
||||
|
||||
// TODO Test rip category
|
||||
|
||||
@Test
|
||||
public void testPostGetGID1_Simple() throws IOException, URISyntaxException {
|
||||
URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
Assertions.assertEquals("post_petiteasiantravels_riding-at-9-months-pregnant", ripper.getGID(url));
|
||||
}
|
||||
|
||||
// Test Post pages GetGID
|
||||
|
||||
@Test
|
||||
public void testPostGetGID2_WithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
|
||||
URL url = new URI(
|
||||
"https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
|
||||
.toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
|
||||
// In this case the filesystem safe version of the GID is
|
||||
// "post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-"
|
||||
// but the GID doesn't truncate and doesn't remove non-filesystem-safe
|
||||
// characters.
|
||||
String gid = ripper.getGID(url);
|
||||
Assertions.assertEquals(
|
||||
"post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96",
|
||||
gid);
|
||||
|
||||
// Test directory name on disk (filesystem safe sanitized as the ripper will
|
||||
// do).
|
||||
String directoryName = Utils.filesystemSafe(ripper.getHost() + "_" + gid);
|
||||
Assertions.assertEquals(
|
||||
"fapwiz_post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-",
|
||||
directoryName);
|
||||
}
|
||||
|
||||
// Test User pages GetGID
|
||||
|
||||
@Test
|
||||
public void testUserGetGID1_Simple() throws IOException, URISyntaxException {
|
||||
// Test a "simple" username that is all letters.
|
||||
URL url = new URI("https://fapwiz.com/petiteasiantravels/").toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
Assertions.assertEquals("user_petiteasiantravels", ripper.getGID(url));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUserGetGID2_Numbers() throws IOException, URISyntaxException {
|
||||
// Test a more complex username that contains numbers.
|
||||
URL url = new URI("https://fapwiz.com/bimeat1998/").toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
Assertions.assertEquals("user_bimeat1998", ripper.getGID(url));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUserGetGID3_HyphensAndNumbers() throws IOException, URISyntaxException {
|
||||
// Test a more complex username that contains hyphens and numbers.
|
||||
URL url = new URI("https://fapwiz.com/used-airport-4076/").toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
Assertions.assertEquals("user_used-airport-4076", ripper.getGID(url));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUserGetGID4_Underscores() throws IOException, URISyntaxException {
|
||||
// Test a more complex username that contains underscores.
|
||||
URL url = new URI("https://fapwiz.com/desperate_bug_7776/").toURL();
|
||||
FapwizRipper ripper = new FapwizRipper(url);
|
||||
Assertions.assertEquals("user_desperate_bug_7776", ripper.getGID(url));
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user