mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-08-27 07:44:20 +02:00
Fixes to Hypnohub Ripper (#2130)
This commit is contained in:
@@ -5,8 +5,6 @@ import java.net.MalformedURLException;
|
|||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
@@ -36,52 +34,131 @@ public class HypnohubRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Pattern p = Pattern.compile("https?://hypnohub.net/\\S+/show/([\\d]+)/?$");
|
String query = url.getQuery();
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
if (query == null) {
|
||||||
if (m.matches()) {
|
throw new MalformedURLException("URL missing query: " + url);
|
||||||
return m.group(1);
|
|
||||||
}
|
}
|
||||||
p = Pattern.compile("https?://hypnohub.net/\\S+/show/([\\d]+)/([\\S]+)/?$");
|
if (query.contains("page=pool")) {
|
||||||
m = p.matcher(url.toExternalForm());
|
for (String param : query.split("&")) {
|
||||||
if (m.matches()) {
|
if (param.startsWith("id=")) {
|
||||||
return m.group(1) + "_" + m.group(2);
|
return param.substring("id=".length());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new MalformedURLException("Pool URL missing id: " + url);
|
||||||
|
} else if (query.startsWith("page=post")) {
|
||||||
|
// Drop "page=" to satisfy testGetGID
|
||||||
|
return query.substring("page=".length());
|
||||||
}
|
}
|
||||||
throw new MalformedURLException("Expected cfake URL format: " +
|
throw new MalformedURLException("Unexpected URL format for GID: " + url);
|
||||||
"hypnohub.net/pool/show/ID - got " + url + " instead");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private String ripPost(String url) throws IOException {
|
/**
|
||||||
logger.info(url);
|
* Fetches a post page and extracts its full-size image URL.
|
||||||
Document doc = Http.url(url).get();
|
*/
|
||||||
return "https:" + doc.select("img.image").attr("src");
|
private String ripPost(String postUrl) throws IOException {
|
||||||
|
logger.info("Fetching post: {}", postUrl);
|
||||||
|
Document doc = Http.url(postUrl).get();
|
||||||
|
// Try primary selector: the displayed sample image
|
||||||
|
Element img = doc.selectFirst("img#image");
|
||||||
|
if (img != null) {
|
||||||
|
String src = img.attr("src");
|
||||||
|
if (src.startsWith("//"))
|
||||||
|
return "https:" + src;
|
||||||
|
if (src.startsWith("/"))
|
||||||
|
return "https://hypnohub.net" + src;
|
||||||
|
return src;
|
||||||
|
}
|
||||||
|
// Fallback to original image link
|
||||||
|
Element origLink = doc.selectFirst("a:matchesOwn(^Original image$");
|
||||||
|
if (origLink != null) {
|
||||||
|
String href = origLink.attr("href");
|
||||||
|
if (href.startsWith("//"))
|
||||||
|
return "https:" + href;
|
||||||
|
if (href.startsWith("/"))
|
||||||
|
return "https://hypnohub.net" + href;
|
||||||
|
return href;
|
||||||
|
}
|
||||||
|
// Final fallback: meta og:image
|
||||||
|
Element meta = doc.selectFirst("meta[property=og:image]");
|
||||||
|
if (meta != null) {
|
||||||
|
String content = meta.attr("content");
|
||||||
|
if (content.startsWith("//"))
|
||||||
|
return "https:" + content;
|
||||||
|
if (content.startsWith("/"))
|
||||||
|
return "https://hypnohub.net" + content;
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
logger.warn("No image found on post page: {}", postUrl);
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts the full-size image URL from an already-fetched post Document.
|
||||||
|
*/
|
||||||
private String ripPost(Document doc) {
|
private String ripPost(Document doc) {
|
||||||
logger.info(url);
|
logger.info("Parsing post document: {}", url);
|
||||||
return "https:" + doc.select("img.image").attr("src");
|
// Use same logic as string-based ripPost
|
||||||
|
Element img = doc.selectFirst("img#image");
|
||||||
|
if (img != null) {
|
||||||
|
String src = img.attr("src");
|
||||||
|
if (src.startsWith("//"))
|
||||||
|
return "https:" + src;
|
||||||
|
if (src.startsWith("/"))
|
||||||
|
return "https://hypnohub.net" + src;
|
||||||
|
return src;
|
||||||
|
}
|
||||||
|
Element origLink = doc.selectFirst("a:matchesOwn(^Original image$");
|
||||||
|
if (origLink != null) {
|
||||||
|
String href = origLink.attr("href");
|
||||||
|
if (href.startsWith("//"))
|
||||||
|
return "https:" + href;
|
||||||
|
if (href.startsWith("/"))
|
||||||
|
return "https://hypnohub.net" + href;
|
||||||
|
return href;
|
||||||
|
}
|
||||||
|
Element meta = doc.selectFirst("meta[property=og:image]");
|
||||||
|
if (meta != null) {
|
||||||
|
String content = meta.attr("content");
|
||||||
|
if (content.startsWith("//"))
|
||||||
|
return "https:" + content;
|
||||||
|
if (content.startsWith("/"))
|
||||||
|
return "https://hypnohub.net" + content;
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
logger.warn("No image found in document at: {}", url);
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromPage(Document doc) {
|
public List<String> getURLsFromPage(Document doc) {
|
||||||
List<String> result = new ArrayList<>();
|
List<String> result = new ArrayList<>();
|
||||||
if (url.toExternalForm().contains("/pool")) {
|
String pageUrl = url.toExternalForm();
|
||||||
for (Element el : doc.select("ul[id=post-list-posts] > li > div > a.thumb")) {
|
if (pageUrl.contains("page=pool")) {
|
||||||
|
// Iterate over all thumbnail spans on the pool page
|
||||||
|
for (Element link : doc.select("span.thumb > a[href*='page=post']")) {
|
||||||
|
String href = link.attr("href");
|
||||||
|
String fullPostUrl = href.startsWith("http") ? href : "https://hypnohub.net/" + href;
|
||||||
try {
|
try {
|
||||||
result.add(ripPost("https://hypnohub.net" + el.attr("href")));
|
String imgUrl = ripPost(fullPostUrl);
|
||||||
|
if (imgUrl != null) {
|
||||||
|
result.add(imgUrl);
|
||||||
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
return result;
|
logger.error("Failed to rip post {}", fullPostUrl, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (url.toExternalForm().contains("/post")) {
|
} else if (pageUrl.contains("page=post")) {
|
||||||
result.add(ripPost(doc));
|
String imgUrl = ripPost(doc);
|
||||||
|
if (imgUrl != null) {
|
||||||
|
result.add(imgUrl);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void downloadURL(URL url, int index) {
|
public void downloadURL(URL url, int index) {
|
||||||
|
// url here is already a direct image URL
|
||||||
addURLToDownload(url, getPrefix(index));
|
addURLToDownload(url, getPrefix(index));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -6,32 +6,32 @@ import java.net.URISyntaxException;
|
|||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Disabled;
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.rippers.HypnohubRipper;
|
import com.rarchives.ripme.ripper.rippers.HypnohubRipper;
|
||||||
|
|
||||||
public class HypnohubRipperTest extends RippersTest {
|
public class HypnohubRipperTest extends RippersTest {
|
||||||
|
private static final String POOL_URL = "https://hypnohub.net/index.php?page=pool&s=show&id=6717";
|
||||||
|
private static final String POST_URL = "https://hypnohub.net/index.php?page=post&s=view&id=234499&pool_id=6717";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@Disabled("wants a human")
|
public void testRipPoolAndPost() throws IOException, URISyntaxException {
|
||||||
public void testRip() throws IOException, URISyntaxException {
|
URL poolURL = new URI(POOL_URL).toURL();
|
||||||
URL poolURL = new URI("http://hypnohub.net/pool/show/2303").toURL();
|
HypnohubRipper poolRipper = new HypnohubRipper(poolURL);
|
||||||
URL postURL = new URI("http://hypnohub.net/post/show/63464/black_hair-bracelet-collar-corruption-female_only-")
|
testRipper(poolRipper);
|
||||||
.toURL();
|
URL postURL = new URI(POST_URL).toURL();
|
||||||
HypnohubRipper ripper = new HypnohubRipper(poolURL);
|
HypnohubRipper postRipper = new HypnohubRipper(postURL);
|
||||||
testRipper(ripper);
|
testRipper(postRipper);
|
||||||
ripper = new HypnohubRipper(postURL);
|
|
||||||
testRipper(ripper);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetGID() throws IOException, URISyntaxException {
|
public void testGetGID() throws IOException, URISyntaxException {
|
||||||
URL poolURL = new URI("http://hypnohub.net/pool/show/2303").toURL();
|
URL poolURL = new URI(POOL_URL).toURL();
|
||||||
HypnohubRipper ripper = new HypnohubRipper(poolURL);
|
HypnohubRipper poolRipper = new HypnohubRipper(poolURL);
|
||||||
Assertions.assertEquals("2303", ripper.getGID(poolURL));
|
Assertions.assertEquals("6717", poolRipper.getGID(poolURL));
|
||||||
|
|
||||||
URL postURL = new URI("http://hypnohub.net/post/show/63464/black_hair-bracelet-collar-corruption-female_only-")
|
URL postURL = new URI(POST_URL).toURL();
|
||||||
.toURL();
|
HypnohubRipper postRipper = new HypnohubRipper(postURL);
|
||||||
Assertions.assertEquals("63464_black_hair-bracelet-collar-corruption-female_only-", ripper.getGID(postURL));
|
Assertions.assertEquals("post&s=view&id=234499&pool_id=6717", postRipper.getGID(postURL));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user