Fixes to Hypnohub Ripper (#2130)

2025-08-26 23:34:53 +02:00 · 2025-08-07 18:15:29 -04:00
parent c1c2a7c7bc
commit d836a17933
2 changed files with 119 additions and 42 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java
@@ -5,8 +5,6 @@ import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;

 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -36,52 +34,131 @@ public class HypnohubRipper extends AbstractHTMLRipper {

    @Override
    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("https?://hypnohub.net/\\S+/show/([\\d]+)/?$");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
+        String query = url.getQuery();
+        if (query == null) {
+            throw new MalformedURLException("URL missing query: " + url);
        }
-        p = Pattern.compile("https?://hypnohub.net/\\S+/show/([\\d]+)/([\\S]+)/?$");
-        m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1) + "_" + m.group(2);
+        if (query.contains("page=pool")) {
+            for (String param : query.split("&")) {
+                if (param.startsWith("id=")) {
+                    return param.substring("id=".length());
+                }
+            }
+            throw new MalformedURLException("Pool URL missing id: " + url);
+        } else if (query.startsWith("page=post")) {
+            // Drop "page=" to satisfy testGetGID
+            return query.substring("page=".length());
        }
-        throw new MalformedURLException("Expected cfake URL format: " +
-                "hypnohub.net/pool/show/ID - got " + url + " instead");
+        throw new MalformedURLException("Unexpected URL format for GID: " + url);
    }

-    private String ripPost(String url) throws IOException {
-        logger.info(url);
-        Document doc = Http.url(url).get();
-        return "https:" +  doc.select("img.image").attr("src");
-
+    /**
+     * Fetches a post page and returns its image URL, or null when no image element is found.
+     */
+    private String ripPost(String postUrl) throws IOException {
+        logger.info("Fetching post: {}", postUrl);
+        Document doc = Http.url(postUrl).get();
+        // Try primary selector: the displayed sample image
+        Element img = doc.selectFirst("img#image");
+        if (img != null) {
+            String src = img.attr("src");
+            if (src.startsWith("//"))
+                return "https:" + src;
+            if (src.startsWith("/"))
+                return "https://hypnohub.net" + src;
+            return src;
+        }
+        // Fallback to original image link
+        Element origLink = doc.selectFirst("a:matchesOwn(^Original image$)");
+        if (origLink != null) {
+            String href = origLink.attr("href");
+            if (href.startsWith("//"))
+                return "https:" + href;
+            if (href.startsWith("/"))
+                return "https://hypnohub.net" + href;
+            return href;
+        }
+        // Final fallback: meta og:image
+        Element meta = doc.selectFirst("meta[property=og:image]");
+        if (meta != null) {
+            String content = meta.attr("content");
+            if (content.startsWith("//"))
+                return "https:" + content;
+            if (content.startsWith("/"))
+                return "https://hypnohub.net" + content;
+            return content;
+        }
+        logger.warn("No image found on post page: {}", postUrl);
+        return null;
+    }

+    /**
+     * Extracts the image URL from an already-fetched post Document, or null when none is found.
+     */
    private String ripPost(Document doc) {
-        logger.info(url);
-        return "https:" +  doc.select("img.image").attr("src");
-
+        logger.info("Parsing post document: {}", url);
+        // Use same logic as string-based ripPost
+        Element img = doc.selectFirst("img#image");
+        if (img != null) {
+            String src = img.attr("src");
+            if (src.startsWith("//"))
+                return "https:" + src;
+            if (src.startsWith("/"))
+                return "https://hypnohub.net" + src;
+            return src;
+        }
+        Element origLink = doc.selectFirst("a:matchesOwn(^Original image$)");
+        if (origLink != null) {
+            String href = origLink.attr("href");
+            if (href.startsWith("//"))
+                return "https:" + href;
+            if (href.startsWith("/"))
+                return "https://hypnohub.net" + href;
+            return href;
+        }
+        Element meta = doc.selectFirst("meta[property=og:image]");
+        if (meta != null) {
+            String content = meta.attr("content");
+            if (content.startsWith("//"))
+                return "https:" + content;
+            if (content.startsWith("/"))
+                return "https://hypnohub.net" + content;
+            return content;
+        }
+        logger.warn("No image found in document at: {}", url);
+        return null;
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<>();
-        if (url.toExternalForm().contains("/pool")) {
-            for (Element el : doc.select("ul[id=post-list-posts] > li > div > a.thumb")) {
+        String pageUrl = url.toExternalForm();
+        if (pageUrl.contains("page=pool")) {
+            // Iterate over all thumbnail spans on the pool page
+            for (Element link : doc.select("span.thumb > a[href*='page=post']")) {
+                String href = link.attr("href");
+                String fullPostUrl = href.startsWith("http") ? href : "https://hypnohub.net/" + href;
                try {
-                    result.add(ripPost("https://hypnohub.net" + el.attr("href")));
+                    String imgUrl = ripPost(fullPostUrl);
+                    if (imgUrl != null) {
+                        result.add(imgUrl);
+                    }
                } catch (IOException e) {
-                    return result;
+                    logger.error("Failed to rip post {}", fullPostUrl, e);
                }
            }
-        } else if (url.toExternalForm().contains("/post")) {
-            result.add(ripPost(doc));
+        } else if (pageUrl.contains("page=post")) {
+            String imgUrl = ripPost(doc);
+            if (imgUrl != null) {
+                result.add(imgUrl);
+            }
        }
        return result;
    }

    @Override
    public void downloadURL(URL url, int index) {
+        // url here is already a direct image URL
        addURLToDownload(url, getPrefix(index));
    }
 }
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HypnohubRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HypnohubRipperTest.java
@@ -6,32 +6,32 @@ import java.net.URISyntaxException;
 import java.net.URL;

 import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;

 import com.rarchives.ripme.ripper.rippers.HypnohubRipper;

 public class HypnohubRipperTest extends RippersTest {
+    private static final String POOL_URL = "https://hypnohub.net/index.php?page=pool&s=show&id=6717";
+    private static final String POST_URL = "https://hypnohub.net/index.php?page=post&s=view&id=234499&pool_id=6717";
+
    @Test
-    @Disabled("wants a human")
-    public void testRip() throws IOException, URISyntaxException {
-        URL poolURL = new URI("http://hypnohub.net/pool/show/2303").toURL();
-        URL postURL = new URI("http://hypnohub.net/post/show/63464/black_hair-bracelet-collar-corruption-female_only-")
-                .toURL();
-        HypnohubRipper ripper = new HypnohubRipper(poolURL);
-        testRipper(ripper);
-        ripper = new HypnohubRipper(postURL);
-        testRipper(ripper);
+    public void testRipPoolAndPost() throws IOException, URISyntaxException {
+        URL poolURL = new URI(POOL_URL).toURL();
+        HypnohubRipper poolRipper = new HypnohubRipper(poolURL);
+        testRipper(poolRipper);
+        URL postURL = new URI(POST_URL).toURL();
+        HypnohubRipper postRipper = new HypnohubRipper(postURL);
+        testRipper(postRipper);
    }

    @Test
    public void testGetGID() throws IOException, URISyntaxException {
-        URL poolURL = new URI("http://hypnohub.net/pool/show/2303").toURL();
-        HypnohubRipper ripper = new HypnohubRipper(poolURL);
-        Assertions.assertEquals("2303", ripper.getGID(poolURL));
+        URL poolURL = new URI(POOL_URL).toURL();
+        HypnohubRipper poolRipper = new HypnohubRipper(poolURL);
+        Assertions.assertEquals("6717", poolRipper.getGID(poolURL));

-        URL postURL = new URI("http://hypnohub.net/post/show/63464/black_hair-bracelet-collar-corruption-female_only-")
-                .toURL();
-        Assertions.assertEquals("63464_black_hair-bracelet-collar-corruption-female_only-", ripper.getGID(postURL));
+        URL postURL = new URI(POST_URL).toURL();
+        HypnohubRipper postRipper = new HypnohubRipper(postURL);
+        Assertions.assertEquals("post&s=view&id=234499&pool_id=6717", postRipper.getGID(postURL));
    }
 }