Mirror of https://github.com/RipMeApp/ripme.git

Make PornhubRipper inherit from AbstractHTMLRipper

Also make sure that if an album has multiple pages, all of its pages are downloaded (tested).
Peter Szakacs
2018-10-29 21:03:25 +01:00
parent 5ae2bb43e8
commit da509663d1
2 changed files with 84 additions and 64 deletions
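
For context: AbstractHTMLRipper is RipMe's template-method base class for HTML galleries. Subclasses only supply page fetching, pagination, and link extraction; the base class drives the overall rip loop. Below is a rough sketch of that contract. The abstract method names match the overrides in the diff, but the loop body is an assumption about how the real base class (which also handles status updates, stop requests, and test mode) drives them, not its actual source:

import java.io.IOException;
import java.net.URL;
import java.util.List;
import org.jsoup.nodes.Document;

// Hypothetical, simplified stand-in for com.rarchives.ripme.ripper.AbstractHTMLRipper.
abstract class HtmlRipperSketch {
    protected abstract Document getFirstPage() throws IOException;
    protected abstract Document getNextPage(Document page) throws IOException; // throws when out of pages
    protected abstract List<String> getURLsFromPage(Document page);
    protected abstract void downloadURL(URL url, int index);

    public void rip() throws IOException {
        int index = 0;
        Document page = getFirstPage();
        while (true) {
            for (String link : getURLsFromPage(page)) {
                downloadURL(new URL(link), ++index); // queue each image for download
            }
            try {
                page = getNextPage(page);            // advance to the next album page
            } catch (IOException e) {
                break;                               // "no more pages" is signalled by throwing
            }
        }
    }
}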

src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java

@@ -4,9 +4,12 @@ import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
@@ -17,7 +20,7 @@ import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.Utils;
 
-public class PornhubRipper extends AlbumRipper {
+public class PornhubRipper extends AbstractHTMLRipper {
 
     // All sleep times are in milliseconds
     private static final int IMAGE_SLEEP_TIME = 1000;
@@ -26,9 +29,6 @@ public class PornhubRipper extends AlbumRipper {
     // Thread pool for finding direct image links from "image" pages (html)
     private DownloadThreadPool pornhubThreadPool = new DownloadThreadPool("pornhub");
 
-    // Current HTML document
-    private Document albumDoc = null;
-
     public PornhubRipper(URL url) throws IOException {
         super(url);
     }
@@ -38,25 +38,63 @@ public class PornhubRipper extends AlbumRipper {
         return HOST;
     }
 
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
-    }
-
-    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            if (albumDoc == null) {
-                LOGGER.info(" Retrieving " + url.toExternalForm());
-                sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
-                albumDoc = Http.url(url).get();
-            }
-            Elements elems = albumDoc.select(".photoAlbumTitleV2");
-            return HOST + "_" + elems.get(0).text();
-        } catch (Exception e) {
-            // Fall back to default album naming convention
-            LOGGER.warn("Failed to get album title from " + url, e);
-        }
-        return super.getAlbumTitle(url);
+    @Override
+    protected String getDomain() {
+        return DOMAIN;
+    }
+
+    @Override
+    protected Document getFirstPage() throws IOException {
+        return Http.url(url).referrer(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        Elements nextPageLink = page.select("li.page_next > a");
+        if (nextPageLink.isEmpty()){
+            throw new IOException("No more pages");
+        } else {
+            URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
+            return Http.url(nextURL).get();
+        }
+    }
+
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        List<String> pageURLs = new ArrayList<>();
+        // Find thumbnails
+        Elements thumbs = page.select(".photoBlockBox li");
+        // Iterate over thumbnail images on page
+        for (Element thumb : thumbs) {
+            String imagePage = thumb.select(".photoAlbumListBlock > a")
+                    .first().attr("href");
+            String fullURL = "https://pornhub.com" + imagePage;
+            pageURLs.add(fullURL);
+        }
+        return pageURLs;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        PornhubImageThread t = new PornhubImageThread(url, index, this.workingDir);
+        pornhubThreadPool.addThread(t);
+        try {
+            Thread.sleep(IMAGE_SLEEP_TIME);
+        } catch (InterruptedException e) {
+            LOGGER.warn("Interrupted while waiting to load next image", e);
+        }
+    }
+
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        // always start on the first page of an album
+        // (strip the options after the '?')
+        String u = url.toExternalForm();
+        if (u.contains("?")) {
+            u = u.substring(0, u.indexOf("?"));
+            return new URL(u);
+        } else {
+            return url;
+        }
     }
 
     @Override
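
One behavioral detail worth noting in the new sanitizeURL: any query string is stripped, so a link into the middle of an album is rewritten to its first page before ripping starts. A standalone illustration of that logic (a demo class, not part of the ripper itself):

import java.net.MalformedURLException;
import java.net.URL;

public class SanitizeDemo {
    // Mirrors the sanitizeURL logic above: drop everything after '?' so the
    // rip always starts from page 1 of the album.
    static URL stripQuery(URL url) throws MalformedURLException {
        String u = url.toExternalForm();
        return u.contains("?") ? new URL(u.substring(0, u.indexOf('?'))) : url;
    }

    public static void main(String[] args) throws MalformedURLException {
        URL mid = new URL("https://www.pornhub.com/album/15680522?page=3");
        // Prints: https://www.pornhub.com/album/15680522
        System.out.println(stripQuery(mid));
    }
}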
@@ -64,7 +102,7 @@ public class PornhubRipper extends AlbumRipper {
         Pattern p;
         Matcher m;
-        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+)$");
+        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
@@ -77,48 +115,8 @@ public class PornhubRipper extends AlbumRipper {
     }
 
     @Override
-    public void rip() throws IOException {
-        int index = 0;
-        String nextUrl = this.url.toExternalForm();
-
-        if (albumDoc == null) {
-            LOGGER.info(" Retrieving album page " + nextUrl);
-            sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
-            albumDoc = Http.url(nextUrl)
-                    .referrer(this.url)
-                    .get();
-        }
-
-        // Find thumbnails
-        Elements thumbs = albumDoc.select(".photoBlockBox li");
-        if (thumbs.isEmpty()) {
-            LOGGER.debug("albumDoc: " + albumDoc);
-            LOGGER.debug("No images found at " + nextUrl);
-            return;
-        }
-
-        // Iterate over images on page
-        for (Element thumb : thumbs) {
-            if (isStopped()) {
-                break;
-            }
-            index++;
-            String imagePageUrl = thumb.select(".photoAlbumListBlock > a").first().attr("href");
-            URL imagePage = new URL(url, imagePageUrl);
-            PornhubImageThread t = new PornhubImageThread(imagePage, index, this.workingDir);
-            pornhubThreadPool.addThread(t);
-            if (isThisATest()) {
-                break;
-            }
-            try {
-                Thread.sleep(IMAGE_SLEEP_TIME);
-            } catch (InterruptedException e) {
-                LOGGER.warn("Interrupted while waiting to load next image", e);
-            }
-        }
-        pornhubThreadPool.waitForThreads();
-        waitForThreads();
+    public DownloadThreadPool getThreadPool(){
+        return pornhubThreadPool;
     }
 
     public boolean canRip(URL url) {
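
Note that per-image work still runs on the ripper's own DownloadThreadPool: downloadURL queues a PornhubImageThread for each image page, and exposing the pool via getThreadPool() lets the framework wait for those workers once the page loop finishes, replacing the explicit pornhubThreadPool.waitForThreads() call in the old rip(). A generic sketch of that queue-then-wait pattern, assuming nothing ripme-specific:

import java.util.ArrayList;
import java.util.List;

// Minimal stand-in for the DownloadThreadPool idea: producers start worker
// threads as they discover work, and a final barrier joins them all before
// the rip is reported complete.
class SimpleThreadPool {
    private final List<Thread> threads = new ArrayList<>();

    synchronized void addThread(Thread t) {
        threads.add(t);
        t.start();
    }

    void waitForThreads() throws InterruptedException {
        for (Thread t : threads) {
            t.join();
        }
    }
}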

src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java

@@ -4,6 +4,8 @@ import java.io.IOException;
 import java.net.URL;
 
 import com.rarchives.ripme.ripper.rippers.PornhubRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
 
 public class PornhubRipperTest extends RippersTest {
 
     public void testPornhubRip() throws IOException {
@@ -12,8 +14,28 @@ public class PornhubRipperTest extends RippersTest {
     }
 
     public void testGetGID() throws IOException {
-        URL url = new URL("https://www.pornhub.com/album/15680522");
+        URL url = new URL("https://www.pornhub.com/album/15680522?page=2");
         PornhubRipper ripper = new PornhubRipper(url);
         assertEquals("15680522", ripper.getGID(url));
+        url = new URL("https://www.pornhub.com/album/15680522");
+        assertEquals("15680522", ripper.getGID(url));
+    }
+
+    // alternate album, with only 2 pages: https://www.pornhub.com/album/4771891
+    public void testGetNextPage() throws IOException {
+        String baseURL = "https://www.pornhub.com/album/15680522";
+        PornhubRipper ripper = new PornhubRipper(new URL(baseURL));
+        Document page = Http.url(baseURL).get();
+        int numPagesRemaining = 4;
+        for (int idx = 0; idx < numPagesRemaining; idx++){
+            page = ripper.getNextPage(page);
+            assertEquals(baseURL + "?page=" + (idx + 2), page.location());
+        }
+        try {
+            page = ripper.getNextPage(page);
+            fail("Get next page did not throw an exception on the last page");
+        } catch (IOException e) {
+            assertEquals(e.getMessage(), "No more pages");
+        }
     }
 }
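
Assuming the project's usual Maven setup, the new test can be run in isolation with Surefire's filter, e.g. `mvn test -Dtest=PornhubRipperTest`. Note that, like the other ripper tests, testGetNextPage hits the live site and depends on the sample album still having exactly five pages (page 1 plus the four visited in the loop).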