diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
index bffd0f2d..eb7a421b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
@@ -4,9 +4,12 @@ import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
@@ -17,7 +20,7 @@ import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.Utils;
 
-public class PornhubRipper extends AlbumRipper {
+public class PornhubRipper extends AbstractHTMLRipper {
 
     // All sleep times are in milliseconds
     private static final int IMAGE_SLEEP_TIME = 1000;
@@ -26,9 +29,6 @@ public class PornhubRipper extends AlbumRipper {
     // Thread pool for finding direct image links from "image" pages (html)
     private DownloadThreadPool pornhubThreadPool = new DownloadThreadPool("pornhub");
 
-    // Current HTML document
-    private Document albumDoc = null;
-
     public PornhubRipper(URL url) throws IOException {
         super(url);
     }
@@ -38,25 +38,63 @@ public class PornhubRipper extends AlbumRipper {
         return HOST;
     }
 
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
+    @Override
+    protected String getDomain() {
+        return DOMAIN;
     }
 
-    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            if (albumDoc == null) {
-                LOGGER.info(" Retrieving " + url.toExternalForm());
-                sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
-                albumDoc = Http.url(url).get();
-            }
-            Elements elems = albumDoc.select(".photoAlbumTitleV2");
-            return HOST + "_" + elems.get(0).text();
-        } catch (Exception e) {
-            // Fall back to default album naming convention
-            LOGGER.warn("Failed to get album title from " + url, e);
+    @Override
+    protected Document getFirstPage() throws IOException {
+        return Http.url(url).referrer(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        Elements nextPageLink = page.select("li.page_next > a");
+        if (nextPageLink.isEmpty()){
+            throw new IOException("No more pages");
+        } else {
+            URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
+            return Http.url(nextURL).get();
+        }
+    }
+
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        List<String> pageURLs = new ArrayList<>();
+        // Find thumbnails
+        Elements thumbs = page.select(".photoBlockBox li");
+        // Iterate over thumbnail images on page
+        for (Element thumb : thumbs) {
+            String imagePage = thumb.select(".photoAlbumListBlock > a")
+                    .first().attr("href");
+            String fullURL = "https://pornhub.com" + imagePage;
+            pageURLs.add(fullURL);
+        }
+        return pageURLs;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        PornhubImageThread t = new PornhubImageThread(url, index, this.workingDir);
+        pornhubThreadPool.addThread(t);
+        try {
+            Thread.sleep(IMAGE_SLEEP_TIME);
+        } catch (InterruptedException e) {
+            LOGGER.warn("Interrupted while waiting to load next image", e);
+        }
+    }
+
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        // always start on the first page of an album
+        // (strip the options after the '?')
+        String u = url.toExternalForm();
+        if (u.contains("?")) {
+            u = u.substring(0, u.indexOf("?"));
+            return new URL(u);
+        } else {
+            return url;
         }
-        return super.getAlbumTitle(url);
     }
 
     @Override
@@ -64,7 +102,7 @@ public class PornhubRipper extends AlbumRipper {
         Pattern p;
         Matcher m;
 
-        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+)$");
+        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
@@ -77,48 +115,8 @@ public class PornhubRipper extends AlbumRipper {
     }
 
     @Override
-    public void rip() throws IOException {
-        int index = 0;
-        String nextUrl = this.url.toExternalForm();
-
-        if (albumDoc == null) {
-            LOGGER.info(" Retrieving album page " + nextUrl);
-            sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
-            albumDoc = Http.url(nextUrl)
-                           .referrer(this.url)
-                           .get();
-        }
-
-        // Find thumbnails
-        Elements thumbs = albumDoc.select(".photoBlockBox li");
-        if (thumbs.isEmpty()) {
-            LOGGER.debug("albumDoc: " + albumDoc);
-            LOGGER.debug("No images found at " + nextUrl);
-            return;
-        }
-
-        // Iterate over images on page
-        for (Element thumb : thumbs) {
-            if (isStopped()) {
-                break;
-            }
-            index++;
-            String imagePageUrl = thumb.select(".photoAlbumListBlock > a").first().attr("href");
-            URL imagePage = new URL(url, imagePageUrl);
-            PornhubImageThread t = new PornhubImageThread(imagePage, index, this.workingDir);
-            pornhubThreadPool.addThread(t);
-            if (isThisATest()) {
-                break;
-            }
-            try {
-                Thread.sleep(IMAGE_SLEEP_TIME);
-            } catch (InterruptedException e) {
-                LOGGER.warn("Interrupted while waiting to load next image", e);
-            }
-        }
-
-        pornhubThreadPool.waitForThreads();
-        waitForThreads();
+    public DownloadThreadPool getThreadPool(){
+        return pornhubThreadPool;
     }
 
     public boolean canRip(URL url) {
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java
index 74bee8d9..278ad97c 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java
@@ -4,6 +4,8 @@ import java.io.IOException;
 import java.net.URL;
 
 import com.rarchives.ripme.ripper.rippers.PornhubRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
 
 public class PornhubRipperTest extends RippersTest {
     public void testPornhubRip() throws IOException {
@@ -12,8 +14,28 @@ public class PornhubRipperTest extends RippersTest {
     }
 
     public void testGetGID() throws IOException {
-        URL url = new URL("https://www.pornhub.com/album/15680522");
+        URL url = new URL("https://www.pornhub.com/album/15680522?page=2");
         PornhubRipper ripper = new PornhubRipper(url);
         assertEquals("15680522", ripper.getGID(url));
+        url = new URL("https://www.pornhub.com/album/15680522");
+        assertEquals("15680522", ripper.getGID(url));
+    }
+
+    // alternate album, with only 2 pages: https://www.pornhub.com/album/4771891
+    public void testGetNextPage() throws IOException {
+        String baseURL = "https://www.pornhub.com/album/15680522";
+        PornhubRipper ripper = new PornhubRipper(new URL(baseURL));
+        Document page = Http.url(baseURL).get();
+        int numPagesRemaining = 4;
+        for (int idx = 0; idx < numPagesRemaining; idx++){
+            page = ripper.getNextPage(page);
+            assertEquals(baseURL + "?page=" + (idx + 2), page.location());
+        }
+        try {
+            page = ripper.getNextPage(page);
+            fail("Get next page did not throw an exception on the last page");
+        } catch(IOException e){
+            assertEquals(e.getMessage(), "No more pages");
+        }
     }
 }