Mirror of https://github.com/RipMeApp/ripme.git (synced 2025-09-02 02:12:45 +02:00)
Make PornhubRipper inherit from AbstractHTMLRipper
Also ensure that albums with multiple pages have all of their pages downloaded (tested).
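With this change the hand-rolled rip() loop (removed below) goes away: AbstractHTMLRipper drives the whole rip, and the subclass only supplies page fetching, link extraction, and downloading. A minimal sketch of that template-method loop, assuming the four hooks this commit overrides (illustration only, with a hypothetical class name; the real loop lives in com.rarchives.ripme.ripper.AbstractHTMLRipper):

    import java.io.IOException;
    import java.net.URL;
    import java.util.List;

    import org.jsoup.nodes.Document;

    // Sketch of the base-class contract; not ripme's actual source.
    abstract class HtmlRipperSketch {
        protected abstract Document getFirstPage() throws IOException;
        protected abstract Document getNextPage(Document page) throws IOException;
        protected abstract List<String> getURLsFromPage(Document page);
        protected abstract void downloadURL(URL url, int index);

        public void rip() throws IOException {
            int index = 0;
            Document page = getFirstPage();
            while (page != null) {
                for (String imageURL : getURLsFromPage(page)) {
                    downloadURL(new URL(imageURL), ++index);
                }
                try {
                    // PornhubRipper follows the "li.page_next > a" link here,
                    // which is what makes multi-page albums download fully
                    page = getNextPage(page);
                } catch (IOException e) {
                    // getNextPage throws "No more pages" on the last page
                    page = null;
                }
            }
        }
    }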
PornhubRipper.java
@@ -4,9 +4,12 @@ import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
@@ -17,7 +20,7 @@ import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.Utils;
 
-public class PornhubRipper extends AlbumRipper {
+public class PornhubRipper extends AbstractHTMLRipper {
     // All sleep times are in milliseconds
     private static final int IMAGE_SLEEP_TIME = 1000;
 
@@ -26,9 +29,6 @@ public class PornhubRipper extends AlbumRipper {
     // Thread pool for finding direct image links from "image" pages (html)
     private DownloadThreadPool pornhubThreadPool = new DownloadThreadPool("pornhub");
 
-    // Current HTML document
-    private Document albumDoc = null;
-
     public PornhubRipper(URL url) throws IOException {
         super(url);
     }
@@ -38,25 +38,63 @@ public class PornhubRipper extends AlbumRipper {
         return HOST;
     }
 
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
+    @Override
+    protected String getDomain() {
+        return DOMAIN;
     }
 
-    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            if (albumDoc == null) {
-                LOGGER.info(" Retrieving " + url.toExternalForm());
-                sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
-                albumDoc = Http.url(url).get();
-            }
-            Elements elems = albumDoc.select(".photoAlbumTitleV2");
-            return HOST + "_" + elems.get(0).text();
-        } catch (Exception e) {
-            // Fall back to default album naming convention
-            LOGGER.warn("Failed to get album title from " + url, e);
-        }
-        return super.getAlbumTitle(url);
+    @Override
+    protected Document getFirstPage() throws IOException {
+        return Http.url(url).referrer(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        Elements nextPageLink = page.select("li.page_next > a");
+        if (nextPageLink.isEmpty()){
+            throw new IOException("No more pages");
+        } else {
+            URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
+            return Http.url(nextURL).get();
+        }
+    }
+
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        List<String> pageURLs = new ArrayList<>();
+        // Find thumbnails
+        Elements thumbs = page.select(".photoBlockBox li");
+        // Iterate over thumbnail images on page
+        for (Element thumb : thumbs) {
+            String imagePage = thumb.select(".photoAlbumListBlock > a")
+                    .first().attr("href");
+            String fullURL = "https://pornhub.com" + imagePage;
+            pageURLs.add(fullURL);
+        }
+        return pageURLs;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        PornhubImageThread t = new PornhubImageThread(url, index, this.workingDir);
+        pornhubThreadPool.addThread(t);
+        try {
+            Thread.sleep(IMAGE_SLEEP_TIME);
+        } catch (InterruptedException e) {
+            LOGGER.warn("Interrupted while waiting to load next image", e);
+        }
+    }
+
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        // always start on the first page of an album
+        // (strip the options after the '?')
+        String u = url.toExternalForm();
+        if (u.contains("?")) {
+            u = u.substring(0, u.indexOf("?"));
+            return new URL(u);
+        } else {
+            return url;
+        }
     }
 
     @Override
@@ -64,7 +102,7 @@ public class PornhubRipper extends AlbumRipper {
         Pattern p;
         Matcher m;
 
-        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+)$");
+        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
@@ -77,48 +115,8 @@ public class PornhubRipper extends AlbumRipper {
     }
 
     @Override
-    public void rip() throws IOException {
-        int index = 0;
-        String nextUrl = this.url.toExternalForm();
-
-        if (albumDoc == null) {
-            LOGGER.info(" Retrieving album page " + nextUrl);
-            sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
-            albumDoc = Http.url(nextUrl)
-                    .referrer(this.url)
-                    .get();
-        }
-
-        // Find thumbnails
-        Elements thumbs = albumDoc.select(".photoBlockBox li");
-        if (thumbs.isEmpty()) {
-            LOGGER.debug("albumDoc: " + albumDoc);
-            LOGGER.debug("No images found at " + nextUrl);
-            return;
-        }
-
-        // Iterate over images on page
-        for (Element thumb : thumbs) {
-            if (isStopped()) {
-                break;
-            }
-            index++;
-            String imagePageUrl = thumb.select(".photoAlbumListBlock > a").first().attr("href");
-            URL imagePage = new URL(url, imagePageUrl);
-            PornhubImageThread t = new PornhubImageThread(imagePage, index, this.workingDir);
-            pornhubThreadPool.addThread(t);
-            if (isThisATest()) {
-                break;
-            }
-            try {
-                Thread.sleep(IMAGE_SLEEP_TIME);
-            } catch (InterruptedException e) {
-                LOGGER.warn("Interrupted while waiting to load next image", e);
-            }
-        }
-
-        pornhubThreadPool.waitForThreads();
-        waitForThreads();
-    }
-
+    public DownloadThreadPool getThreadPool(){
+        return pornhubThreadPool;
+    }
+
     public boolean canRip(URL url) {
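One detail of getNextPage worth noting: it builds the next page's URL with the two-argument java.net.URL constructor, which resolves the (typically relative) href of the "next" link against the current album URL. A standalone illustration (class name and href value are ours, but the ?page=N shape matches what the test below asserts):

    import java.net.MalformedURLException;
    import java.net.URL;

    public class NextPageResolution {
        public static void main(String[] args) throws MalformedURLException {
            URL album = new URL("https://www.pornhub.com/album/15680522");
            // new URL(context, spec) resolves a relative link against the album page
            URL next = new URL(album, "/album/15680522?page=2");
            System.out.println(next); // https://www.pornhub.com/album/15680522?page=2
        }
    }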
PornhubRipperTest.java
@@ -4,6 +4,8 @@ import java.io.IOException;
 import java.net.URL;
 
 import com.rarchives.ripme.ripper.rippers.PornhubRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
 
 public class PornhubRipperTest extends RippersTest {
     public void testPornhubRip() throws IOException {
@@ -12,8 +14,28 @@ public class PornhubRipperTest extends RippersTest {
     }
 
     public void testGetGID() throws IOException {
-        URL url = new URL("https://www.pornhub.com/album/15680522");
+        URL url = new URL("https://www.pornhub.com/album/15680522?page=2");
         PornhubRipper ripper = new PornhubRipper(url);
         assertEquals("15680522", ripper.getGID(url));
+        url = new URL("https://www.pornhub.com/album/15680522");
+        assertEquals("15680522", ripper.getGID(url));
     }
+
+    // alternate album, with only 2 pages: https://www.pornhub.com/album/4771891
+    public void testGetNextPage() throws IOException {
+        String baseURL = "https://www.pornhub.com/album/15680522";
+        PornhubRipper ripper = new PornhubRipper(new URL(baseURL));
+        Document page = Http.url(baseURL).get();
+        int numPagesRemaining = 4;
+        for (int idx = 0; idx < numPagesRemaining; idx++){
+            page = ripper.getNextPage(page);
+            assertEquals(baseURL + "?page=" + (idx + 2), page.location());
+        }
+        try {
+            page = ripper.getNextPage(page);
+            fail("Get next page did not throw an exception on the last page");
+        } catch(IOException e){
+            assertEquals(e.getMessage(), "No more pages");
+        }
+    }
 }
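The regex change to getGID in PornhubRipper.java is exactly what the updated testGetGID exercises: the old pattern was anchored immediately after the album ID, so a URL carrying a query string such as ?page=2 never matched. A quick standalone check (hypothetical class name, not part of the commit):

    import java.util.regex.Pattern;

    public class GidRegexCheck {
        public static void main(String[] args) {
            String url = "https://www.pornhub.com/album/15680522?page=2";
            // old pattern: must end right after the ID, so "?page=2" breaks the match
            System.out.println(Pattern.matches("^.*pornhub\\.com/album/([0-9]+)$", url));   // false
            // new pattern from this commit tolerates trailing text like a query string
            System.out.println(Pattern.matches("^.*pornhub\\.com/album/([0-9]+).*$", url)); // true
        }
    }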