mirror of https://github.com/RipMeApp/ripme.git
synced 2025-09-02 10:23:47 +02:00
Make PornhubRipper inherit from AbstractHTMLRipper
Also ensure that if an album has multiple pages, all of them are downloaded (tested).
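For orientation: AlbumRipper subclasses each implement their own rip() loop, while AbstractHTMLRipper is a template base class that drives paging itself and only asks subclasses for pages and image URLs. A simplified, hypothetical sketch of that driving loop (only the overridden method names come from this commit; the loop body is illustrative, not ripme's actual source):

    // Hypothetical, simplified version of the loop AbstractHTMLRipper runs
    // on behalf of its subclasses.
    public void rip() throws IOException {
        int index = 0;
        Document page = getFirstPage();                  // first album page
        while (page != null) {
            for (String imageURL : getURLsFromPage(page)) {
                downloadURL(new URL(imageURL), ++index); // queue one image download
            }
            try {
                page = getNextPage(page);                // follows the li.page_next link
            } catch (IOException e) {
                page = null;                             // "No more pages" ends the rip
            }
        }
        getThreadPool().waitForThreads();                // let queued downloads finish
    }

This is why the commit can delete the hand-rolled rip() below: the new overrides plug into this loop, and multi-page albums fall out of getNextPage().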
src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java

@@ -4,9 +4,12 @@ import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
@@ -17,7 +20,7 @@ import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.Utils;
 
-public class PornhubRipper extends AlbumRipper {
+public class PornhubRipper extends AbstractHTMLRipper {
     // All sleep times are in milliseconds
     private static final int IMAGE_SLEEP_TIME = 1000;
 
@@ -26,9 +29,6 @@ public class PornhubRipper extends AlbumRipper {
     // Thread pool for finding direct image links from "image" pages (html)
     private DownloadThreadPool pornhubThreadPool = new DownloadThreadPool("pornhub");
 
-    // Current HTML document
-    private Document albumDoc = null;
-
     public PornhubRipper(URL url) throws IOException {
         super(url);
     }
@@ -38,25 +38,63 @@ public class PornhubRipper extends AlbumRipper {
         return HOST;
     }
 
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
+    @Override
+    protected String getDomain() {
+        return DOMAIN;
     }
 
-    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            if (albumDoc == null) {
-                LOGGER.info(" Retrieving " + url.toExternalForm());
-                sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
-                albumDoc = Http.url(url).get();
-            }
-            Elements elems = albumDoc.select(".photoAlbumTitleV2");
-            return HOST + "_" + elems.get(0).text();
-        } catch (Exception e) {
-            // Fall back to default album naming convention
-            LOGGER.warn("Failed to get album title from " + url, e);
-        }
-        return super.getAlbumTitle(url);
+    @Override
+    protected Document getFirstPage() throws IOException {
+        return Http.url(url).referrer(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        Elements nextPageLink = page.select("li.page_next > a");
+        if (nextPageLink.isEmpty()){
+            throw new IOException("No more pages");
+        } else {
+            URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
+            return Http.url(nextURL).get();
+        }
+    }
+
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        List<String> pageURLs = new ArrayList<>();
+        // Find thumbnails
+        Elements thumbs = page.select(".photoBlockBox li");
+        // Iterate over thumbnail images on page
+        for (Element thumb : thumbs) {
+            String imagePage = thumb.select(".photoAlbumListBlock > a")
+                    .first().attr("href");
+            String fullURL = "https://pornhub.com" + imagePage;
+            pageURLs.add(fullURL);
+        }
+        return pageURLs;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        PornhubImageThread t = new PornhubImageThread(url, index, this.workingDir);
+        pornhubThreadPool.addThread(t);
+        try {
+            Thread.sleep(IMAGE_SLEEP_TIME);
+        } catch (InterruptedException e) {
+            LOGGER.warn("Interrupted while waiting to load next image", e);
+        }
+    }
+
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        // always start on the first page of an album
+        // (strip the options after the '?')
+        String u = url.toExternalForm();
+        if (u.contains("?")) {
+            u = u.substring(0, u.indexOf("?"));
+            return new URL(u);
+        } else {
+            return url;
+        }
     }
 
     @Override
@@ -64,7 +102,7 @@ public class PornhubRipper extends AlbumRipper {
         Pattern p;
         Matcher m;
 
-        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+)$");
+        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
@@ -77,48 +115,8 @@ public class PornhubRipper extends AlbumRipper {
     }
 
     @Override
-    public void rip() throws IOException {
-        int index = 0;
-        String nextUrl = this.url.toExternalForm();
-
-        if (albumDoc == null) {
-            LOGGER.info(" Retrieving album page " + nextUrl);
-            sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
-            albumDoc = Http.url(nextUrl)
-                    .referrer(this.url)
-                    .get();
-        }
-
-        // Find thumbnails
-        Elements thumbs = albumDoc.select(".photoBlockBox li");
-        if (thumbs.isEmpty()) {
-            LOGGER.debug("albumDoc: " + albumDoc);
-            LOGGER.debug("No images found at " + nextUrl);
-            return;
-        }
-
-        // Iterate over images on page
-        for (Element thumb : thumbs) {
-            if (isStopped()) {
-                break;
-            }
-            index++;
-            String imagePageUrl = thumb.select(".photoAlbumListBlock > a").first().attr("href");
-            URL imagePage = new URL(url, imagePageUrl);
-            PornhubImageThread t = new PornhubImageThread(imagePage, index, this.workingDir);
-            pornhubThreadPool.addThread(t);
-            if (isThisATest()) {
-                break;
-            }
-            try {
-                Thread.sleep(IMAGE_SLEEP_TIME);
-            } catch (InterruptedException e) {
-                LOGGER.warn("Interrupted while waiting to load next image", e);
-            }
-        }
-
-        pornhubThreadPool.waitForThreads();
-        waitForThreads();
+    public DownloadThreadPool getThreadPool(){
+        return pornhubThreadPool;
     }
 
     public boolean canRip(URL url) {
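A note on the rewritten sanitizeURL() above: because the query string is stripped, a rip started from a deep link such as ?page=2 still begins on page 1, and getNextPage() then walks forward from there. A hypothetical check (not part of this commit):

    URL deepLink = new URL("https://www.pornhub.com/album/15680522?page=2");
    PornhubRipper ripper = new PornhubRipper(deepLink);
    // everything after '?' is dropped before ripping starts
    assertEquals("https://www.pornhub.com/album/15680522",
            ripper.sanitizeURL(deepLink).toExternalForm());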
src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java

@@ -4,6 +4,8 @@ import java.io.IOException;
 import java.net.URL;
 
 import com.rarchives.ripme.ripper.rippers.PornhubRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
 
 public class PornhubRipperTest extends RippersTest {
     public void testPornhubRip() throws IOException {
@@ -12,8 +14,28 @@ public class PornhubRipperTest extends RippersTest {
     }
 
     public void testGetGID() throws IOException {
-        URL url = new URL("https://www.pornhub.com/album/15680522");
+        URL url = new URL("https://www.pornhub.com/album/15680522?page=2");
         PornhubRipper ripper = new PornhubRipper(url);
         assertEquals("15680522", ripper.getGID(url));
+        url = new URL("https://www.pornhub.com/album/15680522");
+        assertEquals("15680522", ripper.getGID(url));
+    }
+
+    // alternate album, with only 2 pages: https://www.pornhub.com/album/4771891
+    public void testGetNextPage() throws IOException {
+        String baseURL = "https://www.pornhub.com/album/15680522";
+        PornhubRipper ripper = new PornhubRipper(new URL(baseURL));
+        Document page = Http.url(baseURL).get();
+        int numPagesRemaining = 4;
+        for (int idx = 0; idx < numPagesRemaining; idx++){
+            page = ripper.getNextPage(page);
+            assertEquals(baseURL + "?page=" + (idx + 2), page.location());
+        }
+        try {
+            page = ripper.getNextPage(page);
+            fail("Get next page did not throw an exception on the last page");
+        } catch(IOException e){
+            assertEquals(e.getMessage(), "No more pages");
+        }
     }
 }
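To run just these tests locally, something like the following should work with the project's standard Maven/Surefire setup (the exact invocation is an assumption, not part of this commit):

    mvn test -Dtest=PornhubRipperTest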