mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-08-17 19:26:34 +02:00
Merge pull request #1492 from felixaufreisen/motherlessFix
Fixed MotherlessRipper
This commit is contained in:
@@ -16,8 +16,11 @@ import com.rarchives.ripme.ripper.DownloadThreadPool;
|
|||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
public class MotherlessRipper extends AbstractHTMLRipper {
|
public class MotherlessRipper extends AbstractHTMLRipper {
|
||||||
|
// All sleep times are in milliseconds
|
||||||
|
private static final int IMAGE_SLEEP_TIME = 1000;
|
||||||
|
|
||||||
private static final String DOMAIN = "motherless.com",
|
private static final String DOMAIN = "motherless.com",
|
||||||
HOST = "motherless";
|
HOST = "motherless";
|
||||||
@@ -46,7 +49,32 @@ public class MotherlessRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Document getFirstPage() throws IOException {
|
protected Document getFirstPage() throws IOException {
|
||||||
return Http.url(url).referrer("http://motherless.com").get();
|
URL firstURL = this.url;
|
||||||
|
String path = this.url.getPath();
|
||||||
|
// Check if "All Uploads" (/GMxxxx), Image (/GIxxxx) or Video (/GVxxxx) gallery since there's no "next" after the homepage (/Gxxxx)
|
||||||
|
Pattern p = Pattern.compile("[MIV]");
|
||||||
|
Matcher m = p.matcher(String.valueOf(path.charAt(2)));
|
||||||
|
boolean notHome = m.matches();
|
||||||
|
// If it's the homepage go to the "All Uploads" gallery (/Gxxxxx -> /GMxxxxx)
|
||||||
|
if (!notHome) {
|
||||||
|
StringBuilder newPath = new StringBuilder(path);
|
||||||
|
newPath.insert(2, "M");
|
||||||
|
firstURL = new URL(this.url, "https://" + DOMAIN + newPath);
|
||||||
|
LOGGER.info("Changed URL to " + firstURL);
|
||||||
|
}
|
||||||
|
return Http.url(firstURL).referrer("https://motherless.com").get();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
|
Elements nextPageLink = doc.head().select("link[rel=next]");
|
||||||
|
if (nextPageLink.isEmpty()) {
|
||||||
|
throw new IOException("Last page reached");
|
||||||
|
} else {
|
||||||
|
String referrerLink = doc.head().select("link[rel=canonical]").first().attr("href");
|
||||||
|
URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
|
||||||
|
return Http.url(nextURL).referrer(referrerLink).get();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -64,7 +92,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
String url;
|
String url;
|
||||||
if (!thumbURL.startsWith("http")) {
|
if (!thumbURL.startsWith("http")) {
|
||||||
url = "http://" + DOMAIN + thumbURL;
|
url = "https://" + DOMAIN + thumbURL;
|
||||||
} else {
|
} else {
|
||||||
url = thumbURL;
|
url = thumbURL;
|
||||||
}
|
}
|
||||||
@@ -83,6 +111,11 @@ public class MotherlessRipper extends AbstractHTMLRipper {
|
|||||||
// Create thread for finding image at "url" page
|
// Create thread for finding image at "url" page
|
||||||
MotherlessImageThread mit = new MotherlessImageThread(url, index);
|
MotherlessImageThread mit = new MotherlessImageThread(url, index);
|
||||||
motherlessThreadPool.addThread(mit);
|
motherlessThreadPool.addThread(mit);
|
||||||
|
try {
|
||||||
|
Thread.sleep(IMAGE_SLEEP_TIME);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
LOGGER.warn("Interrupted while waiting to load next image", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -112,40 +145,9 @@ public class MotherlessRipper extends AbstractHTMLRipper {
|
|||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(m.groupCount());
|
return m.group(m.groupCount());
|
||||||
}
|
}
|
||||||
throw new MalformedURLException("Expected URL format: http://motherless.com/GIXXXXXXX, got: " + url);
|
throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void rip() throws IOException {
|
|
||||||
int index = 0, page = 1;
|
|
||||||
String nextURL = this.url.toExternalForm();
|
|
||||||
while (nextURL != null) {
|
|
||||||
if (isStopped()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
LOGGER.info("Retrieving " + nextURL);
|
|
||||||
sendUpdate(STATUS.LOADING_RESOURCE, nextURL);
|
|
||||||
Document doc = getFirstPage();
|
|
||||||
List<String> URLs = getURLsFromPage(doc);
|
|
||||||
|
|
||||||
for (String url: URLs) {
|
|
||||||
downloadURL(new URL(url), index);
|
|
||||||
index ++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isThisATest()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Next page
|
|
||||||
nextURL = null;
|
|
||||||
page++;
|
|
||||||
if (doc.html().contains("?page=" + page)) {
|
|
||||||
nextURL = this.url.toExternalForm() + "?page=" + page;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
motherlessThreadPool.waitForThreads();
|
|
||||||
waitForThreads();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper class to find and download images found on "image" pages
|
* Helper class to find and download images found on "image" pages
|
||||||
|
Reference in New Issue
Block a user