1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-01-17 20:58:31 +01:00

- Fixed Motherless ripper

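- Increased the default number of download retries from 1 to 3 (why was it only 1?)
- Fixed the deprecation warning in the Motherless ripper (replaced the deprecated URL constructor with URI.create(...).toURL())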
This commit is contained in:
brant spar 2024-06-28 13:09:31 +10:00 committed by soloturn
parent a178d8f6b9
commit 71e20fe851
3 changed files with 16 additions and 7 deletions

View File

@ -5,6 +5,8 @@ import java.net.MalformedURLException;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.net.URL; import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -61,7 +63,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
if (!notHome) { if (!notHome) {
StringBuilder newPath = new StringBuilder(path); StringBuilder newPath = new StringBuilder(path);
newPath.insert(2, "M"); newPath.insert(2, "M");
firstURL = new URL(this.url, "https://" + DOMAIN + newPath); firstURL = URI.create("https://" + DOMAIN + newPath).toURL();
LOGGER.info("Changed URL to " + firstURL); LOGGER.info("Changed URL to " + firstURL);
} }
return Http.url(firstURL).referrer("https://motherless.com").get(); return Http.url(firstURL).referrer("https://motherless.com").get();
@ -69,6 +71,9 @@ public class MotherlessRipper extends AbstractHTMLRipper {
@Override @Override
public Document getNextPage(Document doc) throws IOException, URISyntaxException { public Document getNextPage(Document doc) throws IOException, URISyntaxException {
Files.write(Paths.get("doc-next-page.txt"), doc.outerHtml().getBytes());
Elements nextPageLink = doc.head().select("link[rel=next]"); Elements nextPageLink = doc.head().select("link[rel=next]");
if (nextPageLink.isEmpty()) { if (nextPageLink.isEmpty()) {
throw new IOException("Last page reached"); throw new IOException("Last page reached");
@ -111,7 +116,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
@Override @Override
protected void downloadURL(URL url, int index) { protected void downloadURL(URL url, int index) {
// Create thread for finding image at "url" page // Create thread for finding image at "url" page
MotherlessImageThread mit = new MotherlessImageThread(url, index); MotherlessImageRunnable mit = new MotherlessImageRunnable(url, index);
motherlessThreadPool.addThread(mit); motherlessThreadPool.addThread(mit);
try { try {
Thread.sleep(IMAGE_SLEEP_TIME); Thread.sleep(IMAGE_SLEEP_TIME);
@ -150,15 +155,19 @@ public class MotherlessRipper extends AbstractHTMLRipper {
throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url); throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url);
} }
/**
 * Returns this ripper's own thread pool, {@code motherlessThreadPool}.
 *
 * @return the {@code motherlessThreadPool} field
 */
@Override
protected DownloadThreadPool getThreadPool() {
return motherlessThreadPool;
}
/** /**
* Helper class to find and download images found on "image" pages * Helper class to find and download images found on "image" pages
*/ */
private class MotherlessImageThread implements Runnable { private class MotherlessImageRunnable implements Runnable {
private final URL url; private final URL url;
private final int index; private final int index;
MotherlessImageThread(URL url, int index) { MotherlessImageRunnable(URL url, int index) {
super(); super();
this.url = url; this.url = url;
this.index = index; this.index = index;

View File

@ -56,7 +56,7 @@ public class Http {
} }
private void defaultSettings() { private void defaultSettings() {
this.retries = Utils.getConfigInteger("download.retries", 1); this.retries = Utils.getConfigInteger("download.retries", 3);
this.retrySleep = Utils.getConfigInteger("download.retry.sleep", 5000); this.retrySleep = Utils.getConfigInteger("download.retry.sleep", 5000);
connection = Jsoup.connect(this.url); connection = Jsoup.connect(this.url);
connection.userAgent(AbstractRipper.USER_AGENT); connection.userAgent(AbstractRipper.USER_AGENT);

View File

@ -6,7 +6,7 @@ threads.size = 5
file.overwrite = false file.overwrite = false
# Number of retries on failed downloads # Number of retries on failed downloads
download.retries = 1 download.retries = 3
# File download timeout (in milliseconds) # File download timeout (in milliseconds)
download.timeout = 60000 download.timeout = 60000