mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-01-17 20:58:31 +01:00
- Fixed Motherless ripper
- Increased default retries to 3 (why only 1?)
- Fixed deprecation message for Motherless ripper
This commit is contained in:
parent
a178d8f6b9
commit
71e20fe851
@ -5,6 +5,8 @@ import java.net.MalformedURLException;
|
|||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Paths;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
@ -61,7 +63,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
|
|||||||
if (!notHome) {
|
if (!notHome) {
|
||||||
StringBuilder newPath = new StringBuilder(path);
|
StringBuilder newPath = new StringBuilder(path);
|
||||||
newPath.insert(2, "M");
|
newPath.insert(2, "M");
|
||||||
firstURL = new URL(this.url, "https://" + DOMAIN + newPath);
|
firstURL = URI.create("https://" + DOMAIN + newPath).toURL();
|
||||||
LOGGER.info("Changed URL to " + firstURL);
|
LOGGER.info("Changed URL to " + firstURL);
|
||||||
}
|
}
|
||||||
return Http.url(firstURL).referrer("https://motherless.com").get();
|
return Http.url(firstURL).referrer("https://motherless.com").get();
|
||||||
@ -69,6 +71,9 @@ public class MotherlessRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getNextPage(Document doc) throws IOException, URISyntaxException {
|
public Document getNextPage(Document doc) throws IOException, URISyntaxException {
|
||||||
|
|
||||||
|
Files.write(Paths.get("doc-next-page.txt"), doc.outerHtml().getBytes());
|
||||||
|
|
||||||
Elements nextPageLink = doc.head().select("link[rel=next]");
|
Elements nextPageLink = doc.head().select("link[rel=next]");
|
||||||
if (nextPageLink.isEmpty()) {
|
if (nextPageLink.isEmpty()) {
|
||||||
throw new IOException("Last page reached");
|
throw new IOException("Last page reached");
|
||||||
@ -111,7 +116,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
|
|||||||
@Override
|
@Override
|
||||||
protected void downloadURL(URL url, int index) {
|
protected void downloadURL(URL url, int index) {
|
||||||
// Create thread for finding image at "url" page
|
// Create thread for finding image at "url" page
|
||||||
MotherlessImageThread mit = new MotherlessImageThread(url, index);
|
MotherlessImageRunnable mit = new MotherlessImageRunnable(url, index);
|
||||||
motherlessThreadPool.addThread(mit);
|
motherlessThreadPool.addThread(mit);
|
||||||
try {
|
try {
|
||||||
Thread.sleep(IMAGE_SLEEP_TIME);
|
Thread.sleep(IMAGE_SLEEP_TIME);
|
||||||
@ -150,15 +155,19 @@ public class MotherlessRipper extends AbstractHTMLRipper {
|
|||||||
throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url);
|
throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DownloadThreadPool getThreadPool() {
|
||||||
|
return motherlessThreadPool;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper class to find and download images found on "image" pages
|
* Helper class to find and download images found on "image" pages
|
||||||
*/
|
*/
|
||||||
private class MotherlessImageThread implements Runnable {
|
private class MotherlessImageRunnable implements Runnable {
|
||||||
private final URL url;
|
private final URL url;
|
||||||
private final int index;
|
private final int index;
|
||||||
|
|
||||||
MotherlessImageThread(URL url, int index) {
|
MotherlessImageRunnable(URL url, int index) {
|
||||||
super();
|
super();
|
||||||
this.url = url;
|
this.url = url;
|
||||||
this.index = index;
|
this.index = index;
|
||||||
|
@ -56,7 +56,7 @@ public class Http {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void defaultSettings() {
|
private void defaultSettings() {
|
||||||
this.retries = Utils.getConfigInteger("download.retries", 1);
|
this.retries = Utils.getConfigInteger("download.retries", 3);
|
||||||
this.retrySleep = Utils.getConfigInteger("download.retry.sleep", 5000);
|
this.retrySleep = Utils.getConfigInteger("download.retry.sleep", 5000);
|
||||||
connection = Jsoup.connect(this.url);
|
connection = Jsoup.connect(this.url);
|
||||||
connection.userAgent(AbstractRipper.USER_AGENT);
|
connection.userAgent(AbstractRipper.USER_AGENT);
|
||||||
|
@ -6,7 +6,7 @@ threads.size = 5
|
|||||||
file.overwrite = false
|
file.overwrite = false
|
||||||
|
|
||||||
# Number of retries on failed downloads
|
# Number of retries on failed downloads
|
||||||
download.retries = 1
|
download.retries = 3
|
||||||
|
|
||||||
# File download timeout (in milliseconds)
|
# File download timeout (in milliseconds)
|
||||||
download.timeout = 60000
|
download.timeout = 60000
|
||||||
|
Loading…
x
Reference in New Issue
Block a user