1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-15 10:24:04 +02:00

E621Ripper.java - add a rate limit to avoid 'SocketTimeoutException: Read timed out'; code formatting

This commit is contained in:
MetaPrime
2025-01-06 03:36:29 -08:00
parent 022461a862
commit 2dfb627e36

View File

@@ -1,13 +1,5 @@
package com.rarchives.ripme.ripper.rippers; package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URI; import java.net.URI;
@@ -26,7 +18,13 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
public class E621Ripper extends AbstractHTMLRipper { public class E621Ripper extends AbstractHTMLRipper {
private static final Logger logger = LogManager.getLogger(E621Ripper.class); private static final Logger logger = LogManager.getLogger(E621Ripper.class);
@@ -49,20 +47,26 @@ public class E621Ripper extends AbstractHTMLRipper {
private void loadConfig() { private void loadConfig() {
String cookiesString = Utils.getConfigString("e621.cookies", ""); String cookiesString = Utils.getConfigString("e621.cookies", "");
if(!cookiesString.equals("")) { if (!cookiesString.equals("")) {
cookies = RipUtils.getCookiesFromString(cookiesString); cookies = RipUtils.getCookiesFromString(cookiesString);
if(cookies.containsKey("cf_clearance"))
sendUpdate(STATUS.DOWNLOAD_WARN, "Using CloudFlare captcha cookies, make sure to update them and set your browser's useragent in config!"); if (cookies.containsKey("cf_clearance")) {
if(cookies.containsKey("remember")) sendUpdate(STATUS.DOWNLOAD_WARN,
"Using CloudFlare captcha cookies, make sure to update them and set your browser's useragent in config!");
}
if (cookies.containsKey("remember")) {
sendUpdate(STATUS.DOWNLOAD_WARN, "Logging in using auth cookie."); sendUpdate(STATUS.DOWNLOAD_WARN, "Logging in using auth cookie.");
}
} }
userAgent = Utils.getConfigString("e621.useragent", USER_AGENT); userAgent = Utils.getConfigString("e621.useragent", USER_AGENT);
} }
private void warnAboutBlacklist(Document page) { private void warnAboutBlacklist(Document page) {
if(!page.select("div.hidden-posts-notice").isEmpty()) if (!page.select("div.hidden-posts-notice").isEmpty())
sendUpdate(STATUS.DOWNLOAD_WARN, "Some posts are blacklisted. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options"); sendUpdate(STATUS.DOWNLOAD_WARN,
"Some posts are blacklisted. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options");
} }
private Document getDocument(String url, int retries) throws IOException { private Document getDocument(String url, int retries) throws IOException {
@@ -92,11 +96,12 @@ public class E621Ripper extends AbstractHTMLRipper {
public Document getFirstPage() throws IOException { public Document getFirstPage() throws IOException {
loadConfig(); loadConfig();
Document page; Document page;
if (url.getPath().startsWith("/pool")) if (url.getPath().startsWith("/pool")) {
page = getDocument("https://e621.net/pools/" + getTerm(url)); page = getDocument("https://e621.net/pools/" + getTerm(url));
else } else {
page = getDocument("https://e621.net/posts?tags=" + getTerm(url)); page = getDocument("https://e621.net/posts?tags=" + getTerm(url));
}
warnAboutBlacklist(page); warnAboutBlacklist(page);
return page; return page;
} }
@@ -127,6 +132,8 @@ public class E621Ripper extends AbstractHTMLRipper {
@Override @Override
public void downloadURL(final URL url, int index) { public void downloadURL(final URL url, int index) {
// rate limit
sleep(3000);
// addURLToDownload(url, getPrefix(index)); // addURLToDownload(url, getPrefix(index));
e621ThreadPool.addThread(new E621FileThread(url, getPrefix(index))); e621ThreadPool.addThread(new E621FileThread(url, getPrefix(index)));
} }
@@ -135,16 +142,24 @@ public class E621Ripper extends AbstractHTMLRipper {
// old url style => new url style: // old url style => new url style:
// /post/index/1/<tags> => /posts?tags=<tags> // /post/index/1/<tags> => /posts?tags=<tags>
// /pool/show/<id> => /pools/id // /pool/show/<id> => /pools/id
if (gidPattern == null) if (gidPattern == null) {
gidPattern = Pattern.compile( gidPattern = Pattern.compile(
"^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$"); "^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$");
if (gidPatternPool == null) }
if (gidPatternPool == null) {
gidPatternPool = Pattern.compile( gidPatternPool = Pattern.compile(
"^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$"); "^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
if (gidPatternNew == null) }
gidPatternNew = Pattern.compile("^https?://(www\\.)?e621\\.net/posts\\?([\\S]*?)tags=([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\&[\\S]+)?");
if (gidPatternPoolNew == null) if (gidPatternNew == null) {
gidPatternNew = Pattern.compile(
"^https?://(www\\.)?e621\\.net/posts\\?([\\S]*?)tags=([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\&[\\S]+)?");
}
if (gidPatternPoolNew == null) {
gidPatternPoolNew = Pattern.compile("^https?://(www\\.)?e621\\.net/pools/([\\d]+)(\\?[\\S]*)?"); gidPatternPoolNew = Pattern.compile("^https?://(www\\.)?e621\\.net/pools/([\\d]+)(\\?[\\S]*)?");
}
Matcher m = gidPattern.matcher(url.toExternalForm()); Matcher m = gidPattern.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
@@ -184,19 +199,20 @@ public class E621Ripper extends AbstractHTMLRipper {
@Override @Override
public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException { public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
if (gidPattern2 == null) if (gidPattern2 == null) {
gidPattern2 = Pattern.compile( gidPattern2 = Pattern.compile(
"^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$"); "^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$");
}
Matcher m = gidPattern2.matcher(url.toExternalForm()); Matcher m = gidPattern2.matcher(url.toExternalForm());
if (m.matches()) if (m.matches()) {
return new URI("https://e621.net/post/index/1/" + m.group(2).replace("+", "%20")).toURL(); return new URI("https://e621.net/post/index/1/" + m.group(2).replace("+", "%20")).toURL();
}
return url; return url;
} }
public class E621FileThread implements Runnable { public class E621FileThread implements Runnable {
private final URL url; private final URL url;
private final String index; private final String index;
@@ -219,24 +235,29 @@ public class E621Ripper extends AbstractHTMLRipper {
private String getFullSizedImage(URL imageURL) throws IOException { private String getFullSizedImage(URL imageURL) throws IOException {
Document page = getDocument(imageURL.toExternalForm(), 3); Document page = getDocument(imageURL.toExternalForm(), 3);
/*Elements video = page.select("video > source");
Elements flash = page.select("embed"); /*
Elements image = page.select("a#highres"); * Elements video = page.select("video > source");
if (video.size() > 0) { * Elements flash = page.select("embed");
return video.attr("src"); * Elements image = page.select("a#highres");
} else if (flash.size() > 0) { * if (video.size() > 0) {
return flash.attr("src"); * return video.attr("src");
} else if (image.size() > 0) { * } else if (flash.size() > 0) {
return image.attr("href"); * return flash.attr("src");
} else { * } else if (image.size() > 0) {
throw new IOException(); * return image.attr("href");
}*/ * } else {
* throw new IOException();
* }
*/
if (!page.select("div#image-download-link > a").isEmpty()) { if (!page.select("div#image-download-link > a").isEmpty()) {
return page.select("div#image-download-link > a").attr("abs:href"); return page.select("div#image-download-link > a").attr("abs:href");
} else { } else {
if(!page.select("#blacklist-box").isEmpty()) if (!page.select("#blacklist-box").isEmpty()) {
sendUpdate(RipStatusMessage.STATUS.RIP_ERRORED, "Cannot download image - blocked by blacklist. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options"); sendUpdate(RipStatusMessage.STATUS.RIP_ERRORED,
"Cannot download image - blocked by blacklist. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options");
}
throw new IOException(); throw new IOException();
} }
} }