1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-06 13:56:34 +02:00

e621 fix regex

This commit is contained in:
BlackBirdd
2020-04-09 14:04:12 +02:00
parent 56f0aa3da3
commit 52b7e9443d

View File

@@ -6,6 +6,8 @@ import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils; import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils; import com.rarchives.ripme.utils.Utils;
import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
@@ -35,23 +37,36 @@ public class E621Ripper extends AbstractHTMLRipper {
// Download thread pool constructed with the name "e621"; where it is used is outside this excerpt.
private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621"); private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
// Cookies passed along with page requests. Starts empty and is replaced by the parsed
// "e621.cookies" config string when that setting is non-empty.
private Map<String, String> cookies = new HashMap<String, String>(); private Map<String, String> cookies = new HashMap<String, String>();
// User agent passed along with page requests (present only in the post-change column).
// Starts as USER_AGENT and is reassigned from the "e621.useragent" config setting.
private String userAgent = USER_AGENT;
/**
 * Creates a ripper for the given URL; all work is delegated to the superclass constructor.
 *
 * @param url the URL handed to the superclass constructor
 * @throws IOException declared on the constructor signature; the source is not visible in this excerpt
 */
public E621Ripper(URL url) throws IOException { public E621Ripper(URL url) throws IOException {
super(url); super(url);
} }
/**
 * Loads the e621 settings from the application config.
 *
 * These rows are a side-by-side diff: the left column is the pre-change loadCookies(),
 * the right column is its replacement loadConfig(). Rows with a single column exist
 * only in loadConfig().
 *
 * Both versions read the "e621.cookies" config string (default "") and, when it is
 * non-empty, parse it into the cookies map with RipUtils.getCookiesFromString. If a
 * "cf_clearance" cookie is present a status update is sent: the old version used
 * RIP_ERRORED, the new version uses DOWNLOAD_WARN with an extended message.
 *
 * loadConfig() additionally sends a DOWNLOAD_WARN update when a "remember" cookie is
 * present, and reads "e621.useragent" into userAgent, falling back to USER_AGENT.
 */
private void loadCookies() { private void loadConfig() {
String cookiesString = Utils.getConfigString("e621.cookies", ""); String cookiesString = Utils.getConfigString("e621.cookies", "");
if(!cookiesString.equals("")) { if(!cookiesString.equals("")) {
cookies = RipUtils.getCookiesFromString(cookiesString); cookies = RipUtils.getCookiesFromString(cookiesString);
if(cookies.containsKey("cf_clearance")) if(cookies.containsKey("cf_clearance"))
sendUpdate(RipStatusMessage.STATUS.RIP_ERRORED, "Using CloudFlare captcha cookies, make sure to update them in config!"); sendUpdate(STATUS.DOWNLOAD_WARN, "Using CloudFlare captcha cookies, make sure to update them and set your browser's useragent in config!");
if(cookies.containsKey("remember"))
sendUpdate(STATUS.DOWNLOAD_WARN, "Logging in using auth cookie.");
} }
// The user agent is read whether or not any cookies were configured.
userAgent = Utils.getConfigString("e621.useragent", USER_AGENT);
} }
/**
 * Sends a DOWNLOAD_WARN status update when the page contains at least one element
 * matching "div.hidden-posts-notice"; otherwise does nothing.
 *
 * The two columns differ only in how the status constant is referenced
 * (RipStatusMessage.STATUS.DOWNLOAD_WARN versus STATUS.DOWNLOAD_WARN).
 *
 * @param page the document to inspect
 */
private void warnAboutBlacklist(Document page) { private void warnAboutBlacklist(Document page) {
if(!page.select("div.hidden-posts-notice").isEmpty()) if(!page.select("div.hidden-posts-notice").isEmpty())
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Some posts are blacklisted. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options"); sendUpdate(STATUS.DOWNLOAD_WARN, "Some posts are blacklisted. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options");
}
/**
 * Fetches the page at {@code url} as a {@link Document}.
 *
 * <p>The request is built through the Http helper with this ripper's user agent and
 * cookies, and {@code retries} is handed to the helper's {@code retries(...)} setting.
 *
 * @param url     address of the page to fetch
 * @param retries value passed through to the Http helper's {@code retries(...)}
 * @return the document produced by the Http helper's {@code get()}
 * @throws IOException propagated from the Http request
 */
private Document getDocument(String url, int retries) throws IOException {
    return Http.url(url)
            .userAgent(this.userAgent)
            .retries(retries)
            .cookies(this.cookies)
            .get();
}
/**
 * Convenience overload: fetches the page at the given URL by calling
 * getDocument(url, 1), i.e. with a retries argument of 1.
 *
 * @param url address of the page to fetch
 * @return the document returned by the two-argument overload
 * @throws IOException propagated from the two-argument overload
 */
private Document getDocument(String url) throws IOException {
return getDocument(url, 1);
} }
@Override @Override
@@ -71,12 +86,12 @@ public class E621Ripper extends AbstractHTMLRipper {
@Override @Override
public Document getFirstPage() throws IOException { public Document getFirstPage() throws IOException {
loadCookies(); loadConfig();
Document page; Document page;
if (url.getPath().startsWith("/pool")) if (url.getPath().startsWith("/pool"))
page = Http.url("https://e621.net/pools/" + getTerm(url)).cookies(cookies).get(); page = getDocument("https://e621.net/pools/" + getTerm(url));
else else
page = Http.url("https://e621.net/posts?tags=" + getTerm(url)).cookies(cookies).get(); page = getDocument("https://e621.net/posts?tags=" + getTerm(url));
warnAboutBlacklist(page); warnAboutBlacklist(page);
return page; return page;
@@ -100,7 +115,7 @@ public class E621Ripper extends AbstractHTMLRipper {
public Document getNextPage(Document page) throws IOException { public Document getNextPage(Document page) throws IOException {
warnAboutBlacklist(page); warnAboutBlacklist(page);
if (!page.select("a#paginator-next").isEmpty()) { if (!page.select("a#paginator-next").isEmpty()) {
return Http.url(page.select("a#paginator-next").attr("abs:href")).cookies(cookies).get(); return getDocument(page.select("a#paginator-next").attr("abs:href"));
} else { } else {
throw new IOException("No more pages."); throw new IOException("No more pages.");
} }
@@ -123,7 +138,7 @@ public class E621Ripper extends AbstractHTMLRipper {
gidPatternPool = Pattern.compile( gidPatternPool = Pattern.compile(
"^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$"); "^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
if (gidPatternNew == null) if (gidPatternNew == null)
gidPatternNew = Pattern.compile("^https?://(www\\.)?e621\\.net/posts\\?tags=([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\&[\\S]+)?"); gidPatternNew = Pattern.compile("^https?://(www\\.)?e621\\.net/posts\\?([\\S]*?)tags=([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\&[\\S]+)?");
if (gidPatternPoolNew == null) if (gidPatternPoolNew == null)
gidPatternPoolNew = Pattern.compile("^https?://(www\\.)?e621\\.net/pools/([\\d]+)(\\?[\\S]*)?"); gidPatternPoolNew = Pattern.compile("^https?://(www\\.)?e621\\.net/pools/([\\d]+)(\\?[\\S]*)?");
@@ -140,8 +155,8 @@ public class E621Ripper extends AbstractHTMLRipper {
m = gidPatternNew.matcher(url.toExternalForm()); m = gidPatternNew.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
LOGGER.info(m.group(2)); LOGGER.info(m.group(3));
return m.group(2); return m.group(3);
} }
m = gidPatternPoolNew.matcher(url.toExternalForm()); m = gidPatternPoolNew.matcher(url.toExternalForm());
@@ -199,7 +214,7 @@ public class E621Ripper extends AbstractHTMLRipper {
} }
private String getFullSizedImage(URL imageURL) throws IOException { private String getFullSizedImage(URL imageURL) throws IOException {
Document page = Http.url(imageURL).cookies(cookies).retries(3).get(); Document page = getDocument(imageURL.toExternalForm(), 3);
/*Elements video = page.select("video > source"); /*Elements video = page.select("video > source");
Elements flash = page.select("embed"); Elements flash = page.select("embed");
Elements image = page.select("a#highres"); Elements image = page.select("a#highres");