mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-08-06 13:56:34 +02:00
e621 fix regex
This commit is contained in:
@@ -6,6 +6,8 @@ import com.rarchives.ripme.utils.Http;
|
|||||||
import com.rarchives.ripme.utils.RipUtils;
|
import com.rarchives.ripme.utils.RipUtils;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||||
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
@@ -35,23 +37,36 @@ public class E621Ripper extends AbstractHTMLRipper {
|
|||||||
private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
|
private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
|
||||||
|
|
||||||
private Map<String, String> cookies = new HashMap<String, String>();
|
private Map<String, String> cookies = new HashMap<String, String>();
|
||||||
|
private String userAgent = USER_AGENT;
|
||||||
|
|
||||||
public E621Ripper(URL url) throws IOException {
|
public E621Ripper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void loadCookies() {
|
private void loadConfig() {
|
||||||
String cookiesString = Utils.getConfigString("e621.cookies", "");
|
String cookiesString = Utils.getConfigString("e621.cookies", "");
|
||||||
if(!cookiesString.equals("")) {
|
if(!cookiesString.equals("")) {
|
||||||
cookies = RipUtils.getCookiesFromString(cookiesString);
|
cookies = RipUtils.getCookiesFromString(cookiesString);
|
||||||
if(cookies.containsKey("cf_clearance"))
|
if(cookies.containsKey("cf_clearance"))
|
||||||
sendUpdate(RipStatusMessage.STATUS.RIP_ERRORED, "Using CloudFlare captcha cookies, make sure to update them in config!");
|
sendUpdate(STATUS.DOWNLOAD_WARN, "Using CloudFlare captcha cookies, make sure to update them and set your browser's useragent in config!");
|
||||||
|
if(cookies.containsKey("remember"))
|
||||||
|
sendUpdate(STATUS.DOWNLOAD_WARN, "Logging in using auth cookie.");
|
||||||
}
|
}
|
||||||
|
userAgent = Utils.getConfigString("e621.useragent", USER_AGENT);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void warnAboutBlacklist(Document page) {
|
private void warnAboutBlacklist(Document page) {
|
||||||
if(!page.select("div.hidden-posts-notice").isEmpty())
|
if(!page.select("div.hidden-posts-notice").isEmpty())
|
||||||
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Some posts are blacklisted. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options");
|
sendUpdate(STATUS.DOWNLOAD_WARN, "Some posts are blacklisted. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options");
|
||||||
|
}
|
||||||
|
|
||||||
|
private Document getDocument(String url, int retries) throws IOException {
|
||||||
|
return Http.url(url).userAgent(userAgent).retries(retries).cookies(cookies).get();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Document getDocument(String url) throws IOException {
|
||||||
|
return getDocument(url, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -71,12 +86,12 @@ public class E621Ripper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
loadCookies();
|
loadConfig();
|
||||||
Document page;
|
Document page;
|
||||||
if (url.getPath().startsWith("/pool"))
|
if (url.getPath().startsWith("/pool"))
|
||||||
page = Http.url("https://e621.net/pools/" + getTerm(url)).cookies(cookies).get();
|
page = getDocument("https://e621.net/pools/" + getTerm(url));
|
||||||
else
|
else
|
||||||
page = Http.url("https://e621.net/posts?tags=" + getTerm(url)).cookies(cookies).get();
|
page = getDocument("https://e621.net/posts?tags=" + getTerm(url));
|
||||||
|
|
||||||
warnAboutBlacklist(page);
|
warnAboutBlacklist(page);
|
||||||
return page;
|
return page;
|
||||||
@@ -100,7 +115,7 @@ public class E621Ripper extends AbstractHTMLRipper {
|
|||||||
public Document getNextPage(Document page) throws IOException {
|
public Document getNextPage(Document page) throws IOException {
|
||||||
warnAboutBlacklist(page);
|
warnAboutBlacklist(page);
|
||||||
if (!page.select("a#paginator-next").isEmpty()) {
|
if (!page.select("a#paginator-next").isEmpty()) {
|
||||||
return Http.url(page.select("a#paginator-next").attr("abs:href")).cookies(cookies).get();
|
return getDocument(page.select("a#paginator-next").attr("abs:href"));
|
||||||
} else {
|
} else {
|
||||||
throw new IOException("No more pages.");
|
throw new IOException("No more pages.");
|
||||||
}
|
}
|
||||||
@@ -123,7 +138,7 @@ public class E621Ripper extends AbstractHTMLRipper {
|
|||||||
gidPatternPool = Pattern.compile(
|
gidPatternPool = Pattern.compile(
|
||||||
"^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
|
"^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
|
||||||
if (gidPatternNew == null)
|
if (gidPatternNew == null)
|
||||||
gidPatternNew = Pattern.compile("^https?://(www\\.)?e621\\.net/posts\\?tags=([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\&[\\S]+)?");
|
gidPatternNew = Pattern.compile("^https?://(www\\.)?e621\\.net/posts\\?([\\S]*?)tags=([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\&[\\S]+)?");
|
||||||
if (gidPatternPoolNew == null)
|
if (gidPatternPoolNew == null)
|
||||||
gidPatternPoolNew = Pattern.compile("^https?://(www\\.)?e621\\.net/pools/([\\d]+)(\\?[\\S]*)?");
|
gidPatternPoolNew = Pattern.compile("^https?://(www\\.)?e621\\.net/pools/([\\d]+)(\\?[\\S]*)?");
|
||||||
|
|
||||||
@@ -140,8 +155,8 @@ public class E621Ripper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
m = gidPatternNew.matcher(url.toExternalForm());
|
m = gidPatternNew.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
LOGGER.info(m.group(2));
|
LOGGER.info(m.group(3));
|
||||||
return m.group(2);
|
return m.group(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
m = gidPatternPoolNew.matcher(url.toExternalForm());
|
m = gidPatternPoolNew.matcher(url.toExternalForm());
|
||||||
@@ -199,7 +214,7 @@ public class E621Ripper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private String getFullSizedImage(URL imageURL) throws IOException {
|
private String getFullSizedImage(URL imageURL) throws IOException {
|
||||||
Document page = Http.url(imageURL).cookies(cookies).retries(3).get();
|
Document page = getDocument(imageURL.toExternalForm(), 3);
|
||||||
/*Elements video = page.select("video > source");
|
/*Elements video = page.select("video > source");
|
||||||
Elements flash = page.select("embed");
|
Elements flash = page.select("embed");
|
||||||
Elements image = page.select("a#highres");
|
Elements image = page.select("a#highres");
|
||||||
|
Reference in New Issue
Block a user