diff --git a/patch.py b/patch.py
index 5ed45626..aa53755d 100644
--- a/patch.py
+++ b/patch.py
@@ -12,6 +12,9 @@ from hashlib import sha256
# - commit all changes
message = input('message: ')
+# Strip any spaces that might've been entered before the message
+message = message.lstrip()
+
def get_ripme_json():
with open('ripme.json') as dataFile:
diff --git a/pom.xml b/pom.xml
index 81ad033f..39a5fb29 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
com.rarchives.ripme
ripme
jar
- 1.7.77
+ 1.7.83
ripme
http://rip.rarchives.com
diff --git a/ripme.json b/ripme.json
index 9bf13c14..6f2317bf 100644
--- a/ripme.json
+++ b/ripme.json
@@ -1,6 +1,13 @@
{
- "currentHash": "34f326ec23f3c1ce8df1147c1d9660a1dd7b85074e79351c9295bd74ac8f127a",
+ "currentHash": "4994abc3d8102823c3f35159a0759707fa4c1ccea0746081954f6acfdbe63d8f",
+ "latestVersion": "1.7.83",
"changeList": [
+ "1.7.83: Added a ripper for hentaifox.com; Added ripper for Erofus.com; Fixed fsktr not ripping some images; Added support for Gfycat profiles; Added opt to disable prefix for HentaifoundryRipper ",
+ "1.7.82: Hentai foundry now rips oldest first by default; 8muses ripper no longer makes unneeded requests; Added support for i.thechive.com",
+ "1.7.81: Added support for artstn.co; Added new boolean config twitter.rip_retweet; Fixed MulemaxRipper; Fix minor bug that could cause a freeze at pending 1",
+ "1.7.80: Fixed porncomix.one ripper; Fixed instagram ripper; Fixed Fuskator ripper; Fixed handling of urls with spaces in them",
+ "1.7.79: Fixed artstation ripper; Fixed imagefap ripper folder naming; Can now filter reddit posts by votes; Added Ripper for Xlecx; Linux/Mac updater is now pure java",
+ "1.7.78: Fixed gfycat ripper; Fixed E621 ripper; Added support for new xhamster url format; Now supports furaffinty scraps",
"1.7.77: Reduced log spam; HQporner now supports actress/category/studio/top links; Improved luscious ripper; Fixed Pornhub video ripper; Tumblr ripper now always downloads highest quality available",
"1.7.76: Fixed remember url history",
"1.7.75: Fix e-hentai ripper; added comixfap ripper; fixed writting urls to files on windows; Fixed update screen issues; Added support for hentaidude; Fixed erome ripper",
@@ -248,6 +255,5 @@
"1.0.4: Fixed spaces-in-directory bug",
"1.0.3: Added VK.com ripper",
"1.0.1: Added auto-update functionality"
- ],
- "latestVersion": "1.7.77"
+ ]
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
index b24017f7..e1c7c507 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@@ -93,6 +93,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
// We set doc to null here so the while loop below this doesn't fire
doc = null;
+ LOGGER.debug("Adding items from " + this.url + " to queue");
}
while (doc != null) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index e708ef68..1220c5f4 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -218,6 +218,44 @@ public abstract class AbstractRipper
protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies,
Boolean getFileExtFromMIME);
+ /**
+ * Queues image to be downloaded and saved.
+ * @param url
+ * URL of the file
+ * @param options
+ * A map containing any changes to the default options.
+ * Recognized keys are prefix, subdirectory, referrer, fileName, extension, and getFileExtFromMIME.
+ * getFileExtFromMIME should be "true" or "false"
+ * @param cookies
+ * The cookies to send to the server while downloading this file.
+ * @return
+ * True if downloaded successfully
+ * False if failed to download
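+ *
+ * <p>Illustrative call from a ripper (hypothetical call site, not part of this patch):
+ * <pre>{@code
+ * Map<String, String> opts = new HashMap<>();
+ * opts.put("prefix", getPrefix(index));
+ * opts.put("getFileExtFromMIME", "true");
+ * addURLToDownload(url, opts, cookies);
+ * }</pre>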
+ */
+ protected boolean addURLToDownload(URL url, Map<String, String> options, Map<String, String> cookies) {
+ // Bit of a hack but this lets us pass a bool using a map
+ boolean useMIME = options.getOrDefault("getFileExtFromMIME", "false").toLowerCase().equals("true");
+ return addURLToDownload(url, options.getOrDefault("prefix", ""), options.getOrDefault("subdirectory", ""), options.getOrDefault("referrer", null),
+ cookies, options.getOrDefault("fileName", null), options.getOrDefault("extension", null), useMIME);
+ }
+
+
+ /**
+ * Queues image to be downloaded and saved.
+ * @param url
+ * URL of the file
+ * @param options
+ * A map containing any changes to the default options.
+ * Recognized keys are prefix, subdirectory, referrer, fileName, extension, and getFileExtFromMIME.
+ * getFileExtFromMIME should be "true" or "false"
+ * @return
+ * True if downloaded successfully
+ * False if failed to download
+ */
+ protected boolean addURLToDownload(URL url, Map<String, String> options) {
+ return addURLToDownload(url, options, null);
+ }
+
/**
* Queues image to be downloaded and saved.
* @param url
@@ -237,6 +275,22 @@ public abstract class AbstractRipper
* False if failed to download
*/
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies, String fileName, String extension, Boolean getFileExtFromMIME) {
+ // A common ripper bug is adding URLs that are just "http:" or "https:"; reject those outright
+ if (url.toExternalForm().equals("http:") || url.toExternalForm().equals("https:")) {
+ LOGGER.info(url.toExternalForm() + " is an invalid URL and will be skipped");
+ return false;
+ }
+ // Make sure the url doesn't contain any spaces as that can cause a 400 error when requesting the file
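+ // e.g. "http://example.com/a b.jpg" becomes "http://example.com/a%20b.jpg" (illustrative URL)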
+ if (url.toExternalForm().contains(" ")) {
+ // If the URL is somehow still malformed after encoding spaces as %20, log an error
+ try {
+ url = new URL(url.toExternalForm().replaceAll(" ", "%20"));
+ } catch (MalformedURLException e) {
+ LOGGER.error("Unable to remove spaces from url\nURL: " + url.toExternalForm());
+ e.printStackTrace();
+ }
+ }
// Don't re-add the url if it was downloaded in a previous rip
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
if (hasDownloadedURL(url.toExternalForm())) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
index 6f57ec0c..3b1e7c16 100644
--- a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
+++ b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
@@ -1,7 +1,6 @@
package com.rarchives.ripme.ripper;
import java.io.*;
-import java.lang.reflect.Array;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
@@ -14,13 +13,11 @@ import java.util.ResourceBundle;
import javax.net.ssl.HttpsURLConnection;
import com.rarchives.ripme.ui.MainWindow;
-import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.jsoup.HttpStatusException;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
-import static java.lang.Math.toIntExact;
/**
* Thread for downloading files.
@@ -139,6 +136,7 @@ class DownloadFileThread extends Thread {
int statusCode = huc.getResponseCode();
logger.debug("Status code: " + statusCode);
+ // If the server doesn't support resuming downloads, error out
if (statusCode != 206 && observer.tryResumeDownload() && saveAs.exists()) {
// TODO find a better way to handle servers that don't support resuming downloads then just erroring out
throw new IOException(rb.getString("server.doesnt.support.resuming.downloads"));
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java
index 611d9be6..6e1b4820 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java
@@ -7,12 +7,13 @@ import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
+import org.json.JSONObject;
+import org.jsoup.Connection;
+import org.jsoup.Connection.Method;
+import org.jsoup.Connection.Response;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
-import org.json.JSONObject;
-
public class ArtStationRipper extends AbstractJSONRipper {
enum URL_TYPE {
SINGLE_PROJECT, USER_PORTFOLIO, UNKNOWN
@@ -47,7 +48,8 @@ public class ArtStationRipper extends AbstractJSONRipper {
if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) {
// URL points to single project, use project title as GID
try {
- groupData = Http.url(albumURL.getLocation()).getJSON();
+ // groupData = Http.url(albumURL.getLocation()).getJSON();
+ groupData = getJson(albumURL.getLocation());
} catch (IOException e) {
throw new MalformedURLException("Couldn't load JSON from " + albumURL.getLocation());
}
@@ -58,7 +60,8 @@ public class ArtStationRipper extends AbstractJSONRipper {
// URL points to user portfolio, use user's full name as GID
String userInfoURL = "https://www.artstation.com/users/" + albumURL.getID() + "/quick.json";
try {
- groupData = Http.url(userInfoURL).getJSON();
+ // groupData = Http.url(userInfoURL).getJSON();
+ groupData = getJson(userInfoURL);
} catch (IOException e) {
throw new MalformedURLException("Couldn't load JSON from " + userInfoURL);
}
@@ -67,26 +70,29 @@ public class ArtStationRipper extends AbstractJSONRipper {
// No JSON found in the URL entered, can't rip
throw new MalformedURLException(
- "Expected URL to an ArtStation project or user profile - got " + url + " instead");
+ "Expected URL to an ArtStation 'project url' or 'user profile url' - got " + url + " instead");
}
@Override
protected JSONObject getFirstPage() throws IOException {
if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) {
// URL points to JSON of a single project, just return it
- return Http.url(albumURL.getLocation()).getJSON();
+ // return Http.url(albumURL.getLocation()).getJSON();
+ return getJson(albumURL.getLocation());
}
if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) {
// URL points to JSON of a list of projects, load it to parse individual
// projects
- JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON();
+ // JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON();
+ JSONObject albumContent = getJson(albumURL.getLocation());
if (albumContent.getInt("total_count") > 0) {
// Get JSON of the first project and return it
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(0);
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
- return Http.url(projectURL.getLocation()).getJSON();
+ // return Http.url(projectURL.getLocation()).getJSON();
+ return getJson(projectURL.getLocation());
}
}
@@ -112,14 +118,16 @@ public class ArtStationRipper extends AbstractJSONRipper {
}
Integer currentProject = ((projectPageNumber - 1) * 50) + (projectIndex + 1);
- JSONObject albumContent = Http.url(albumURL.getLocation() + "?page=" + projectPageNumber).getJSON();
+ // JSONObject albumContent = Http.url(albumURL.getLocation() + "?page=" + projectPageNumber).getJSON();
+ JSONObject albumContent = getJson(albumURL.getLocation() + "?page=" + projectPageNumber);
if (albumContent.getInt("total_count") > currentProject) {
// Get JSON of the next project and return it
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(projectIndex);
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
projectIndex++;
- return Http.url(projectURL.getLocation()).getJSON();
+ // return Http.url(projectURL.getLocation()).getJSON();
+ return getJson(projectURL.getLocation());
}
throw new IOException("No more projects");
@@ -181,9 +189,12 @@ public class ArtStationRipper extends AbstractJSONRipper {
/**
* Construct a new ParsedURL object.
*
- * @param urlType URL_TYPE enum containing the URL type
- * @param jsonURL String containing the JSON URL location
- * @param urlID String containing the ID of this URL
+ * @param urlType
+ * URL_TYPE enum containing the URL type
+ * @param jsonURL
+ * String containing the JSON URL location
+ * @param urlID
+ * String containing the ID of this URL
*
*/
ParsedURL(URL_TYPE urlType, String jsonURL, String urlID) {
@@ -226,7 +237,8 @@ public class ArtStationRipper extends AbstractJSONRipper {
/**
* Parses an ArtStation URL.
*
- * @param url URL to an ArtStation user profile
+ * @param url
+ * URL to an ArtStation user profile
* (https://www.artstation.com/username) or single project
* (https://www.artstation.com/artwork/projectid)
* @return ParsedURL object containing URL type, JSON location and ID (stores
@@ -239,7 +251,30 @@ public class ArtStationRipper extends AbstractJSONRipper {
// Load HTML Source of the specified URL
try {
- htmlSource = Http.url(url).get().html();
+ // htmlSource = Http.url(url).get().html();
+ Connection con = Http.url(url).method(Method.GET).connection();
+ con.ignoreHttpErrors(true);
+ con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
+ con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
+ con.header("Accept-Language", "en-US,en;q=0.5");
+ con.header("Accept-Encoding", "gzip, deflate, br");
+ con.header("Upgrade-Insecure-Requests", "1");
+ Response res = con.execute();
+ int status = res.statusCode();
+
+ if (status / 100 == 2) {
+ htmlSource = res.parse().html();
+ } else if (status == 403 && url.toString().contains("artwork/")) {
+ // Catches cloudflare page. Error 403.
+ // Usually caused by artwork URLs (artstation.com/artwork/someProjectId)
+ String urlId = url.toString().substring(url.toString().lastIndexOf("/") + 1);
+ String jsonURL = "https://www.artstation.com/projects/" + urlId + ".json";
+ parsedURL = new ParsedURL(URL_TYPE.SINGLE_PROJECT, jsonURL, urlId);
+ return parsedURL;
+ } else {
+ LOGGER.error("Couldnt fetch URL: " + url);
+ throw new IOException("Error fetching URL: " + url + " Status Code: " + status);
+ }
} catch (IOException e) {
htmlSource = "";
}
@@ -266,5 +301,28 @@ public class ArtStationRipper extends AbstractJSONRipper {
parsedURL = new ParsedURL(URL_TYPE.UNKNOWN, null, null);
return parsedURL;
}
+
+ // Use this method instead of calling Http.url(url).getJSON() directly, to avoid Cloudflare's 403 page.
+ private JSONObject getJson(URL url) throws IOException {
+ Connection con = Http.url(url).method(Method.GET).connection();
+ con.ignoreHttpErrors(true);
+ con.ignoreContentType(true);
+ con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
+ con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
+ con.header("Accept-Language", "en-US,en;q=0.5");
+ con.header("Accept-Encoding", "gzip, deflate, br");
+ con.header("Upgrade-Insecure-Requests", "1");
+ Response res = con.execute();
+ int status = res.statusCode();
+ if (status / 100 == 2) {
+ String jsonString = res.body();
+ return new JSONObject(jsonString);
+ }
+ throw new IOException("Error fetching json. Status code:" + status);
+ }
+
+ private JSONObject getJson(String url) throws IOException {
+ return getJson(new URL(url));
+ }
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ArtstnRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtstnRipper.java
new file mode 100644
index 00000000..82b6e97c
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtstnRipper.java
@@ -0,0 +1,58 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+
+import org.jsoup.Connection.Response;
+
+import com.rarchives.ripme.utils.Http;
+
+/*
+ * Ripper for ArtStation's short URL domain.
+ * Example URL: https://artstn.co/p/JlE15Z
+ */
+
+public class ArtstnRipper extends ArtStationRipper {
+ public URL artStationUrl = null;
+
+ public ArtstnRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public boolean canRip(URL url) {
+ return url.getHost().endsWith("artstn.co");
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ if (artStationUrl == null) {
+ // Run only once.
+ try {
+ artStationUrl = getFinalUrl(url);
+ if (artStationUrl == null) {
+ throw new IOException("Null url received.");
+ }
+ } catch (IOException e) {
+ LOGGER.error("Couldn't resolve URL.", e);
+ throw new MalformedURLException("Couldn't resolve short link " + url);
+ }
+
+ }
+ return super.getGID(artStationUrl);
+ }
+
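+ /**
+ * Follows the redirect chain of a short link until it lands on artstation.com,
+ * e.g. https://artstn.co/p/JlE15Z redirecting (302) to a
+ * https://www.artstation.com/artwork/... page (illustrative target).
+ * Returns null if the chain ends without reaching artstation.com.
+ */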
+ public URL getFinalUrl(URL url) throws IOException {
+ if (url.getHost().endsWith("artstation.com")) {
+ return url;
+ }
+
+ LOGGER.info("Checking url: " + url);
+ Response response = Http.url(url).connection().followRedirects(false).execute();
+ if (response.statusCode() / 100 == 3 && response.hasHeader("location")) {
+ return getFinalUrl(new URL(response.header("location")));
+ } else {
+ return null;
+ }
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
index 1e47b4dd..a55cdf09 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
@@ -12,11 +12,13 @@ import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
+import com.rarchives.ripme.utils.Utils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
public class ChanRipper extends AbstractHTMLRipper {
- private static List<ChanSite> explicit_domains = Arrays.asList(
+ private static List<ChanSite> bakedin_explicit_domains = Arrays.asList(
new ChanSite("boards.4chan.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
new ChanSite("boards.4channel.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
new ChanSite("4archive.org", "imgur.com"),
@@ -29,6 +31,34 @@ public class ChanRipper extends AbstractHTMLRipper {
new ChanSite("desuarchive.org", "desu-usergeneratedcontent.xyz"),
new ChanSite("8ch.net", "media.8ch.net")
);
+ private static List<ChanSite> user_give_explicit_domains = getChansFromConfig(Utils.getConfigString("chans.chan_sites", null));
+ private static List<ChanSite> explicit_domains = new ArrayList<>();
+
+ /**
+ * Reads a string in the format site1[cdn1|cdn2|cdn3],site2[cdn] and builds the corresponding ChanSite list.
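+ * e.g. {@code chans.chan_sites = boards.examplechan.org[cdn.examplechan.org|img.examplechan.org],otherchan.net}
+ * (hypothetical domains, for illustration only)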
+ */
+ public static List<ChanSite> getChansFromConfig(String rawChanString) {
+ List<ChanSite> userChans = new ArrayList<>();
+ if (rawChanString != null) {
+ String[] listOfChans = rawChanString.split(",");
+ for (String chanInfo : listOfChans) {
+ // If this is true we're parsing a chan with cdns
+ if (chanInfo.contains("[")) {
+ String siteUrl = chanInfo.split("\\[")[0];
+ String[] cdns = chanInfo.replaceAll(siteUrl + "\\[", "").replaceAll("]", "").split("\\|");
+ LOGGER.debug("site url: " + siteUrl);
+ LOGGER.debug("cdn: " + Arrays.toString(cdns));
+ userChans.add(new ChanSite(siteUrl, Arrays.asList(cdns)));
+ } else {
+ // We're parsing a site without cdns
+ LOGGER.debug("site: " + chanInfo);
+ userChans.add(new ChanSite(chanInfo));
+ }
+ }
+ return userChans;
+ }
+ return null;
+ }
private static List<String> url_piece_blacklist = Arrays.asList(
"=http",
@@ -43,6 +73,7 @@ public class ChanRipper extends AbstractHTMLRipper {
public ChanRipper(URL url) throws IOException {
super(url);
for (ChanSite _chanSite : explicit_domains) {
+ LOGGER.info(_chanSite.domains);
if (_chanSite.domains.contains(url.getHost())) {
chanSite = _chanSite;
generalChanSite = false;
@@ -86,6 +117,10 @@ public class ChanRipper extends AbstractHTMLRipper {
@Override
public boolean canRip(URL url) {
+ // Populate the combined domain list only once; canRip may be called repeatedly
+ if (explicit_domains.isEmpty()) {
+ explicit_domains.addAll(bakedin_explicit_domains);
+ if (user_give_explicit_domains != null) {
+ explicit_domains.addAll(user_give_explicit_domains);
+ }
+ }
for (ChanSite _chanSite : explicit_domains) {
if (_chanSite.domains.contains(url.getHost())) {
return true;
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java
new file mode 100644
index 00000000..08b27a76
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java
@@ -0,0 +1,173 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+/**
+ * @author Tushar
+ *
+ */
+public class ComicextraRipper extends AbstractHTMLRipper {
+
+ private static final String FILE_NAME = "page";
+
+ private Pattern p1 =
+ Pattern.compile("https:\\/\\/www.comicextra.com\\/comic\\/([A-Za-z0-9_-]+)");
+ private Pattern p2 = Pattern.compile(
+ "https:\\/\\/www.comicextra.com\\/([A-Za-z0-9_-]+)\\/([A-Za-z0-9_-]+)(?:\\/full)?");
+ private UrlType urlType = UrlType.UNKNOWN;
+ private List<String> chaptersList = null;
+ private int chapterIndex = -1; // index for the chaptersList, useful in getting the next page.
+ private int imageIndex = 0; // image index for each chapter images.
+
+ public ComicextraRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ protected String getDomain() {
+ return "comicextra.com";
+ }
+
+ @Override
+ public String getHost() {
+ return "comicextra";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Matcher m1 = p1.matcher(url.toExternalForm());
+ if (m1.matches()) {
+ // URL is of a comic (https://www.comicextra.com/comic/the-punisher-frank-castle-max).
+ urlType = UrlType.COMIC;
+ return m1.group(1);
+ }
+
+ Matcher m2 = p2.matcher(url.toExternalForm());
+ if (m2.matches()) {
+ // URL is of a chapter (https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75).
+ urlType = UrlType.CHAPTER;
+ return m2.group(1);
+ }
+
+ throw new MalformedURLException(
+ "Expected comicextra.com url of type: https://www.comicextra.com/comic/some-comic-name\n"
+ + " or https://www.comicextra.com/some-comic-name/chapter-001 got " + url
+ + " instead");
+ }
+
+ @Override
+ protected Document getFirstPage() throws IOException {
+ Document doc = null;
+
+ switch (urlType) {
+ case COMIC:
+ // For COMIC type url we extract the urls of each chapters and store them in chapters.
+ chaptersList = new ArrayList<>();
+ Document comicPage = Http.url(url).get();
+ Elements elements = comicPage.select("div.episode-list a");
+ for (Element e : elements) {
+ chaptersList.add(getCompleteChapterUrl(e.attr("abs:href")));
+ }
+
+ // Set the first chapter from the chapterList as the doc.
+ chapterIndex = 0;
+ doc = Http.url(chaptersList.get(chapterIndex)).get();
+ break;
+ case CHAPTER:
+ doc = Http.url(url).get();
+ break;
+ case UNKNOWN:
+ default:
+ throw new IOException("Unknown url type encountered.");
+ }
+
+ return doc;
+ }
+
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ if (urlType == UrlType.COMIC) {
+ ++chapterIndex;
+ imageIndex = 0; // Reset imageIndex so that image prefixes within each chapter start from '001_'.
+ if (chapterIndex < chaptersList.size()) {
+ return Http.url(chaptersList.get(chapterIndex)).get();
+ }
+ }
+
+ return super.getNextPage(doc);
+ }
+
+ @Override
+ protected List<String> getURLsFromPage(Document page) {
+ List<String> urls = new ArrayList<>();
+
+ if (urlType == UrlType.COMIC || urlType == UrlType.CHAPTER) {
+ Elements images = page.select("img.chapter_img");
+ for (Element img : images) {
+ urls.add(img.attr("src"));
+ }
+ }
+
+ return urls;
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
+ String subdirectory = getSubDirectoryName();
+ String prefix = getPrefix(++imageIndex);
+
+ addURLToDownload(url, prefix, subdirectory, null, null, FILE_NAME, null, Boolean.TRUE);
+ }
+
+ /*
+ * This function appends /full to the end of the chapter's URL to get all the images for the
+ * chapter in the same Document.
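+ * e.g. https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75 becomes
+ * https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75/full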
+ */
+ private String getCompleteChapterUrl(String chapterUrl) {
+ if (!chapterUrl.endsWith("/full")) {
+ chapterUrl = chapterUrl + "/full";
+ }
+ return chapterUrl;
+ }
+
+ /*
+ * This function returns the subfolder name for the current chapter.
+ */
+ private String getSubDirectoryName() {
+ String subDirectory = "";
+
+ if (urlType == UrlType.COMIC) {
+ Matcher m = p2.matcher(chaptersList.get(chapterIndex));
+ if (m.matches()) {
+ subDirectory = m.group(2);
+ }
+ }
+
+ if (urlType == UrlType.CHAPTER) {
+ Matcher m = p2.matcher(url.toExternalForm());
+ if (m.matches()) {
+ subDirectory = m.group(2);
+ }
+ }
+
+ return subDirectory;
+ }
+
+ /*
+ * Enum to classify different types of urls.
+ */
+ private enum UrlType {
+ COMIC, CHAPTER, UNKNOWN
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
index ad7d79fa..8a24e2c9 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
@@ -1,409 +1,645 @@
package com.rarchives.ripme.ripper.rippers;
-import com.rarchives.ripme.ripper.AbstractJSONRipper;
-import com.rarchives.ripme.utils.Base64;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ripper.DownloadThreadPool;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.net.HttpURLConnection;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Base64;
import java.util.HashMap;
-import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.json.JSONArray;
-import org.json.JSONObject;
+import org.jsoup.Connection;
+import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
-import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
+/**
+ *
+ * @author MrPlaygon
+ *
+ * NOT using the Deviantart API like the old JSON ripper because it is SLOW
+ * and somewhat annoying to use. Things to consider: Using the API might
+ * be less work/maintenance later because APIs do not change as
+ * frequently as HTML source code does...?
+ *
+ *
+ *
+ * Tested for:
+ *
+ * SFW:
+ *
+ * https://www.deviantart.com/apofiss/gallery/41388863/sceneries
+ * https://www.deviantart.com/kageuri/gallery/
+ * https://www.deviantart.com/kageuri/gallery/?catpath=/
+ * https://www.deviantart.com/apofiss/favourites/39881418/gifts-and
+ * https://www.deviantart.com/kageuri/favourites/
+ * https://www.deviantart.com/kageuri/favourites/?catpath=/
+ *
+ * NSFW:
+ *
+ * https://www.deviantart.com/revpeng/gallery/67734353/Siren-Lee-Agent-of-S-I-R-E-N-S
+ *
+ *
+ * Deactivated account:
+ *
+ * https://www.deviantart.com/gingerbreadpony
+ *
+ * Banned Account:
+ *
+ * https://www.deviantart.com/ghostofflossenburg
+ *
+ *
+ *
+ *
+ * Login Data (PLEASE DONT ACTUALLY USE!!!):
+ *
+ * email: 5g5_8l4dii5lbbpc@byom.de
+ *
+ * username: 5g58l4dii5lbbpc
+ *
+ * password: 5g5_8l4dii5lbbpc
+ *
+ *
+ *
+ */
+public class DeviantartRipper extends AbstractHTMLRipper {
-public class DeviantartRipper extends AbstractJSONRipper {
- String requestID;
- String galleryID;
- String username;
- String baseApiUrl = "https://www.deviantart.com/dapi/v1/gallery/";
- String csrf;
- Map pageCookies = new HashMap<>();
+ private final String username = "5g58l4dii5lbbpc";
+ private final String password = "5g5_8l4dii5lbbpc";
+ private int offset = 0;
+ private boolean usingCatPath = false;
+ private int downloadCount = 0;
+ private Map<String, String> cookies = new HashMap<>();
+ private DownloadThreadPool deviantartThreadPool = new DownloadThreadPool("deviantart");
+ private ArrayList<String> names = new ArrayList<>();
- private static final int PAGE_SLEEP_TIME = 3000,
- IMAGE_SLEEP_TIME = 2000;
+ List<String> allowedCookies = Arrays.asList("agegate_state", "userinfo", "auth", "auth_secure");
- private Map<String, String> cookies = new HashMap<>();
- private Set<String> triedURLs = new HashSet<>();
+ private Connection conn = null;
- public DeviantartRipper(URL url) throws IOException {
- super(url);
- }
+ // Constants
+ private final String referer = "https://www.deviantart.com/";
+ private final String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0";
+ private final String utilsKey = "DeviantartLogin.cookies";
- String loginCookies = "auth=__0f9158aaec09f417b235%3B%221ff79836392a515d154216d919eae573%22;" +
- "auth_secure=__41d14dd0da101f411bb0%3B%2281cf2cf9477776162a1172543aae85ce%22;" +
- "userinfo=__bf84ac233bfa8ae642e8%3B%7B%22username%22%3A%22grabpy%22%2C%22uniqueid%22%3A%22a0a876aa37dbd4b30e1c80406ee9c280%22%2C%22vd%22%3A%22BbHUXZ%2CBbHUXZ%2CA%2CU%2CA%2C%2CB%2CA%2CB%2CBbHUXZ%2CBbHUdj%2CL%2CL%2CA%2CBbHUdj%2C13%2CA%2CB%2CA%2C%2CA%2CA%2CB%2CA%2CA%2C%2CA%22%2C%22attr%22%3A56%7D";
+ @Override
+ public DownloadThreadPool getThreadPool() {
+ return deviantartThreadPool;
+ }
- @Override
- public String getHost() {
- return "deviantart";
- }
+ public DeviantartRipper(URL url) throws IOException {
+ super(url);
+ }
- @Override
- public String getDomain() {
- return "deviantart.com";
- }
+ @Override
+ protected String getDomain() {
+ return "deviantart.com";
+ }
- @Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- String u = url.toExternalForm();
- if (u.contains("/gallery/")) {
- return url;
- } else if (u.contains("/favourites")) {
- return url;
- } else if (u.contains("/favorites")) {
- return url;
- }
+ @Override
+ public String getHost() {
+ return "deviantart";
+ }
- if (!u.endsWith("/gallery/") && !u.endsWith("/gallery")) {
- if (!u.endsWith("/")) {
- u += "/gallery/";
- } else {
- u += "gallery/";
- }
- }
+ @Override
+ protected Document getFirstPage() throws IOException {
+ if (isDeactivated()) {
+ throw new IOException("Account Deactivated");
+ }
+ login();
+ // Saving connection to reuse later for following pages.
+ this.conn = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(this.referer)
+ .userAgent(this.userAgent).connection();
- Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/([0-9]+)/*?$");
- Matcher m = p.matcher(url.toExternalForm());
- if (!m.matches()) {
- String subdir = "/";
- if (u.contains("catpath=scraps")) {
- subdir = "scraps";
- }
- u = u.replaceAll("\\?.*", "?catpath=" + subdir);
- }
- return new URL(u);
- }
+ return this.conn.get();
+ }
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)(/gallery)?/?(\\?.*)?$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- // Root gallery
- if (url.toExternalForm().contains("catpath=scraps")) {
- return m.group(1) + "_scraps";
- }
- else {
- return m.group(1);
- }
- }
- p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/gallery/([0-9]+).*$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- // Subgallery
- return m.group(1) + "_" + m.group(2);
- }
- p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/([0-9]+)/.*?$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1) + "_faves_" + m.group(2);
- }
- p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/?$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- // Subgallery
- return m.group(1) + "_faves";
- }
- throw new MalformedURLException("Expected URL format: http://www.deviantart.com/username[/gallery/#####], got: " + url);
- }
+ /**
+ * Checks whether the URL refers to a deactivated account, based on the HTTP status code.
+ *
+ * @return true when the account is deactivated
+ * @throws IOException when the status page cannot be loaded
+ */
+ private boolean isDeactivated() throws IOException {
+ Response res = Http.url(this.url).connection().followRedirects(true).referrer(this.referer)
+ .userAgent(this.userAgent).execute();
+ return res.statusCode() != 200;
- private String getUsernameFromURL(String u) {
- Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/gallery/?(\\S+)?");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
- return null;
+ }
- }
+ /**
+ * Stores logged in Cookies. Needed for art pieces only visible to logged in
+ * users.
+ *
+ *
+ * @throws IOException when the webpage fails to load or the cookies cannot be
+ * read/written to the config file (shared when running multiple
+ * instances of RipMe)
+ */
+ private void login() throws IOException {
- private String getFullsizedNSFWImage(String pageURL) {
- try {
- Document doc = Http.url(pageURL).cookies(cookies).get();
- String imageToReturn = "";
- String[] d = doc.select("img").attr("srcset").split(",");
+ String customUsername = Utils.getConfigString("DeviantartCustomLoginUsername", this.username);
+ String customPassword = Utils.getConfigString("DeviantartCustomLoginPassword", this.password);
+ try {
+ String dACookies = Utils.getConfigString(utilsKey, null);
+ updateCookie(dACookies != null ? deserialize(dACookies) : null);
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ }
+ if (getDACookie() == null || !checkLogin()) {
+ LOGGER.info("Do Login now");
+ // Do login now
- String s = d[d.length -1].split(" ")[0];
- LOGGER.info("2:" + s);
+ // Load login page
+ Response res = Http.url("https://www.deviantart.com/users/login").connection().method(Method.GET)
+ .referrer(referer).userAgent(userAgent).execute();
- if (s == null || s.equals("")) {
- LOGGER.error("Could not find full sized image at " + pageURL);
- }
- return s;
- } catch (IOException e) {
- LOGGER.error("Could not find full sized image at " + pageURL);
- return null;
- }
- }
+ updateCookie(res.cookies());
- /**
- * Gets first page.
- * Will determine if login is supplied,
- * if there is a login, then login and add that login cookies.
- * Otherwise, just bypass the age gate with an anonymous flag.
- * @return
- * @throws IOException
- */
- @Override
- public JSONObject getFirstPage() throws IOException {
-
- // Base64 da login
- // username: Z3JhYnB5
- // password: ZmFrZXJz
+ // Find tokens
+ Document doc = res.parse();
+ Element form = doc.getElementById("login");
+ String token = form.select("input[name=\"validate_token\"]").first().attr("value");
+ String key = form.select("input[name=\"validate_key\"]").first().attr("value");
+ LOGGER.info("Token: " + token + " & Key: " + key);
+ // Build Login Data
+ HashMap<String, String> loginData = new HashMap<>();
+ loginData.put("challenge", "");
+ loginData.put("username", customUsername);
+ loginData.put("password", customPassword);
+ loginData.put("remember_me", "1");
+ loginData.put("validate_token", token);
+ loginData.put("validate_key", key);
+ Map<String, String> cookies = res.cookies();
- cookies = getDACookies();
- if (cookies.isEmpty()) {
- LOGGER.warn("Failed to get login cookies");
- cookies.put("agegate_state","1"); // Bypasses the age gate
- }
- cookies.put("agegate_state", "1");
-
- Response res = Http.url(this.url)
- .cookies(cookies)
- .response();
- Document page = res.parse();
+ // Log in using data. Handle redirect
+ res = Http.url("https://www.deviantart.com/users/login").connection().referrer(referer).userAgent(userAgent)
+ .method(Method.POST).data(loginData).cookies(cookies).followRedirects(false).execute();
+ updateCookie(res.cookies());
- JSONObject firstPageJSON = getFirstPageJSON(page);
- requestID = firstPageJSON.getJSONObject("dapx").getString("requestid");
- galleryID = getGalleryID(page);
- username = getUsernameFromURL(url.toExternalForm());
- csrf = firstPageJSON.getString("csrf");
- pageCookies = res.cookies();
+ res = Http.url(res.header("location")).connection().referrer(referer).userAgent(userAgent)
+ .method(Method.GET).cookies(cookies).followRedirects(false).execute();
- return requestPage(0, galleryID, username, requestID, csrf, pageCookies);
- }
+ // Store cookies
+ updateCookie(res.cookies());
- private JSONObject requestPage(int offset, String galleryID, String username, String requestID, String csfr, Map c) {
- LOGGER.debug("offset: " + Integer.toString(offset));
- LOGGER.debug("galleryID: " + galleryID);
- LOGGER.debug("username: " + username);
- LOGGER.debug("requestID: " + requestID);
- String url = baseApiUrl + galleryID + "?iid=" + requestID;
- try {
- Document doc = Http.url(url).cookies(c).data("username", username).data("offset", Integer.toString(offset))
- .data("limit", "24").data("_csrf", csfr).data("id", requestID)
- .ignoreContentType().post();
- return new JSONObject(doc.body().text());
- } catch (IOException e) {
- LOGGER.error("Got error trying to get page: " + e.getMessage());
- e.printStackTrace();
- return null;
- }
+ // Write Cookie to file for other RipMe Instances or later use
+ Utils.setConfigString(utilsKey, serialize(new HashMap<>(getDACookie())));
+ Utils.saveConfig(); // save now because of other instances that might work simultaneously
+ } else {
+ LOGGER.info("No new Login needed");
+ }
- }
+ LOGGER.info("DA Cookies: " + getDACookie());
+ }
- private JSONObject getFirstPageJSON(Document doc) {
- for (Element js : doc.select("script")) {
- if (js.html().contains("requestid")) {
- String json = js.html().replaceAll("window.__initial_body_data=", "").replaceAll("\\);", "")
- .replaceAll(";__wake\\(.+", "");
- JSONObject j = new JSONObject(json);
- return j;
- }
- }
- return null;
- }
+ /**
+ * Returns next page Document using offset.
+ */
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ this.offset += 24;
+ this.conn.url(urlWithParams(this.offset)).cookies(getDACookie());
+ Response re = this.conn.execute();
+// Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer).userAgent(userAgent)
+// .response();
+ updateCookie(re.cookies());
+ Document docu = re.parse();
+ Elements messages = docu.getElementsByClass("message");
+ LOGGER.info("Current Offset: " + this.offset);
- public String getGalleryID(Document doc) {
- // If the url contains catpath we return 0 as the DA api will provide all galery images if you sent the
- // gallery id to 0
- if (url.toExternalForm().contains("catpath=")) {
- return "0";
- }
- Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/[a-zA-Z0-9\\-]+/gallery/([0-9]+)/?\\S+");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
- for (Element el : doc.select("input[name=set]")) {
- try {
- String galleryID = el.attr("value");
- return galleryID;
- } catch (NullPointerException e) {
- continue;
- }
- }
- LOGGER.error("Could not find gallery ID");
- return null;
- }
+ if (messages.size() > 0) {
- public String getUsername(Document doc) {
- return doc.select("meta[property=og:title]").attr("content")
- .replaceAll("'s DeviantArt gallery", "").replaceAll("'s DeviantArt Gallery", "");
- }
-
+ // if message exists -> last page
+ LOGGER.info("Messages amount: " + messages.size() + " - Next Page does not exists");
+ throw new IOException("No more pages");
+ }
- @Override
- public List<String> getURLsFromJSON(JSONObject json) {
- List<String> imageURLs = new ArrayList<>();
- JSONArray results = json.getJSONObject("content").getJSONArray("results");
- for (int i = 0; i < results.length(); i++) {
- Document doc = Jsoup.parseBodyFragment(results.getJSONObject(i).getString("html"));
- if (doc.html().contains("ismature")) {
- LOGGER.info("Downloading nsfw image");
- String nsfwImage = getFullsizedNSFWImage(doc.select("span").attr("href"));
- if (nsfwImage != null && nsfwImage.startsWith("http")) {
- imageURLs.add(nsfwImage);
- }
- }
- try {
- String imageURL = doc.select("span").first().attr("data-super-full-img");
- if (!imageURL.isEmpty() && imageURL.startsWith("http")) {
- imageURLs.add(imageURL);
- }
- } catch (NullPointerException e) {
- LOGGER.info(i + " does not contain any images");
- }
+ return Http.url(urlWithParams(this.offset)).referrer(referer).userAgent(userAgent).cookies(getDACookie()).get();
- }
- return imageURLs;
- }
+ }
+ /**
+ * Returns the list of links to the image pages, NOT links to the full-size images! e.g.
+ * https://www.deviantart.com/kageuri/art/RUBY-568396655
+ */
+ @Override
+ protected List<String> getURLsFromPage(Document page) {
- @Override
- public JSONObject getNextPage(JSONObject page) throws IOException {
- boolean hasMore = page.getJSONObject("content").getBoolean("has_more");
- if (hasMore) {
- return requestPage(page.getJSONObject("content").getInt("next_offset"), galleryID, username, requestID, csrf, pageCookies);
- }
+ List<String> result = new ArrayList<>();
- throw new IOException("No more pages");
- }
+ Element div;
+ if (usingCatPath) {
+ div = page.getElementById("gmi-");
- @Override
- public boolean keepSortOrder() {
- // Don't keep sort order (do not add prefixes).
- // Causes file duplication, as outlined in https://github.com/4pr0n/ripme/issues/113
- return false;
- }
+ } else {
+ div = page.getElementsByClass("folderview-art").first().child(0);
- @Override
- public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
- sleep(IMAGE_SLEEP_TIME);
- }
+ }
+ Elements links = div.select("a.torpedo-thumb-link");
- /**
- * Tries to get full size image from thumbnail URL
- * @param thumb Thumbnail URL
- * @param throwException Whether or not to throw exception when full size image isn't found
- * @return Full-size image URL
- * @throws Exception If it can't find the full-size URL
- */
- private static String thumbToFull(String thumb, boolean throwException) throws Exception {
- thumb = thumb.replace("http://th", "http://fc");
- List fields = new ArrayList<>(Arrays.asList(thumb.split("/")));
- fields.remove(4);
- if (!fields.get(4).equals("f") && throwException) {
- // Not a full-size image
- throw new Exception("Can't get full size image from " + thumb);
- }
- StringBuilder result = new StringBuilder();
- for (int i = 0; i < fields.size(); i++) {
- if (i > 0) {
- result.append("/");
- }
- result.append(fields.get(i));
- }
- return result.toString();
- }
+ for (Element el : links) {
+ result.add(el.attr("href"));
+ }
+ LOGGER.info("Amount of Images on Page: " + result.size());
+ LOGGER.info(page.location());
- /**
- * If largest resolution for image at 'thumb' is found, starts downloading
- * and returns null.
- * If it finds a larger resolution on another page, returns the image URL.
- * @param thumb Thumbnail URL
- * @param page Page the thumbnail is retrieved from
- * @return Highest-resolution version of the image based on thumbnail URL and the page.
- */
- private String smallToFull(String thumb, String page) {
- try {
- // Fetch the image page
- Response resp = Http.url(page)
- .referrer(this.url)
- .cookies(cookies)
- .response();
- cookies.putAll(resp.cookies());
- Document doc = resp.parse();
- Elements els = doc.select("img.dev-content-full");
- String fsimage = null;
- // Get the largest resolution image on the page
- if (!els.isEmpty()) {
- // Large image
- fsimage = els.get(0).attr("src");
- LOGGER.info("Found large-scale: " + fsimage);
- if (fsimage.contains("//orig")) {
- return fsimage;
- }
- }
- // Try to find the download button
- els = doc.select("a.dev-page-download");
- if (!els.isEmpty()) {
- // Full-size image
- String downloadLink = els.get(0).attr("href");
- LOGGER.info("Found download button link: " + downloadLink);
- HttpURLConnection con = (HttpURLConnection) new URL(downloadLink).openConnection();
- con.setRequestProperty("Referer",this.url.toString());
- String cookieString = "";
- for (Map.Entry<String, String> entry : cookies.entrySet()) {
- cookieString = cookieString + entry.getKey() + "=" + entry.getValue() + "; ";
- }
- cookieString = cookieString.substring(0,cookieString.length() - 1);
- con.setRequestProperty("Cookie",cookieString);
- con.setRequestProperty("User-Agent", USER_AGENT);
- con.setInstanceFollowRedirects(true);
- con.connect();
- int code = con.getResponseCode();
- String location = con.getURL().toString();
- con.disconnect();
- if (location.contains("//orig")) {
- fsimage = location;
- LOGGER.info("Found image download: " + location);
- }
- }
- if (fsimage != null) {
- return fsimage;
- }
- throw new IOException("No download page found");
- } catch (IOException ioe) {
- try {
- LOGGER.info("Failed to get full size download image at " + page + " : '" + ioe.getMessage() + "'");
- String lessThanFull = thumbToFull(thumb, false);
- LOGGER.info("Falling back to less-than-full-size image " + lessThanFull);
- return lessThanFull;
- } catch (Exception e) {
- return null;
- }
- }
- }
+ return result;
+ }
- /**
- * Returns DA cookies.
- * @return Map of cookies containing session data.
- */
- private Map<String, String> getDACookies() {
- return RipUtils.getCookiesFromString(Utils.getConfigString("deviantart.cookies", loginCookies));
- }
+ /**
+ * Starts new Thread to find download link + filename + filetype
+ */
+ @Override
+ protected void downloadURL(URL url, int index) {
+ this.downloadCount += 1;
+ LOGGER.info("Downloading URL Number " + this.downloadCount);
+ LOGGER.info("Deviant Art URL: " + url.toExternalForm());
+ try {
+ Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer)
+ .userAgent(userAgent).response();
+ updateCookie(re.cookies());
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ // Start Thread and add to pool.
+ DeviantartImageThread t = new DeviantartImageThread(url);
+ deviantartThreadPool.addThread(t);
+
+ }
+
+ @Override
+ public String normalizeUrl(String url) {
+ return (urlWithParams(this.offset).toExternalForm());
+ }
+
+ /**
+ * Returns name of album. Album name consists of 3 words: - Artist (owner of
+ * gallery) - Type (gallery or favorites folder) - Name of the folder
+ *
+ * Returns artist_type_name
+ */
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+
+ String s = url.toExternalForm();
+ String artist = "unknown";
+ String what = "unknown";
+ String albumname = "unknown";
+
+ if (url.toExternalForm().contains("catpath=/")) {
+ this.usingCatPath = true;
+ }
+
+ Pattern p = Pattern.compile("^https?://www.deviantart\\.com/([a-zA-Z0-9]+).*$");
+ Matcher m = p.matcher(s);
+
+ // Artist
+ if (m.matches()) {
+ artist = m.group(1);
+ } else {
+ throw new MalformedURLException("Expected deviantart.com URL format: "
+ + "www.deviantart.com//gallery//\nOR\nwww.deviantart.com//favourites//\\nOr simply the gallery or favorites of some artist - got "
+ + url + " instead");
+ }
+
+ // What is it
+ if (s.contains("/gallery/")) {
+ what = "gallery";
+ } else if (s.contains("/favourites/")) {
+ what = "favourites";
+ } else {
+ throw new MalformedURLException("Expected deviantart.com URL format: "
+ + "www.deviantart.com//gallery//\nOR\nwww.deviantart.com//favourites//\nOr simply the gallery or favorites of some artist - got "
+ + url + " instead");
+ }
+
+ // Album Name
+ Pattern artistP = Pattern
+ .compile("^https?://www.deviantart\\.com/[a-zA-Z0-9]+/[a-zA-Z]+/[0-9]+/([a-zA-Z0-9-]+).*$");
+ Matcher artistM = artistP.matcher(s);
+ if (s.endsWith("?catpath=/")) {
+ albumname = "all";
+ } else if (s.endsWith("/favourites/") || s.endsWith("/gallery/")) {
+ albumname = "featured";
+ } else if (artistM.matches()) {
+ albumname = artistM.group(1);
+ }
+ LOGGER.info("Album Name: " + artist + "_" + what + "_" + albumname);
+
+ return artist + "_" + what + "_" + albumname;
+
+ }
+
+ /**
+ *
+ * @return Clean URL as String
+ */
+ private String cleanURL() {
+ return (this.url.toExternalForm().split("\\?"))[0];
+ }
+
+ /**
+ * Return correct url with params (catpath) and current offset
+ *
+ * @return URL to page with offset
+ */
+ private URL urlWithParams(int offset) {
+ try {
+ String url = cleanURL();
+ if (this.usingCatPath) {
+ return (new URL(url + "?catpath=/&offset=" + offset));
+ } else {
+ return (new URL(url + "?offset=" + offset));
+ }
+ } catch (MalformedURLException e) {
+ e.printStackTrace();
+ }
+ return null;
+ }
+
+ /**
+ * Returns the cookie map used for NSFW artworks. Not really needed but
+ * maybe useful later.
+ *
+ * @return Cookie Hashmap
+ */
+ private Map<String, String> getDACookie() {
+ return this.cookies;
+ }
+
+ /**
+ * Updates cookies
+ *
+ * @param m new Cookies
+ */
+ private void updateCookie(Map<String, String> m) {
+
+ if (m == null) {
+ return;
+ }
+
+ Iterator<String> iter = m.keySet().iterator();
+ while (iter.hasNext()) {
+ String current = iter.next();
+ if (!this.allowedCookies.contains(current)) {
+ // m.remove(current);
+ iter.remove();
+ }
+ }
+
+ LOGGER.info("Updating Cookies");
+ LOGGER.info("Old Cookies: " + getDACookie() + " ");
+ LOGGER.info("New Cookies: " + m + " ");
+ this.cookies.putAll(m);
+ this.cookies.put("agegate_state", "1");
+ LOGGER.info("Merged Cookies: " + getDACookie() + " ");
+
+ try {
+ Utils.setConfigString(utilsKey, serialize(new HashMap<>(getDACookie())));
+ Utils.saveConfig();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ }
+
+ /**
+ * Serializes an Object and returns a String that is ready to store. Used to store
+ * cookies in the config file because the deviantart cookies contain all sorts of
+ * special characters like ; , = : and so on.
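+ *
+ * <p>Illustrative round trip (sketch, not part of this patch):
+ * <pre>{@code
+ * String enc = serialize(new HashMap<>(getDACookie())); // Base64 text, safe for the config file
+ * Map<String, String> restored = deserialize(enc);
+ * }</pre>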
+ *
+ * @param o Object to serialize
+ * @return The serialized base64 encoded object
+ * @throws IOException
+ */
+ private String serialize(Serializable o) throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ ObjectOutputStream oos = new ObjectOutputStream(baos);
+ oos.writeObject(o);
+ oos.close();
+ return Base64.getEncoder().encodeToString(baos.toByteArray());
+ }
+
+ /**
+ * Recreates the object from the base64 encoded String. Used for Cookies
+ *
+ * @param s the Base64 encoded string
+ * @return the Cookie Map
+ * @throws IOException
+ * @throws ClassNotFoundException
+ */
+ private Map<String, String> deserialize(String s) throws IOException, ClassNotFoundException {
+ byte[] data = Base64.getDecoder().decode(s);
+ ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data));
+ HashMap<String, String> o = (HashMap<String, String>) ois.readObject(); // Unchecked cast here but should never
+ // be something else
+ ois.close();
+ return o;
+ }
+
+ /**
+ * Checks if the current cookies are still valid/usable. Also checks if agegate
+ * is given.
+ *
+ * @return True when all is good.
+ */
+ private boolean checkLogin() {
+ if (!getDACookie().containsKey("agegate_state")) {
+ LOGGER.info("No agegate key");
+ return false;
+ } else if (!getDACookie().get("agegate_state").equals("1")) {
+ LOGGER.info("Wrong agegate value");
+ return false;
+ }
+
+ try {
+ LOGGER.info("Login with Cookies: " + getDACookie());
+ Response res = Http.url("https://www.deviantart.com/users/login").connection().followRedirects(true)
+ .cookies(getDACookie()).referrer(this.referer).userAgent(this.userAgent).execute();
+ if (!res.url().toExternalForm().equals("https://www.deviantart.com/users/login")) {
+ LOGGER.info("Cookies are valid");
+ LOGGER.info(res.url());
+ return true;
+ } else {
+ LOGGER.info("Cookies invalid. Wrong URL: " + res.url());
+ LOGGER.info(res.statusCode());
+ LOGGER.info(res.parse());
+ return false;
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ return false;
+ }
+ }
+
+ /**
+ * Analyzes an image page like
+ * https://www.deviantart.com/kageuri/art/RUBY-568396655 .
+ *
+ * Looks for the download button, follows the authentication redirects, and adds
+ * the image URL to the download queue. If no download button is present it will
+ * use the largest available version of the image.
+ *
+ * Should work with all filetypes on Deviantart. Tested with .JPG .PNG and .PDF
+ *
+ * @author MrPlaygon
+ *
+ */
+ private class DeviantartImageThread extends Thread {
+ private URL url;
+
+ public DeviantartImageThread(URL url) {
+ this.url = url;
+ }
+
+ @Override
+ public void run() {
+ getFullSizeURL();
+ }
+
+ /**
+ * Resolves the full-size image URL (with file extension) for this thread's
+ * artwork page and queues it for download. Turns a page like
+ * https://www.deviantart.com/apofiss/art/warmest-of-the-days-455668450
+ * into an image URL like
+ * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/07f7a6bb-2d35-4630-93fc-be249af22b3e/d7jak0y-d20e5932-df72-4d13-b002-5e122037b373.jpg
+ *
+ */
+ private void getFullSizeURL() {
+
+ LOGGER.info("Searching max. Resolution for " + url);
+ sendUpdate(STATUS.LOADING_RESOURCE, "Searching max. resolution for " + url);
+ try {
+ Response re = Http.url(url).connection().referrer(referer).userAgent(userAgent).cookies(getDACookie())
+ .execute();
+ Document doc = re.parse();
+
+ // Artwork Title
+ String title = doc.select("a.title").first().html();
+ title = title.replaceAll("[^a-zA-Z0-9\\.\\-]", "_").toLowerCase();
+
+ int counter = 1;
+ if (names.contains(title)) {
+ while (names.contains(title + "_" + counter)) {
+ counter++;
+ }
+ title = title + "_" + counter;
+ }
+ names.add(title);
+
+ // Check for download button
+ Element downloadButton = null;
+
+ downloadButton = doc.select("a.dev-page-download").first();
+
+ // Download Button
+ if (downloadButton != null) {
+ LOGGER.info("Download Button found: " + downloadButton.attr("href"));
+
+ Response download = Http.url(downloadButton.attr("href")).connection().cookies(getDACookie())
+ .method(Method.GET).referrer(referer).userAgent(userAgent).ignoreContentType(true)
+ .followRedirects(true).execute();
+ URL location = download.url();
+
+ System.out.println("----------------> " + url);
+ String[] filetypePart = download.header("Content-Disposition").split("\\.");
+
+ LOGGER.info("Found Image URL");
+ LOGGER.info(url);
+ LOGGER.info(location);
+
+ addURLToDownload(location, "", "", "", getDACookie(),
+ title + "." + filetypePart[filetypePart.length - 1]);
+ return;
+ }
+
+ // No Download Button
+ Element div = doc.select("div.dev-view-deviation").first();
+
+ Element image = div.getElementsByTag("img").first();
+
+ String source = "";
+ if (image == null) {
+ LOGGER.error("ERROR on " + url);
+
+ LOGGER.error("Cookies: " + getDACookie() + " ");
+ LOGGER.error(div);
+ sendUpdate(STATUS.DOWNLOAD_ERRORED, "ERROR at\n" + url);
+ return;
+ }
+
+ // When it is text art (e.g. a story) the only image is the avatar (profile
+ // picture)
+ if (image.hasClass("avatar")) {
+ LOGGER.error("No Image found, probably text art");
+ LOGGER.error(url);
+ return;
+ }
+
+ source = image.attr("src");
+
+ String[] parts = source.split("/v1/");
+
+ // Image page uses scaled down version. Split at /v1/ to receive max size.
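+ // e.g. <base image URL>.jpg/v1/fill/w_1024,h_640,q_75,strp/<name>.jpg - keeping only
+ // the part before /v1/ yields the full-size original (illustrative URL shape)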
+ if (parts.length > 2) {
+ LOGGER.error("Unexpected URL Format");
+ sendUpdate(STATUS.DOWNLOAD_ERRORED, "Unexpected URL Format");
+ return;
+ }
+
+ String[] tmpParts = parts[0].split("\\.");
+
+ LOGGER.info("Found Image URL");
+ LOGGER.info(url);
+ LOGGER.info(parts[0]);
+ while (Http.url(parts[0]).connection().execute().statusCode() == 404) {
+ try {
+ LOGGER.error("404 on " + url);
+ Thread.sleep(1000);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ addURLToDownload(new URL(parts[0]), "", "", "", new HashMap(),
+ title + "." + tmpParts[tmpParts.length - 1]);
+ return;
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ LOGGER.error("No Full Size URL for: " + url);
+ sendUpdate(STATUS.DOWNLOAD_ERRORED, "No image found for " + url);
+
+ return;
+
+ }
+ }
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java
index 864a730a..534a1d0d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java
@@ -6,8 +6,6 @@ import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.IOException;
import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -18,136 +16,154 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
-public class E621Ripper extends AbstractHTMLRipper{
- private static final Logger logger = Logger.getLogger(E621Ripper.class);
+public class E621Ripper extends AbstractHTMLRipper {
+ private static final Logger logger = Logger.getLogger(E621Ripper.class);
- private static Pattern gidPattern=null;
- private static Pattern gidPattern2=null;
- private static Pattern gidPatternPool=null;
+ private static Pattern gidPattern = null;
+ private static Pattern gidPattern2 = null;
+ private static Pattern gidPatternPool = null;
- private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621");
+ private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
- public E621Ripper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public DownloadThreadPool getThreadPool() {
- return e621ThreadPool;
- }
-
- @Override
- public String getDomain() {
- return "e621.net";
- }
-
- @Override
- public String getHost() {
- return "e621";
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- if(url.getPath().startsWith("/pool/show/"))
- return Http.url("https://e621.net/pool/show/"+getTerm(url)).get();
- else
- return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get();
- }
-
- private String getFullSizedImage(String url) {
- try {
- Document page = Http.url("https://e621.net" + url).get();
- Elements video = page.select("video > source");
- Elements flash = page.select("embed");
- Elements image = page.select("a#highres");
- if (video.size() > 0) {
- return video.attr("src");
- } else if (flash.size() > 0) {
- return flash.attr("src");
- } else if (image.size() > 0) {
- return image.attr("href");
- } else {
- throw new IOException();
- }
- } catch (IOException e) {
- logger.error("Unable to get full sized image from " + url);
- return null;
- }
+ public E621Ripper(URL url) throws IOException {
+ super(url);
}
- @Override
- public List getURLsFromPage(Document page) {
- Elements elements = page.select("div > span.thumb > a");
- List res = new ArrayList<>();
+ @Override
+ public DownloadThreadPool getThreadPool() {
+ return e621ThreadPool;
+ }
- for(Element e:elements) {
- if (!e.attr("href").isEmpty()) {
- String fullSizedImage = getFullSizedImage(e.attr("href"));
- if (fullSizedImage != null && !fullSizedImage.equals("")) {
- res.add(getFullSizedImage(e.attr("href")));
- }
+ @Override
+ public String getDomain() {
+ return "e621.net";
+ }
+
+ @Override
+ public String getHost() {
+ return "e621";
+ }
+
+ @Override
+ public Document getFirstPage() throws IOException {
+ if (url.getPath().startsWith("/pool/show/"))
+ return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
+ else
+ return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
+ }
+
+ @Override
+ public List getURLsFromPage(Document page) {
+ Elements elements = page.select("div > span.thumb > a");
+ List res = new ArrayList<>();
+
+ for (Element e : elements) {
+ if (!e.attr("href").isEmpty()) {
+ res.add(e.attr("abs:href"));
}
- }
+ }
- return res;
- }
+ return res;
+ }
- @Override
- public Document getNextPage(Document page) throws IOException {
- if (page.select("a.next_page") != null) {
- return Http.url("https://e621.net" + page.select("a.next_page").attr("href")).get();
+ @Override
+ public Document getNextPage(Document page) throws IOException {
+ if (!page.select("a.next_page").isEmpty()) {
+ return Http.url(page.select("a.next_page").attr("abs:href")).get();
} else {
- throw new IOException("No more pages");
+ throw new IOException("No more pages.");
}
}
- @Override
- public void downloadURL(final URL url, int index) {
- addURLToDownload(url, getPrefix(index));
- }
+ @Override
+ public void downloadURL(final URL url, int index) {
+ e621ThreadPool.addThread(new E621FileThread(url, getPrefix(index)));
+ }
- private String getTerm(URL url) throws MalformedURLException{
- if(gidPattern==null)
- gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$");
- if(gidPatternPool==null)
- gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
+ private String getTerm(URL url) throws MalformedURLException {
+ if (gidPattern == null)
+ gidPattern = Pattern.compile(
+ "^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$");
+ if (gidPatternPool == null)
+ gidPatternPool = Pattern.compile(
+ "^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
- Matcher m = gidPattern.matcher(url.toExternalForm());
- if(m.matches()) {
+ Matcher m = gidPattern.matcher(url.toExternalForm());
+ if (m.matches()) {
LOGGER.info(m.group(2));
return m.group(2);
}
- m = gidPatternPool.matcher(url.toExternalForm());
- if(m.matches()) {
+ m = gidPatternPool.matcher(url.toExternalForm());
+ if (m.matches()) {
return m.group(2);
}
- throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
- }
+ throw new MalformedURLException(
+ "Expected e621.net URL format: e621.net/post/index/1/searchterm - got " + url + " instead");
+ }
- @Override
- public String getGID(URL url) throws MalformedURLException {
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ String prefix = "";
+ if (url.getPath().startsWith("/pool/show/")) {
+ prefix = "pool_";
+ }
+ return Utils.filesystemSafe(prefix + getTerm(url));
+ }
- String prefix="";
- if (url.getPath().startsWith("/pool/show/")) {
- prefix = "pool_";
+ @Override
+ public URL sanitizeURL(URL url) throws MalformedURLException {
+ if (gidPattern2 == null)
+ gidPattern2 = Pattern.compile(
+ "^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$");
+
+ Matcher m = gidPattern2.matcher(url.toExternalForm());
+ if (m.matches())
+ return new URL("https://e621.net/post/index/1/" + m.group(2).replace("+", "%20"));
+
+ return url;
+ }
+
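+    /**
+     * Worker thread that resolves an e621 post page to its full-sized media URL
+     * (video, flash, or image) and queues it for download, keeping slow page
+     * fetches off the main ripping loop.
+     */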
+ public class E621FileThread extends Thread {
+
+ private URL url;
+ private String index;
+
+ public E621FileThread(URL url, String index) {
+ this.url = url;
+ this.index = index;
+ }
+
+ @Override
+ public void run() {
+ try {
+ String fullSizedImage = getFullSizedImage(url);
+ if (fullSizedImage != null && !fullSizedImage.equals("")) {
+ addURLToDownload(new URL(fullSizedImage), index);
+ }
+ } catch (IOException e) {
+ logger.error("Unable to get full sized image from " + url);
+ }
+ }
+
+ private String getFullSizedImage(URL imageURL) throws IOException {
+ Document page = Http.url(imageURL).retries(3).get();
+ Elements video = page.select("video > source");
+ Elements flash = page.select("embed");
+ Elements image = page.select("a#highres");
+ if (video.size() > 0) {
+ return video.attr("src");
+ } else if (flash.size() > 0) {
+ return flash.attr("src");
+ } else if (image.size() > 0) {
+ return image.attr("href");
+ } else {
+ throw new IOException();
}
- return Utils.filesystemSafe(prefix+getTerm(url));
-
- }
-
- @Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- if(gidPattern2==null)
- gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$");
-
- Matcher m = gidPattern2.matcher(url.toExternalForm());
- if(m.matches())
- return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20"));
-
- return url;
- }
+ }
+ }
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
index 7c4d15c1..22968216 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
@@ -115,8 +115,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
String image = null;
if (thumb.hasAttr("data-cfsrc")) {
image = thumb.attr("data-cfsrc");
- }
- else {
+ } else {
// Deobfustace the json data
String rawJson = deobfuscateJSON(page.select("script#ractive-public").html()
.replaceAll("&gt;", ">").replaceAll("&lt;", "<").replace("&amp;", "&"));
@@ -125,17 +124,16 @@ public class EightmusesRipper extends AbstractHTMLRipper {
for (int i = 0; i != json.getJSONArray("pictures").length(); i++) {
image = "https://www.8muses.com/image/fl/" + json.getJSONArray("pictures").getJSONObject(i).getString("publicUri");
URL imageUrl = new URL(image);
- if (Utils.getConfigBoolean("8muses.use_short_names", false)) {
- addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, "", null, true);
- } else {
- addURLToDownload(imageUrl, getPrefixLong(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, "", null, true);
- }
+ addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, "", null, true);
// X is our page index
x++;
+ if (isThisATest()) {
+ break;
+ }
}
-
- } catch (IOException e) {
- continue;
+ return imageURLs;
+ } catch (MalformedURLException e) {
+ LOGGER.error("\"" + image + "\" is malformed");
}
}
if (!image.contains("8muses.com")) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java
new file mode 100644
index 00000000..dc535dea
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java
@@ -0,0 +1,119 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ui.RipStatusMessage;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class ErofusRipper extends AbstractHTMLRipper {
+
+ public ErofusRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public boolean hasASAPRipping() {
+ return true;
+ }
+
+ @Override
+ public String getHost() {
+ return "erofus";
+ }
+
+ @Override
+ public String getDomain() {
+ return "erofus.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p = Pattern.compile("^https://www.erofus.com/comics/([a-zA-Z0-9\\-_]+).*$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (!m.matches()) {
+            throw new MalformedURLException("Expected URL format: https://www.erofus.com/comics/albumname, got: " + url);
+ }
+ return m.group(m.groupCount());
+ }
+
+ @Override
+ public Document getFirstPage() throws IOException {
+ return Http.url(url).get();
+ }
+
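+    // Category and comic index pages are walked recursively: every a.a-click link
+    // pointing into /comics/ is fetched in turn until a page with /pic/ links is
+    // reached, whose images are then ripped via ripAlbum().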
+ @Override
+ public List getURLsFromPage(Document page) {
+        LOGGER.debug(page);
+        List imageURLs = new ArrayList<>();
+ if (pageContainsImages(page)) {
+ LOGGER.info("Page contains images");
+ ripAlbum(page);
+ } else {
+ // This contains the thumbnails of all images on the page
+ Elements pageImages = page.select("a.a-click");
+ for (Element pageLink : pageImages) {
+ if (super.isStopped()) break;
+ if (pageLink.attr("href").contains("comics")) {
+ String subUrl = "https://erofus.com" + pageLink.attr("href");
+ try {
+ LOGGER.info("Retrieving " + subUrl);
+ sendUpdate(RipStatusMessage.STATUS.LOADING_RESOURCE, subUrl);
+ Document subPage = Http.url(subUrl).get();
+                        // Recurse into the subalbum; its images are queued as a side effect.
+                        getURLsFromPage(subPage);
+ } catch (IOException e) {
+ LOGGER.warn("Error while loading subalbum " + subUrl, e);
+ }
+ }
+ if (isThisATest()) break;
+ }
+ }
+
+ return imageURLs;
+ }
+
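+    // Thumbnails live under a /thumb/ path; swapping "thumb" for "medium" in the
+    // src yields the larger rendition that gets queued for download.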
+ public void ripAlbum(Document page) {
+ int x = 1;
+ Elements thumbs = page.select("a.a-click > div.thumbnail > img");
+ for (Element thumb : thumbs) {
+ String image = "https://www.erofus.com" + thumb.attr("src").replaceAll("thumb", "medium");
+ try {
+ Map opts = new HashMap();
+ opts.put("subdirectory", page.title().replaceAll(" \\| Erofus - Sex and Porn Comics", "").replaceAll(" ", "_"));
+ opts.put("prefix", getPrefix(x));
+ addURLToDownload(new URL(image), opts);
+ } catch (MalformedURLException e) {
+ LOGGER.info(e.getMessage());
+ }
+ x++;
+ }
+ }
+
+ private boolean pageContainsImages(Document page) {
+ Elements pageImages = page.select("a.a-click");
+ for (Element pageLink : pageImages) {
+ if (pageLink.attr("href").contains("/pic/")) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java
index 39098f98..9b586b9a 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java
@@ -51,7 +51,7 @@ public class EromeRipper extends AbstractHTMLRipper {
@Override
public boolean pageContainsAlbums(URL url) {
- Pattern pa = Pattern.compile("https?://www.erome.com/([a-zA-Z0-9_-]*)/?");
+ Pattern pa = Pattern.compile("https?://www.erome.com/([a-zA-Z0-9_\\-?=]*)/?");
Matcher ma = pa.matcher(url.toExternalForm());
return ma.matches();
}
@@ -111,7 +111,7 @@ public class EromeRipper extends AbstractHTMLRipper {
return m.group(1);
}
- p = Pattern.compile("^https?://www.erome.com/([a-zA-Z0-9_-]+)/?$");
+ p = Pattern.compile("^https?://www.erome.com/([a-zA-Z0-9_\\-?=]+)/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
index 26699c2a..683c791b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
@@ -219,16 +219,21 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern
- .compile("^https?://www\\.furaffinity\\.net/gallery/([-_.0-9a-zA-Z]+).*$");
+ // Gallery
+ Pattern p = Pattern.compile("^https?://www\\.furaffinity\\.net/gallery/([-_.0-9a-zA-Z]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
- throw new MalformedURLException("Expected furaffinity.net URL format: "
- + "www.furaffinity.net/gallery/username - got " + url
- + " instead");
+        // Scraps
+ p = Pattern.compile("^https?://www\\.furaffinity\\.net/scraps/([-_.0-9a-zA-Z]+).*$");
+ m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+
+        throw new MalformedURLException("Unable to find images in " + url);
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
index 45ce2b92..d88b16e8 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
@@ -1,23 +1,30 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
-import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Connection.Method;
+import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.Utils;
public class FuskatorRipper extends AbstractHTMLRipper {
+    private String jsonUrl = "https://fuskator.com/ajax/gal.aspx";
+ private String xAuthUrl = "https://fuskator.com/ajax/auth.aspx";
+ private String xAuthToken;
+ private Map cookies;
+
public FuskatorRipper(URL url) throws IOException {
super(url);
}
@@ -26,6 +33,7 @@ public class FuskatorRipper extends AbstractHTMLRipper {
public String getHost() {
return "fuskator";
}
+
@Override
public String getDomain() {
return "fuskator.com";
@@ -37,45 +45,55 @@ public class FuskatorRipper extends AbstractHTMLRipper {
if (u.contains("/thumbs/")) {
u = u.replace("/thumbs/", "/full/");
}
+ if (u.contains("/expanded/")) {
+ u = u.replaceAll("/expanded/", "/full/");
+ }
return new URL(u);
}
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^.*fuskator.com/full/([a-zA-Z0-9\\-]+).*$");
+ Pattern p = Pattern.compile("^.*fuskator.com/full/([a-zA-Z0-9\\-~]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
- "Expected fuskator.com gallery formats: "
- + "fuskator.com/full/id/..."
- + " Got: " + url);
+ "Expected fuskator.com gallery formats: " + "fuskator.com/full/id/..." + " Got: " + url);
}
@Override
public Document getFirstPage() throws IOException {
- return Http.url(url).get();
+ Response res = Http.url(url).response();
+ cookies = res.cookies();
+ return res.parse();
}
@Override
public List getURLsFromPage(Document doc) {
List imageURLs = new ArrayList<>();
- String html = doc.html();
- // Get "baseUrl"
- String baseUrl = Utils.between(html, "unescape('", "'").get(0);
+ JSONObject json;
+
try {
- baseUrl = URLDecoder.decode(baseUrl, "UTF-8");
- } catch (UnsupportedEncodingException e) {
- LOGGER.warn("Error while decoding " + baseUrl, e);
+ getXAuthToken();
+ if (xAuthToken == null || xAuthToken.isEmpty()) {
+ throw new IOException("No xAuthToken found.");
+ }
+
+ // All good. Fetch JSON data from jsonUrl.
+            json = Http.url(jsonUrl).cookies(cookies).data("X-Auth", xAuthToken).data("hash", getGID(url))
+ .data("_", Long.toString(System.currentTimeMillis())).getJSON();
+ } catch (IOException e) {
+            LOGGER.error("Couldn't fetch images.", e);
+ return imageURLs;
}
- if (baseUrl.startsWith("//")) {
- baseUrl = "http:" + baseUrl;
- }
- // Iterate over images
- for (String filename : Utils.between(html, "+'", "'")) {
- imageURLs.add(baseUrl + filename);
+
+ JSONArray imageArray = json.getJSONArray("images");
+ for (int i = 0; i < imageArray.length(); i++) {
+ imageURLs.add("https:" + imageArray.getJSONObject(i).getString("imageUrl"));
}
+
return imageURLs;
}
@@ -83,4 +101,12 @@ public class FuskatorRipper extends AbstractHTMLRipper {
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
+
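+    // Fuskator hands out a per-session X-Auth token: POSTing to /ajax/auth.aspx
+    // with the cookies from the gallery page returns the token in the response
+    // body, and /ajax/gal.aspx only serves the gallery JSON when that token is
+    // sent along as form data.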
+ private void getXAuthToken() throws IOException {
+ if (cookies == null || cookies.isEmpty()) {
+ throw new IOException("Null cookies or no cookies found.");
+ }
+ Response res = Http.url(xAuthUrl).cookies(cookies).method(Method.POST).response();
+ xAuthToken = res.body();
+ }
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
index 9c2db859..52a19b74 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
@@ -9,16 +9,24 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import org.json.JSONArray;
+import org.json.JSONObject;
import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.utils.Http;
-public class GfycatRipper extends AbstractSingleFileRipper {
+public class GfycatRipper extends AbstractHTMLRipper {
private static final String HOST = "gfycat.com";
+ String username = "";
+ String cursor = "";
+ String count = "30";
+
public GfycatRipper(URL url) throws IOException {
super(url);
@@ -45,10 +53,20 @@ public class GfycatRipper extends AbstractSingleFileRipper {
return url;
}
+ public boolean isProfile() {
+ Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9]+).*$");
+ Matcher m = p.matcher(url.toExternalForm());
+ return m.matches();
+ }
@Override
public Document getFirstPage() throws IOException {
- return Http.url(url).get();
+ if (!isProfile()) {
+ return Http.url(url).get();
+ } else {
+ username = getGID(url);
+ return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats")).ignoreContentType().get();
+ }
}
@Override
@@ -58,7 +76,7 @@ public class GfycatRipper extends AbstractSingleFileRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/([a-zA-Z0-9]+).*$");
+ Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@?([a-zA-Z0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
@@ -70,15 +88,45 @@ public class GfycatRipper extends AbstractSingleFileRipper {
+ " Got: " + url);
}
+    private String stripHTMLTags(String t) {
+        t = t.replaceAll("<html>\n" +
+                " <head></head>\n" +
+                " <body>", "");
+        t = t.replaceAll("</body>\n" +
+                "</html>", "");
+        t = t.replaceAll("\n", "");
+        t = t.replaceAll("=\"\"", "");
+        return t;
+    }
+
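+    // Gfycat's user API pages by cursor: each JSON response carries a "cursor"
+    // value that is echoed back as the cursor query parameter to fetch the next
+    // page; an empty cursor means the last page has been reached.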
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ if (cursor.equals("")) {
+ throw new IOException("No more pages");
+ }
+ return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
+ }
+
@Override
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
- Elements videos = doc.select("source");
- String vidUrl = videos.first().attr("src");
- if (vidUrl.startsWith("//")) {
- vidUrl = "http:" + vidUrl;
+ if (isProfile()) {
+ JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
+ JSONArray content = page.getJSONArray("gfycats");
+ for (int i = 0; i < content.length(); i++) {
+ result.add(content.getJSONObject(i).getString("mp4Url"));
+ }
+ cursor = page.getString("cursor");
+ } else {
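+            // Single-gfy pages embed a JSON blob (apparently JSON-LD metadata) in
+            // a script tag; its "video" object carries the mp4 URL as "contentUrl".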
+ Elements videos = doc.select("script");
+ for (Element el : videos) {
+ String json = el.html();
+ if (json.startsWith("{")) {
+ JSONObject page = new JSONObject(json);
+ result.add(page.getJSONObject("video").getString("contentUrl"));
+ }
+ }
}
- result.add(vidUrl);
return result;
}
@@ -95,14 +143,14 @@ public class GfycatRipper extends AbstractSingleFileRipper {
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
Document doc = Http.url(url).get();
- Elements videos = doc.select("source");
- if (videos.isEmpty()) {
- throw new IOException("Could not find source at " + url);
+ Elements videos = doc.select("script");
+ for (Element el : videos) {
+ String json = el.html();
+ if (json.startsWith("{")) {
+ JSONObject page = new JSONObject(json);
+ return page.getJSONObject("video").getString("contentUrl");
+ }
}
- String vidUrl = videos.first().attr("src");
- if (vidUrl.startsWith("//")) {
- vidUrl = "http:" + vidUrl;
- }
- return vidUrl;
+ throw new IOException();
}
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java
index e9c1a810..d4482b4c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java
@@ -10,6 +10,7 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.rarchives.ripme.utils.Utils;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
@@ -53,8 +54,8 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
Response resp;
Document doc;
- resp = Http.url("http://www.hentai-foundry.com/?enterAgree=1&size=1500")
- .referrer("http://www.hentai-foundry.com/")
+ resp = Http.url("https://www.hentai-foundry.com/?enterAgree=1&size=1500")
+ .referrer("https://www.hentai-foundry.com/")
.cookies(cookies)
.response();
// The only cookie that seems to matter in getting around the age wall is the phpsession cookie
@@ -86,11 +87,11 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
data.put("rating_incest" , "1");
data.put("rating_rape" , "1");
data.put("filter_media" , "A");
- data.put("filter_order" , "date_new");
+ data.put("filter_order" , Utils.getConfigString("hentai-foundry.filter_order","date_old"));
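+            // e.g. hentai-foundry.filter_order = date_new in rip.properties restores
+            // the old newest-first order; date_old (the default) rips oldest first.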
data.put("filter_type" , "0");
- resp = Http.url("http://www.hentai-foundry.com/site/filters")
- .referrer("http://www.hentai-foundry.com/")
+ resp = Http.url("https://www.hentai-foundry.com/site/filters")
+ .referrer("https://www.hentai-foundry.com/")
.cookies(cookies)
.data(data)
.method(Method.POST)
@@ -102,7 +103,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
}
resp = Http.url(url)
- .referrer("http://www.hentai-foundry.com/")
+ .referrer("https://www.hentai-foundry.com/")
.cookies(cookies)
.response();
cookies.putAll(resp.cookies());
@@ -119,7 +120,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
Element first = els.first();
try {
String nextURL = first.attr("href");
- nextURL = "http://www.hentai-foundry.com" + nextURL;
+ nextURL = "https://www.hentai-foundry.com" + nextURL;
return Http.url(nextURL)
.referrer(url)
.cookies(cookies)
@@ -135,8 +136,8 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
// this if is for ripping pdf stories
if (url.toExternalForm().contains("/stories/")) {
for (Element pdflink : doc.select("a.pdfLink")) {
- LOGGER.info("grabbing " + "http://www.hentai-foundry.com" + pdflink.attr("href"));
- imageURLs.add("http://www.hentai-foundry.com" + pdflink.attr("href"));
+ LOGGER.info("grabbing " + "https://www.hentai-foundry.com" + pdflink.attr("href"));
+ imageURLs.add("https://www.hentai-foundry.com" + pdflink.attr("href"));
}
return imageURLs;
}
@@ -153,8 +154,8 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
Document imagePage;
try {
- LOGGER.info("grabbing " + "http://www.hentai-foundry.com" + thumb.attr("href"));
- imagePage = Http.url("http://www.hentai-foundry.com" + thumb.attr("href")).cookies(cookies).get();
+ LOGGER.info("grabbing " + "https://www.hentai-foundry.com" + thumb.attr("href"));
+ imagePage = Http.url("https://www.hentai-foundry.com" + thumb.attr("href")).cookies(cookies).get();
}
catch (IOException e) {
@@ -164,10 +165,10 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
}
// This is here for when the image is resized to a thumbnail because ripme doesn't report a screensize
if (imagePage.select("div.boxbody > img.center").attr("src").contains("thumbs.")) {
- imageURLs.add("http:" + imagePage.select("div.boxbody > img.center").attr("onclick").replace("this.src=", "").replace("'", "").replace("; $(#resize_message).hide();", ""));
+ imageURLs.add("https:" + imagePage.select("div.boxbody > img.center").attr("onclick").replace("this.src=", "").replace("'", "").replace("; $(#resize_message).hide();", ""));
}
else {
- imageURLs.add("http:" + imagePage.select("div.boxbody > img.center").attr("src"));
+ imageURLs.add("https:" + imagePage.select("div.boxbody > img.center").attr("src"));
}
}
return imageURLs;
@@ -179,7 +180,12 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
if (url.toExternalForm().endsWith(".pdf")) {
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
} else {
- addURLToDownload(url, getPrefix(index));
+            // If hentai-foundry.use_prefix is false the ripper will not add a numbered prefix to any images.
+ if (Utils.getConfigBoolean("hentai-foundry.use_prefix", true)) {
+ addURLToDownload(url, getPrefix(index));
+ } else {
+ addURLToDownload(url, "");
+ }
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java
new file mode 100644
index 00000000..a4e5895d
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java
@@ -0,0 +1,78 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class HentaifoxRipper extends AbstractHTMLRipper {
+
+ public HentaifoxRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "hentaifox";
+ }
+
+ @Override
+ public String getDomain() {
+ return "hentaifox.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p = Pattern.compile("https://hentaifox.com/gallery/([\\d]+)/?");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected hentaifox URL format: " +
+ "https://hentaifox.com/gallery/ID - got " + url + " instead");
+ }
+
+ @Override
+ public Document getFirstPage() throws IOException {
+ // "url" is an instance field of the superclass
+ return Http.url(url).get();
+ }
+
+ @Override
+ public List getURLsFromPage(Document doc) {
+ LOGGER.info(doc);
+ List result = new ArrayList<>();
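+        // Thumbnails are lazy-loaded via data-src and end in "t.jpg"; dropping the
+        // trailing "t" points at the full-sized image.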
+ for (Element el : doc.select("div.preview_thumb > a > img")) {
+ String imageSource = "https:" + el.attr("data-src").replaceAll("t\\.jpg", ".jpg");
+ result.add(imageSource);
+ }
+ return result;
+ }
+
+ @Override
+ public String getAlbumTitle(URL url) throws MalformedURLException {
+ try {
+ Document doc = getFirstPage();
+ String title = doc.select("div.info > h1").first().text();
+ return getHost() + "_" + title + "_" + getGID(url);
+ } catch (Exception e) {
+ // Fall back to default album naming convention
+ LOGGER.warn("Failed to get album title from " + url, e);
+ }
+ return super.getAlbumTitle(url);
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
index 07a6e529..f097e667 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
@@ -145,15 +145,13 @@ public class ImagefapRipper extends AbstractHTMLRipper {
try {
// Attempt to use album title as GID
String title = getFirstPage().title();
- Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$");
- Matcher m = p.matcher(title);
- if (m.matches()) {
- return getHost() + "_" + m.group(1) + "_" + getGID(url);
- }
+ title = title.replace("Porn Pics & Porn GIFs", "");
+ title = title.replace(" ", "_");
+ String toReturn = getHost() + "_" + title + "_" + getGID(url);
+ return toReturn.replaceAll("__", "_");
} catch (IOException e) {
- // Fall back to default album naming convention
+ return super.getAlbumTitle(url);
}
- return super.getAlbumTitle(url);
}
private String getFullSizedImage(String pageURL) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index 37e27214..d0f8dd9a 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -39,9 +39,6 @@ public class InstagramRipper extends AbstractJSONRipper {
private String userID;
private String rhx_gis = null;
private String csrftoken;
- // Run into a weird issue with Jsoup cutting some json pages in half, this is a work around
- // see https://github.com/RipMeApp/ripme/issues/601
- private String workAroundJsonString;
@@ -192,6 +189,9 @@ public class InstagramRipper extends AbstractJSONRipper {
Document p = resp.parse();
// Get the query hash so we can download the next page
qHash = getQHash(p);
+ if (qHash == null) {
+ throw new IOException("Unable to extract qhash from page");
+ }
return getJSONFromPage(p);
}
@@ -398,7 +398,6 @@ public class InstagramRipper extends AbstractJSONRipper {
}
private boolean pageHasImages(JSONObject json) {
- LOGGER.info(json);
int numberOfImages = json.getJSONObject("data").getJSONObject("user")
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
if (numberOfImages == 0) {
@@ -422,23 +421,36 @@ public class InstagramRipper extends AbstractJSONRipper {
}
in.close();
- workAroundJsonString = sb.toString();
return new JSONObject(sb.toString());
} catch (MalformedURLException e) {
- LOGGER.info("Unable to get query_hash, " + url + " is a malformed URL");
+ LOGGER.info("Unable to get page, " + url + " is a malformed URL");
return null;
} catch (IOException e) {
- LOGGER.info("Unable to get query_hash");
+ LOGGER.info("Unable to get page");
LOGGER.info(e.getMessage());
return null;
}
}
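+    // The query hash lives in one of the JS bundles Instagram preloads; the
+    // ProfilePageContainer bundle is tried first, with the "metro" bundle as a
+    // fallback.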
+ private String getQhashUrl(Document doc) {
+ for(Element el : doc.select("link[rel=preload]")) {
+ if (el.attr("href").contains("ProfilePageContainer")) {
+ return el.attr("href");
+ }
+ }
+ for(Element el : doc.select("link[rel=preload]")) {
+ if (el.attr("href").contains("metro")) {
+ return el.attr("href");
+ }
+ }
+ return null;
+ }
+
private String getQHash(Document doc) {
- String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href");
+ String jsFileURL = "https://www.instagram.com" + getQhashUrl(doc);
StringBuilder sb = new StringBuilder();
- Document jsPage;
+ LOGGER.info(jsFileURL);
try {
// We can't use Jsoup here because it won't download a non-html file larger than a MB
// even if you set maxBodySize to 0
@@ -454,7 +466,7 @@ public class InstagramRipper extends AbstractJSONRipper {
LOGGER.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL");
return null;
} catch (IOException e) {
- LOGGER.info("Unable to get query_hash");
+ LOGGER.info("Unable to get query_hash from " + jsFileURL);
LOGGER.info(e.getMessage());
return null;
}
@@ -468,6 +480,12 @@ public class InstagramRipper extends AbstractJSONRipper {
m = jsP.matcher(sb.toString());
if (m.find()) {
return m.group(1);
+ } else {
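+            // Fallback: some bundles appear to store the hash as a minified
+            // assignment like ,u="<hash>", which this looser pattern picks up.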
+ jsP = Pattern.compile(",u=.([a-zA-Z0-9]+).");
+ m = jsP.matcher(sb.toString());
+ if (m.find()) {
+ return m.group(1);
+ }
}
}
@@ -477,6 +495,7 @@ public class InstagramRipper extends AbstractJSONRipper {
if (m.find()) {
return m.group(1);
}
+
}
LOGGER.error("Could not find query_hash on " + jsFileURL);
return null;
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MulemaxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MulemaxRipper.java
index 6f9d178d..01bf4b1c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MulemaxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MulemaxRipper.java
@@ -74,6 +74,6 @@ public class MulemaxRipper extends AbstractSingleFileRipper {
@Override
public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index));
+ addURLToDownload(url, getPrefix(index), "", "mulemax.com", null);
}
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixDotOneRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixDotOneRipper.java
index 558060eb..c1e7fac7 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixDotOneRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixDotOneRipper.java
@@ -51,7 +51,7 @@ public class PorncomixDotOneRipper extends AbstractHTMLRipper {
public List getURLsFromPage(Document doc) {
List result = new ArrayList<>();
// We have 2 loops here to cover all the different album types
- for (Element el : doc.select(".dgwt-jg-gallery > a")) {
+ for (Element el : doc.select(".dgwt-jg-item > a")) {
result.add(el.attr("href"));
}
for (Element el : doc.select(".unite-gallery > img")) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
index f0984d7d..e68e477d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
@@ -118,6 +118,12 @@ public class RedditRipper extends AlbumRipper {
return nextURL;
}
+ /**
+ * Gets a representation of the specified reddit page as a JSONArray using the reddit API
+ * @param url The url of the desired page
+ * @return A JSONArray object representation of the desired page
+ * @throws IOException If no response is received from the url
+ */
private JSONArray getJsonArrayFromURL(URL url) throws IOException {
// Wait 2 seconds before the next request
long timeDiff = System.currentTimeMillis() - lastRequestTime;
@@ -149,9 +155,30 @@ public class RedditRipper extends AlbumRipper {
return jsonArray;
}
+ /**
+     * Turns child JSONObjects into usable URLs and hands them off for further processing.
+     * Performs filtering based on the reddit.* upvote config settings.
+     * Only called from getAndParseAndReturnNext() while parsing the JSONArray returned from reddit's API.
+ * @param child The child to process
+ */
private void parseJsonChild(JSONObject child) {
String kind = child.getString("kind");
JSONObject data = child.getJSONObject("data");
+
+ //Upvote filtering
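+        // Example rip.properties entries (values shown are illustrative):
+        //   reddit.rip_by_upvote = true
+        //   reddit.min_upvotes   = 100
+        //   reddit.max_upvotes   = 10000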
+ if (Utils.getConfigBoolean("reddit.rip_by_upvote", false)){
+ int score = data.getInt("score");
+ int maxScore = Utils.getConfigInteger("reddit.max_upvotes", Integer.MAX_VALUE);
+ int minScore = Utils.getConfigInteger("reddit.min_upvotes", Integer.MIN_VALUE);
+
+ if (score > maxScore || score < minScore) {
+
+ String message = "Skipping post with score outside specified range of " + minScore + " to " + maxScore;
+ sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, message);
+ return; //Outside specified range, do not download
+ }
+ }
+
if (kind.equals("t1")) {
// Comment
handleBody(data.getString("body"), data.getString("id"), "");
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java
index 7d1a38bc..3c9d751d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java
@@ -7,13 +7,31 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
public class ThechiveRipper extends AbstractHTMLRipper {
+ private Pattern p1 = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
+    private Pattern imagePattern = Pattern.compile("<img\\s(.*?)>");
+
+ // i.thechive.com specific variables.
+ private Pattern p2 = Pattern.compile("^https?://i.thechive.com/([0-9a-zA-Z_]+)");
+ private String jsonUrl = "https://i.thechive.com/rest/uploads";
+ private Map cookies = new HashMap<>();
+ private String nextSeed = "";
+ private String username = "";
public ThechiveRipper(URL url) throws IOException {
super(url);
@@ -21,7 +39,12 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public String getHost() {
- return "thechive";
+ Matcher m1 = p1.matcher(url.toExternalForm());
+ if (m1.matches()) {
+ return "thechive";
+ } else {
+            return "i.thechive"; // gives profile rips a distinct album title
+ }
}
@Override
@@ -31,14 +54,20 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- boolean isTag = false;
- return m.group(1);
+
+ Matcher m1 = p1.matcher(url.toExternalForm());
+ if (m1.matches()) {
+ return m1.group(1);
}
+
+ Matcher m2 = p2.matcher(url.toExternalForm());
+ if (m2.matches()) {
+ username = m2.group(1);
+ return username;
+ }
+
throw new MalformedURLException("Expected thechive.com URL format: "
- + "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ - got " + url + " instead");
+ + "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ OR i.thechive.com/username, got " + url + " instead.");
}
@Override
@@ -49,27 +78,148 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public List getURLsFromPage(Document doc) {
- List result = new ArrayList<>();
- for (Element el : doc.select("img.attachment-gallery-item-full")) {
- String imageSource;
- if (el.attr("data-gifsrc").isEmpty()) { //If it's not a gif
- imageSource = el.attr("src");
- } else { //If it is a gif
- imageSource = el.attr("data-gifsrc") //from data-gifsrc attribute
- .replaceAll("\\?w=\\d{3}", ""); //remove the width modifier at the end to get highest resolution
- //May need to replace the regex's {3} later on if website starts giving higher-res photos by default.
- }
+ List result;
+ Matcher matcher = p1.matcher(url.toExternalForm());
- // We replace thumbs with resizes so we can the full sized images
- imageSource = imageSource.replace("thumbs", "resizes");
- result.add(imageSource);
+ if (matcher.matches()) {
+ // for url type: thechive.com/YEAR/MONTH/DAY/POSTTITLE/
+ result = getUrlsFromThechive(doc);
+ } else {
+ // for url type: i.thechive.com/username
+ result = getUrlsFromIDotThechive();
}
return result;
}
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ Matcher matcher = p1.matcher(url.toExternalForm());
+
+ if (matcher.matches()) {
+ // url type thechive.com/YEAR/MONTH/DAY/POSTTITLE/ has a single page.
+ return null;
+ } else {
+ if (nextSeed == null) {
+ throw new IOException("No more pages.");
+ }
+ }
+
+ // Following try block checks if the next JSON object has images or not.
+ // This is done to avoid IOException in rip() method, caused when
+ // getURLsFromPage() returns empty list.
+ JSONArray imgList;
+ try {
+ Response response = Http.url(jsonUrl).data("seed", nextSeed).data("queryType", "by-username")
+ .data("username", username).ignoreContentType().cookies(cookies).response();
+ cookies = response.cookies();
+ JSONObject json = new JSONObject(response.body());
+ imgList = json.getJSONArray("uploads");
+ } catch (Exception e) {
+ throw new IOException("Error fetching next page.", e);
+ }
+
+ if (imgList != null && imgList.length() > 0) {
+ // Pass empty document as it is of no use for thechive.com/userName url type.
+ return new Document(url.toString());
+ } else {
+ // Return null as this is last page.
+ return null;
+ }
+ }
+
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
+ private List getUrlsFromThechive(Document doc) {
+ /*
+ * The image urls are stored in a