mirror of https://github.com/RipMeApp/ripme.git
synced 2025-08-13 17:34:13 +02:00

patch.py (3 changed lines)
@@ -12,6 +12,9 @@ from hashlib import sha256
# - commit all changes
message = input('message: ')

# Strip any spaces that might've been entered before the message
message = message.lstrip()


def get_ripme_json():
    with open('ripme.json') as dataFile:
pom.xml (2 changed lines)
@@ -4,7 +4,7 @@
     <groupId>com.rarchives.ripme</groupId>
     <artifactId>ripme</artifactId>
     <packaging>jar</packaging>
-    <version>1.7.77</version>
+    <version>1.7.83</version>
     <name>ripme</name>
     <url>http://rip.rarchives.com</url>
     <properties>
ripme.json (12 changed lines)
@@ -1,6 +1,13 @@
 {
-  "currentHash": "34f326ec23f3c1ce8df1147c1d9660a1dd7b85074e79351c9295bd74ac8f127a",
+  "currentHash": "4994abc3d8102823c3f35159a0759707fa4c1ccea0746081954f6acfdbe63d8f",
+  "latestVersion": "1.7.83",
   "changeList": [
+    "1.7.83: Added a ripper for hentaifox.com; Added ripper for Erofus.com; Fixed fsktr not ripping some images; Added support for Gfycat profiles; Added opt to disable prefix for HentaifoundryRipper ",
+    "1.7.82: Hentai foundry now rips oldest first by default; 8muses ripper no longer makes unneeded requests; Added support for i.thechive.com",
+    "1.7.81: Added support for artstn.co; Added new boolean config twitter.rip_retweet; Fixed MulemaxRipper; Fix minor bug that could cause a freeze at pending 1",
+    "1.7.80: Fixed porncomix.one ripper; Fixed instagram ripper; Fixed Fuskator ripper; Fixed handling of urls with spaces in them",
+    "1.7.79: Fixed artstation ripper; Fixed imagefap ripper folder naming; Can now filter reddit posts by votes; Added Ripper for Xlecx; Linux/Mac updater is now pure java",
+    "1.7.78: Fixed gfycat ripper; Fixed E621 ripper; Added support for new xhamster url format; Now supports furaffinty scraps",
     "1.7.77: Reduced log spam; HQporner now supports actress/category/studio/top links; Improved luscious ripper; Fixed Pornhub video ripper; Tumblr ripper now always downloads highest quality available",
     "1.7.76: Fixed remember url history",
     "1.7.75: Fix e-hentai ripper; added comixfap ripper; fixed writting urls to files on windows; Fixed update screen issues; Added support for hentaidude; Fixed erome ripper",
@@ -248,6 +255,5 @@
     "1.0.4: Fixed spaces-in-directory bug",
     "1.0.3: Added VK.com ripper",
     "1.0.1: Added auto-update functionality"
-  ],
-  "latestVersion": "1.7.77"
+  ]
 }
@@ -93,6 +93,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
||||
|
||||
// We set doc to null here so the while loop below this doesn't fire
|
||||
doc = null;
|
||||
LOGGER.debug("Adding items from " + this.url + " to queue");
|
||||
}
|
||||
|
||||
while (doc != null) {
|
||||
|
@@ -218,6 +218,44 @@ public abstract class AbstractRipper
|
||||
protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies,
|
||||
Boolean getFileExtFromMIME);
|
||||
|
||||
/**
|
||||
* Queues image to be downloaded and saved.
|
||||
* @param url
|
||||
* URL of the file
|
||||
* @param options
|
||||
* A map<String,String> containing any changes to the default options.
|
||||
* Options are prefix, subdirectory, referrer, fileName, extension, and getFileExtFromMIME.
|
||||
* getFileExtFromMIME should be "true" or "false"
|
||||
* @param cookies
|
||||
* The cookies to send to the server while downloading this file.
|
||||
* @return
|
||||
* True if downloaded successfully
|
||||
* False if failed to download
|
||||
*/
|
||||
protected boolean addURLToDownload(URL url, Map<String, String> options, Map<String, String> cookies) {
|
||||
// Bit of a hack, but this lets us pass a boolean using a Map<String, String>
|
||||
boolean useMIME = options.getOrDefault("getFileExtFromMIME", "false").toLowerCase().equals("true");
|
||||
return addURLToDownload(url, options.getOrDefault("prefix", ""), options.getOrDefault("subdirectory", ""), options.getOrDefault("referrer", null),
|
||||
cookies, options.getOrDefault("fileName", null), options.getOrDefault("extension", null), useMIME);
|
||||
}
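// Illustrative usage sketch (hypothetical caller): "imageUrl", "index" and "cookies" are
// assumed to already exist in the ripper subclass making the call.
//
//     Map<String, String> options = new HashMap<>();
//     options.put("prefix", getPrefix(index));
//     options.put("subdirectory", "chapter-01");
//     options.put("getFileExtFromMIME", "true");
//     addURLToDownload(imageUrl, options, cookies);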
|
||||
|
||||
|
||||
/**
|
||||
* Queues image to be downloaded and saved.
|
||||
* @param url
|
||||
* URL of the file
|
||||
* @param options
|
||||
* A map<String,String> containing any changes to the default options.
|
||||
* Options are prefix, subdirectory, referrer, fileName, extension, and getFileExtFromMIME.
|
||||
* getFileExtFromMIME should be "true" or "false"
|
||||
* @return
|
||||
* True if downloaded successfully
|
||||
* False if failed to download
|
||||
*/
|
||||
protected boolean addURLToDownload(URL url, Map<String, String> options) {
|
||||
return addURLToDownload(url, options, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Queues image to be downloaded and saved.
|
||||
* @param url
|
||||
@@ -237,6 +275,22 @@ public abstract class AbstractRipper
|
||||
* False if failed to download
|
||||
*/
|
||||
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies, String fileName, String extension, Boolean getFileExtFromMIME) {
|
||||
// A common bug is rippers adding urls that are just "http:". This rejects said urls
|
||||
if (url.toExternalForm().equals("http:") || url.toExternalForm().equals("https:")) {
|
||||
LOGGER.info(url.toExternalForm() + " is an invalid url and will be skipped");
|
||||
return false;
|
||||
|
||||
}
|
||||
// Make sure the url doesn't contain any spaces as that can cause a 400 error when requesting the file
|
||||
if (url.toExternalForm().contains(" ")) {
|
||||
// If for some reason the url with all spaces encoded as %20 is malformed, print an error
|
||||
try {
|
||||
url = new URL(url.toExternalForm().replaceAll(" ", "%20"));
|
||||
} catch (MalformedURLException e) {
|
||||
LOGGER.error("Unable to remove spaces from url\nURL: " + url.toExternalForm());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
// Don't re-add the url if it was downloaded in a previous rip
|
||||
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
||||
if (hasDownloadedURL(url.toExternalForm())) {
|
||||
|
@@ -1,7 +1,6 @@
|
||||
package com.rarchives.ripme.ripper;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.reflect.Array;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.SocketTimeoutException;
|
||||
import java.net.URL;
|
||||
@@ -14,13 +13,11 @@ import java.util.ResourceBundle;
|
||||
import javax.net.ssl.HttpsURLConnection;
|
||||
|
||||
import com.rarchives.ripme.ui.MainWindow;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.jsoup.HttpStatusException;
|
||||
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import static java.lang.Math.toIntExact;
|
||||
|
||||
/**
|
||||
* Thread for downloading files.
|
||||
@@ -139,6 +136,7 @@ class DownloadFileThread extends Thread {
|
||||
|
||||
int statusCode = huc.getResponseCode();
|
||||
logger.debug("Status code: " + statusCode);
|
||||
// If the server doesn't allow resuming downloads error out
|
||||
if (statusCode != 206 && observer.tryResumeDownload() && saveAs.exists()) {
|
||||
// TODO find a better way to handle servers that don't support resuming downloads than just erroring out
|
||||
throw new IOException(rb.getString("server.doesnt.support.resuming.downloads"));
|
||||
|
@@ -7,12 +7,13 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Connection;
|
||||
import org.jsoup.Connection.Method;
|
||||
import org.jsoup.Connection.Response;
|
||||
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
public class ArtStationRipper extends AbstractJSONRipper {
|
||||
enum URL_TYPE {
|
||||
SINGLE_PROJECT, USER_PORTFOLIO, UNKNOWN
|
||||
@@ -47,7 +48,8 @@ public class ArtStationRipper extends AbstractJSONRipper {
|
||||
if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) {
|
||||
// URL points to single project, use project title as GID
|
||||
try {
|
||||
groupData = Http.url(albumURL.getLocation()).getJSON();
|
||||
// groupData = Http.url(albumURL.getLocation()).getJSON();
|
||||
groupData = getJson(albumURL.getLocation());
|
||||
} catch (IOException e) {
|
||||
throw new MalformedURLException("Couldn't load JSON from " + albumURL.getLocation());
|
||||
}
|
||||
@@ -58,7 +60,8 @@ public class ArtStationRipper extends AbstractJSONRipper {
|
||||
// URL points to user portfolio, use user's full name as GID
|
||||
String userInfoURL = "https://www.artstation.com/users/" + albumURL.getID() + "/quick.json";
|
||||
try {
|
||||
groupData = Http.url(userInfoURL).getJSON();
|
||||
// groupData = Http.url(userInfoURL).getJSON();
|
||||
groupData = getJson(userInfoURL);
|
||||
} catch (IOException e) {
|
||||
throw new MalformedURLException("Couldn't load JSON from " + userInfoURL);
|
||||
}
|
||||
@@ -67,26 +70,29 @@ public class ArtStationRipper extends AbstractJSONRipper {
|
||||
|
||||
// No JSON found in the URL entered, can't rip
|
||||
throw new MalformedURLException(
|
||||
"Expected URL to an ArtStation project or user profile - got " + url + " instead");
|
||||
"Expected URL to an ArtStation 'project url' or 'user profile url' - got " + url + " instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JSONObject getFirstPage() throws IOException {
|
||||
if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) {
|
||||
// URL points to JSON of a single project, just return it
|
||||
return Http.url(albumURL.getLocation()).getJSON();
|
||||
// return Http.url(albumURL.getLocation()).getJSON();
|
||||
return getJson(albumURL.getLocation());
|
||||
}
|
||||
|
||||
if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) {
|
||||
// URL points to JSON of a list of projects, load it to parse individual
|
||||
// projects
|
||||
JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON();
|
||||
// JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON();
|
||||
JSONObject albumContent = getJson(albumURL.getLocation());
|
||||
|
||||
if (albumContent.getInt("total_count") > 0) {
|
||||
// Get JSON of the first project and return it
|
||||
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(0);
|
||||
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
|
||||
return Http.url(projectURL.getLocation()).getJSON();
|
||||
// return Http.url(projectURL.getLocation()).getJSON();
|
||||
return getJson(projectURL.getLocation());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,14 +118,16 @@ public class ArtStationRipper extends AbstractJSONRipper {
|
||||
}
|
||||
|
||||
Integer currentProject = ((projectPageNumber - 1) * 50) + (projectIndex + 1);
|
||||
JSONObject albumContent = Http.url(albumURL.getLocation() + "?page=" + projectPageNumber).getJSON();
|
||||
// JSONObject albumContent = Http.url(albumURL.getLocation() + "?page=" + projectPageNumber).getJSON();
|
||||
JSONObject albumContent = getJson(albumURL.getLocation() + "?page=" + projectPageNumber);
|
||||
|
||||
if (albumContent.getInt("total_count") > currentProject) {
|
||||
// Get JSON of the next project and return it
|
||||
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(projectIndex);
|
||||
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
|
||||
projectIndex++;
|
||||
return Http.url(projectURL.getLocation()).getJSON();
|
||||
// return Http.url(projectURL.getLocation()).getJSON();
|
||||
return getJson(projectURL.getLocation());
|
||||
}
|
||||
|
||||
throw new IOException("No more projects");
|
||||
@@ -181,9 +189,12 @@ public class ArtStationRipper extends AbstractJSONRipper {
|
||||
/**
|
||||
* Construct a new ParsedURL object.
|
||||
*
|
||||
* @param urlType URL_TYPE enum containing the URL type
|
||||
* @param jsonURL String containing the JSON URL location
|
||||
* @param urlID String containing the ID of this URL
|
||||
* @param urlType
|
||||
* URL_TYPE enum containing the URL type
|
||||
* @param jsonURL
|
||||
* String containing the JSON URL location
|
||||
* @param urlID
|
||||
* String containing the ID of this URL
|
||||
*
|
||||
*/
|
||||
ParsedURL(URL_TYPE urlType, String jsonURL, String urlID) {
|
||||
@@ -226,7 +237,8 @@ public class ArtStationRipper extends AbstractJSONRipper {
|
||||
/**
|
||||
* Parses an ArtStation URL.
|
||||
*
|
||||
* @param url URL to an ArtStation user profile
|
||||
* @param url
|
||||
* URL to an ArtStation user profile
|
||||
* (https://www.artstation.com/username) or single project
|
||||
* (https://www.artstation.com/artwork/projectid)
|
||||
* @return ParsedURL object containing URL type, JSON location and ID (stores
|
||||
@@ -239,7 +251,30 @@ public class ArtStationRipper extends AbstractJSONRipper {
|
||||
|
||||
// Load HTML Source of the specified URL
|
||||
try {
|
||||
htmlSource = Http.url(url).get().html();
|
||||
// htmlSource = Http.url(url).get().html();
|
||||
Connection con = Http.url(url).method(Method.GET).connection();
|
||||
con.ignoreHttpErrors(true);
|
||||
con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
|
||||
con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
|
||||
con.header("Accept-Language", "en-US,en;q=0.5");
|
||||
con.header("Accept-Encoding", "gzip, deflate, br");
|
||||
con.header("Upgrade-Insecure-Requests", "1");
|
||||
Response res = con.execute();
|
||||
int status = res.statusCode();
|
||||
|
||||
if (status / 100 == 2) {
|
||||
htmlSource = res.parse().html();
|
||||
} else if (status == 403 && url.toString().contains("artwork/")) {
|
||||
// Catches cloudflare page. Error 403.
|
||||
// Usually caused by artwork URLs (artstation.com/artwork/someProjectId)
|
||||
String urlId = url.toString().substring(url.toString().lastIndexOf("/") + 1);
|
||||
String jsonURL = "https://www.artstation.com/projects/" + urlId + ".json";
|
||||
parsedURL = new ParsedURL(URL_TYPE.SINGLE_PROJECT, jsonURL, urlId);
|
||||
return parsedURL;
|
||||
} else {
|
||||
LOGGER.error("Couldnt fetch URL: " + url);
|
||||
throw new IOException("Error fetching URL: " + url + " Status Code: " + status);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
htmlSource = "";
|
||||
}
|
||||
@@ -266,5 +301,28 @@ public class ArtStationRipper extends AbstractJSONRipper {
|
||||
parsedURL = new ParsedURL(URL_TYPE.UNKNOWN, null, null);
|
||||
return parsedURL;
|
||||
}
|
||||
|
||||
// Use this method instead of a direct call to Http.url(url).getJSON() to avoid Cloudflare's 403 page.
|
||||
private JSONObject getJson(URL url) throws IOException {
|
||||
Connection con = Http.url(url).method(Method.GET).connection();
|
||||
con.ignoreHttpErrors(true);
|
||||
con.ignoreContentType(true);
|
||||
con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
|
||||
con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
|
||||
con.header("Accept-Language", "en-US,en;q=0.5");
|
||||
con.header("Accept-Encoding", "gzip, deflate, br");
|
||||
con.header("Upgrade-Insecure-Requests", "1");
|
||||
Response res = con.execute();
|
||||
int status = res.statusCode();
|
||||
if (status / 100 == 2) {
|
||||
String jsonString = res.body();
|
||||
return new JSONObject(jsonString);
|
||||
}
|
||||
throw new IOException("Error fetching json. Status code:" + status);
|
||||
}
|
||||
|
||||
private JSONObject getJson(String url) throws IOException{
|
||||
return getJson(new URL(url));
|
||||
}
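// Illustrative usage: calls that previously went through Http.url(...).getJSON() can use this
// helper instead, e.g. (with the same albumURL/page variables used elsewhere in this ripper):
//
//     JSONObject albumContent = getJson(albumURL.getLocation() + "?page=" + projectPageNumber);
//
// which sends the browser-like headers above and avoids the Cloudflare 403 page.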
|
||||
|
||||
}
|
||||
|
@@ -0,0 +1,58 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
import org.jsoup.Connection.Response;
|
||||
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
/*
|
||||
* Ripper for ArtStation's short URL domain.
|
||||
* Example URL: https://artstn.co/p/JlE15Z
|
||||
*/
|
||||
|
||||
public class ArtstnRipper extends ArtStationRipper {
|
||||
public URL artStationUrl = null;
|
||||
|
||||
public ArtstnRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
return url.getHost().endsWith("artstn.co");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
if (artStationUrl == null) {
|
||||
// Run only once.
|
||||
try {
|
||||
artStationUrl = getFinalUrl(url);
|
||||
if (artStationUrl == null) {
|
||||
throw new IOException("Null url received.");
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOGGER.error("Couldnt resolve URL.", e);
|
||||
}
|
||||
|
||||
}
|
||||
return super.getGID(artStationUrl);
|
||||
}
|
||||
|
||||
public URL getFinalUrl(URL url) throws IOException {
|
||||
if (url.getHost().endsWith("artstation.com")) {
|
||||
return url;
|
||||
}
|
||||
|
||||
LOGGER.info("Checking url: " + url);
|
||||
Response response = Http.url(url).connection().followRedirects(false).execute();
|
||||
if (response.statusCode() / 100 == 3 && response.hasHeader("location")) {
|
||||
return getFinalUrl(new URL(response.header("location")));
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
@@ -12,11 +12,13 @@ import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
public class ChanRipper extends AbstractHTMLRipper {
|
||||
private static List<ChanSite> explicit_domains = Arrays.asList(
|
||||
private static List<ChanSite> bakedin_explicit_domains = Arrays.asList(
|
||||
new ChanSite("boards.4chan.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
|
||||
new ChanSite("boards.4channel.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
|
||||
new ChanSite("4archive.org", "imgur.com"),
|
||||
@@ -29,6 +31,34 @@ public class ChanRipper extends AbstractHTMLRipper {
|
||||
new ChanSite("desuarchive.org", "desu-usergeneratedcontent.xyz"),
|
||||
new ChanSite("8ch.net", "media.8ch.net")
|
||||
);
|
||||
private static List<ChanSite> user_give_explicit_domains = getChansFromConfig(Utils.getConfigString("chans.chan_sites", null));
|
||||
private static List<ChanSite> explicit_domains = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* Reads a comma-separated string of chan sites in the format site1[cdn|cdn2|cdn3], site2[cdn]
|
||||
*/
|
||||
public static List<ChanSite> getChansFromConfig(String rawChanString) {
|
||||
List<ChanSite> userChans = new ArrayList<>();
|
||||
if (rawChanString != null) {
|
||||
String[] listOfChans = rawChanString.split(",");
|
||||
for (String chanInfo : listOfChans) {
|
||||
// If this is true we're parsing a chan with cdns
|
||||
if (chanInfo.contains("[")) {
|
||||
String siteUrl = chanInfo.split("\\[")[0];
|
||||
String[] cdns = chanInfo.replaceAll(siteUrl + "\\[", "").replaceAll("]", "").split("\\|");
|
||||
LOGGER.debug("site url: " + siteUrl);
|
||||
LOGGER.debug("cdn: " + Arrays.toString(cdns));
|
||||
userChans.add(new ChanSite(siteUrl, Arrays.asList(cdns)));
|
||||
} else {
|
||||
// We're parsing a site without cdns
|
||||
LOGGER.debug("site: " + chanInfo);
|
||||
userChans.add(new ChanSite(chanInfo));
|
||||
}
|
||||
}
|
||||
return userChans;
|
||||
}
|
||||
return null;
|
||||
}
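// Illustrative example (hypothetical config value): a chans.chan_sites entry such as
//
//     chans.chan_sites = boards.somechan.org[cdn.somechan.org|img.somechan.org],other-chan.net
//
// would be parsed into two ChanSite entries: boards.somechan.org with the two cdn domains,
// and other-chan.net with no extra cdns.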
|
||||
|
||||
private static List<String> url_piece_blacklist = Arrays.asList(
|
||||
"=http",
|
||||
@@ -43,6 +73,7 @@ public class ChanRipper extends AbstractHTMLRipper {
|
||||
public ChanRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
for (ChanSite _chanSite : explicit_domains) {
|
||||
LOGGER.info(_chanSite.domains);
|
||||
if (_chanSite.domains.contains(url.getHost())) {
|
||||
chanSite = _chanSite;
|
||||
generalChanSite = false;
|
||||
@@ -86,6 +117,10 @@ public class ChanRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
explicit_domains.addAll(bakedin_explicit_domains);
|
||||
if (user_give_explicit_domains != null) {
|
||||
explicit_domains.addAll(user_give_explicit_domains);
|
||||
}
|
||||
for (ChanSite _chanSite : explicit_domains) {
|
||||
if (_chanSite.domains.contains(url.getHost())) {
|
||||
return true;
|
||||
|
@@ -0,0 +1,173 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
/**
|
||||
* @author Tushar
|
||||
*
|
||||
*/
|
||||
public class ComicextraRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final String FILE_NAME = "page";
|
||||
|
||||
private Pattern p1 =
|
||||
Pattern.compile("https:\\/\\/www.comicextra.com\\/comic\\/([A-Za-z0-9_-]+)");
|
||||
private Pattern p2 = Pattern.compile(
|
||||
"https:\\/\\/www.comicextra.com\\/([A-Za-z0-9_-]+)\\/([A-Za-z0-9_-]+)(?:\\/full)?");
|
||||
private UrlType urlType = UrlType.UNKNOWN;
|
||||
private List<String> chaptersList = null;
|
||||
private int chapterIndex = -1; // index for the chaptersList, useful in getting the next page.
|
||||
private int imageIndex = 0; // image index for each chapter images.
|
||||
|
||||
public ComicextraRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getDomain() {
|
||||
return "comicextra.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "comicextra";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Matcher m1 = p1.matcher(url.toExternalForm());
|
||||
if (m1.matches()) {
|
||||
// URL is of a comic (https://www.comicextra.com/comic/the-punisher-frank-castle-max).
|
||||
urlType = UrlType.COMIC;
|
||||
return m1.group(1);
|
||||
}
|
||||
|
||||
Matcher m2 = p2.matcher(url.toExternalForm());
|
||||
if (m2.matches()) {
|
||||
// URL is of a chapter (https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75).
|
||||
urlType = UrlType.CHAPTER;
|
||||
return m2.group(1);
|
||||
}
|
||||
|
||||
throw new MalformedURLException(
|
||||
"Expected comicextra.com url of type: https://www.comicextra.com/comic/some-comic-name\n"
|
||||
+ " or https://www.comicextra.com/some-comic-name/chapter-001 got " + url
|
||||
+ " instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Document getFirstPage() throws IOException {
|
||||
Document doc = null;
|
||||
|
||||
switch (urlType) {
|
||||
case COMIC:
|
||||
// For COMIC type url we extract the urls of each chapters and store them in chapters.
|
||||
chaptersList = new ArrayList<>();
|
||||
Document comicPage = Http.url(url).get();
|
||||
Elements elements = comicPage.select("div.episode-list a");
|
||||
for (Element e : elements) {
|
||||
chaptersList.add(getCompleteChapterUrl(e.attr("abs:href")));
|
||||
}
|
||||
|
||||
// Set the first chapter from the chapterList as the doc.
|
||||
chapterIndex = 0;
|
||||
doc = Http.url(chaptersList.get(chapterIndex)).get();
|
||||
break;
|
||||
case CHAPTER:
|
||||
doc = Http.url(url).get();
|
||||
break;
|
||||
case UNKNOWN:
|
||||
default:
|
||||
throw new IOException("Unknown url type encountered.");
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
if (urlType == UrlType.COMIC) {
|
||||
++chapterIndex;
|
||||
imageIndex = 0; // Reset imageIndex so that the image prefix within each chapter starts from '001_'.
|
||||
if (chapterIndex < chaptersList.size()) {
|
||||
return Http.url(chaptersList.get(chapterIndex)).get();
|
||||
}
|
||||
}
|
||||
|
||||
return super.getNextPage(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getURLsFromPage(Document page) {
|
||||
List<String> urls = new ArrayList<>();
|
||||
|
||||
if (urlType == UrlType.COMIC || urlType == UrlType.CHAPTER) {
|
||||
Elements images = page.select("img.chapter_img");
|
||||
for (Element img : images) {
|
||||
urls.add(img.attr("src"));
|
||||
}
|
||||
}
|
||||
|
||||
return urls;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void downloadURL(URL url, int index) {
|
||||
String subdirectory = getSubDirectoryName();
|
||||
String prefix = getPrefix(++imageIndex);
|
||||
|
||||
addURLToDownload(url, prefix, subdirectory, null, null, FILE_NAME, null, Boolean.TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function appends /full to the end of the chapter url to get all the images for the
* chapter in the same Document.
|
||||
*/
|
||||
private String getCompleteChapterUrl(String chapterUrl) {
|
||||
if (!chapterUrl.endsWith("/full")) {
|
||||
chapterUrl = chapterUrl + "/full";
|
||||
}
|
||||
return chapterUrl;
|
||||
}
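// Example (using the chapter URL from getGID above):
// https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75 becomes
// https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75/full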
|
||||
|
||||
/*
|
||||
* This function returns the sub-folder name for the current chapter.
|
||||
*/
|
||||
private String getSubDirectoryName() {
|
||||
String subDirectory = "";
|
||||
|
||||
if (urlType == UrlType.COMIC) {
|
||||
Matcher m = p2.matcher(chaptersList.get(chapterIndex));
|
||||
if (m.matches()) {
|
||||
subDirectory = m.group(2);
|
||||
}
|
||||
}
|
||||
|
||||
if (urlType == UrlType.CHAPTER) {
|
||||
Matcher m = p2.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
subDirectory = m.group(2);
|
||||
}
|
||||
}
|
||||
|
||||
return subDirectory;
|
||||
}
|
||||
|
||||
/*
|
||||
* Enum to classify different types of urls.
|
||||
*/
|
||||
private enum UrlType {
|
||||
COMIC, CHAPTER, UNKNOWN
|
||||
}
|
||||
}
|
@@ -1,409 +1,645 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
||||
import com.rarchives.ripme.utils.Base64;
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.RipUtils;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.io.ObjectInputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.io.Serializable;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Base64;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Connection;
|
||||
import org.jsoup.Connection.Method;
|
||||
import org.jsoup.Connection.Response;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author MrPlaygon
|
||||
*
|
||||
* NOT using the DeviantArt API like the old JSON ripper because it is SLOW
* and somewhat annoying to use. Things to consider: using the API might
* be less work/maintenance later because APIs do not change as
* frequently as HTML source code does...?
|
||||
*
|
||||
*
|
||||
*
|
||||
* Tested for:
|
||||
*
|
||||
* SFW:
|
||||
*
|
||||
* https://www.deviantart.com/apofiss/gallery/41388863/sceneries
|
||||
* https://www.deviantart.com/kageuri/gallery/
|
||||
* https://www.deviantart.com/kageuri/gallery/?catpath=/
|
||||
* https://www.deviantart.com/apofiss/favourites/39881418/gifts-and
|
||||
* https://www.deviantart.com/kageuri/favourites/
|
||||
* https://www.deviantart.com/kageuri/favourites/?catpath=/
|
||||
*
|
||||
* NSFW:
|
||||
*
|
||||
* https://www.deviantart.com/revpeng/gallery/67734353/Siren-Lee-Agent-of-S-I-R-E-N-S
|
||||
*
|
||||
*
|
||||
* Deactivated account:
|
||||
*
|
||||
* https://www.deviantart.com/gingerbreadpony
|
||||
*
|
||||
* Banned Account:
|
||||
*
|
||||
* https://www.deviantart.com/ghostofflossenburg
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
* Login Data (PLEASE DON'T ACTUALLY USE!!!):
|
||||
*
|
||||
* email: 5g5_8l4dii5lbbpc@byom.de
|
||||
*
|
||||
* username: 5g58l4dii5lbbpc
|
||||
*
|
||||
* password: 5g5_8l4dii5lbbpc
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class DeviantartRipper extends AbstractHTMLRipper {
|
||||
|
||||
public class DeviantartRipper extends AbstractJSONRipper {
|
||||
String requestID;
|
||||
String galleryID;
|
||||
String username;
|
||||
String baseApiUrl = "https://www.deviantart.com/dapi/v1/gallery/";
|
||||
String csrf;
|
||||
Map<String, String> pageCookies = new HashMap<>();
|
||||
private final String username = "5g58l4dii5lbbpc";
|
||||
private final String password = "5g5_8l4dii5lbbpc";
|
||||
private int offset = 0;
|
||||
private boolean usingCatPath = false;
|
||||
private int downloadCount = 0;
|
||||
private Map<String, String> cookies = new HashMap<String, String>();
|
||||
private DownloadThreadPool deviantartThreadPool = new DownloadThreadPool("deviantart");
|
||||
private ArrayList<String> names = new ArrayList<String>();
|
||||
|
||||
private static final int PAGE_SLEEP_TIME = 3000,
|
||||
IMAGE_SLEEP_TIME = 2000;
|
||||
List<String> allowedCookies = Arrays.asList("agegate_state", "userinfo", "auth", "auth_secure");
|
||||
|
||||
private Map<String,String> cookies = new HashMap<>();
|
||||
private Set<String> triedURLs = new HashSet<>();
|
||||
private Connection conn = null;
|
||||
|
||||
public DeviantartRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
// Constants
|
||||
private final String referer = "https://www.deviantart.com/";
|
||||
private final String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0";
|
||||
private final String utilsKey = "DeviantartLogin.cookies";
|
||||
|
||||
String loginCookies = "auth=__0f9158aaec09f417b235%3B%221ff79836392a515d154216d919eae573%22;" +
|
||||
"auth_secure=__41d14dd0da101f411bb0%3B%2281cf2cf9477776162a1172543aae85ce%22;" +
|
||||
"userinfo=__bf84ac233bfa8ae642e8%3B%7B%22username%22%3A%22grabpy%22%2C%22uniqueid%22%3A%22a0a876aa37dbd4b30e1c80406ee9c280%22%2C%22vd%22%3A%22BbHUXZ%2CBbHUXZ%2CA%2CU%2CA%2C%2CB%2CA%2CB%2CBbHUXZ%2CBbHUdj%2CL%2CL%2CA%2CBbHUdj%2C13%2CA%2CB%2CA%2C%2CA%2CA%2CB%2CA%2CA%2C%2CA%22%2C%22attr%22%3A56%7D";
|
||||
@Override
|
||||
public DownloadThreadPool getThreadPool() {
|
||||
return deviantartThreadPool;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "deviantart";
|
||||
}
|
||||
public DeviantartRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "deviantart.com";
|
||||
}
|
||||
@Override
|
||||
protected String getDomain() {
|
||||
return "deviantart.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
String u = url.toExternalForm();
|
||||
if (u.contains("/gallery/")) {
|
||||
return url;
|
||||
} else if (u.contains("/favourites")) {
|
||||
return url;
|
||||
} else if (u.contains("/favorites")) {
|
||||
return url;
|
||||
}
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "deviantart";
|
||||
}
|
||||
|
||||
if (!u.endsWith("/gallery/") && !u.endsWith("/gallery")) {
|
||||
if (!u.endsWith("/")) {
|
||||
u += "/gallery/";
|
||||
} else {
|
||||
u += "gallery/";
|
||||
}
|
||||
}
|
||||
@Override
|
||||
protected Document getFirstPage() throws IOException {
|
||||
if (isDeactivated()) {
|
||||
throw new IOException("Account Deactivated");
|
||||
}
|
||||
login();
|
||||
|
||||
// Saving connection to reuse later for following pages.
|
||||
this.conn = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(this.referer)
|
||||
.userAgent(this.userAgent).connection();
|
||||
|
||||
Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/([0-9]+)/*?$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (!m.matches()) {
|
||||
String subdir = "/";
|
||||
if (u.contains("catpath=scraps")) {
|
||||
subdir = "scraps";
|
||||
}
|
||||
u = u.replaceAll("\\?.*", "?catpath=" + subdir);
|
||||
}
|
||||
return new URL(u);
|
||||
}
|
||||
return this.conn.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)(/gallery)?/?(\\?.*)?$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
// Root gallery
|
||||
if (url.toExternalForm().contains("catpath=scraps")) {
|
||||
return m.group(1) + "_scraps";
|
||||
}
|
||||
else {
|
||||
return m.group(1);
|
||||
}
|
||||
}
|
||||
p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/gallery/([0-9]+).*$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
// Subgallery
|
||||
return m.group(1) + "_" + m.group(2);
|
||||
}
|
||||
p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/([0-9]+)/.*?$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1) + "_faves_" + m.group(2);
|
||||
}
|
||||
p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/?$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
// Subgallery
|
||||
return m.group(1) + "_faves";
|
||||
}
|
||||
throw new MalformedURLException("Expected URL format: http://www.deviantart.com/username[/gallery/#####], got: " + url);
|
||||
}
|
||||
/**
|
||||
* Checks if the URL refers to a deactivated account using the HTTP status code
|
||||
*
|
||||
* @return true when the account is deactivated (the status check returns a non-200 code)
* @throws IOException when the status check request itself fails
|
||||
*/
|
||||
private boolean isDeactivated() throws IOException {
|
||||
Response res = Http.url(this.url).connection().followRedirects(true).referrer(this.referer)
|
||||
.userAgent(this.userAgent).execute();
|
||||
return res.statusCode() != 200;
|
||||
|
||||
private String getUsernameFromURL(String u) {
|
||||
Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/gallery/?(\\S+)?");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
/**
|
||||
* Stores logged in Cookies. Needed for art pieces only visible to logged in
|
||||
* users.
|
||||
*
|
||||
*
|
||||
* @throws IOException when failed to load webpage or failed to read/write
|
||||
* cookies in file (used when running multiple instances of
|
||||
* RipMe)
|
||||
*/
|
||||
private void login() throws IOException {
|
||||
|
||||
private String getFullsizedNSFWImage(String pageURL) {
|
||||
try {
|
||||
Document doc = Http.url(pageURL).cookies(cookies).get();
|
||||
String imageToReturn = "";
|
||||
String[] d = doc.select("img").attr("srcset").split(",");
|
||||
String customUsername = Utils.getConfigString("DeviantartCustomLoginUsername", this.username);
|
||||
String customPassword = Utils.getConfigString("DeviantartCustomLoginPassword", this.password);
|
||||
try {
|
||||
String dACookies = Utils.getConfigString(utilsKey, null);
|
||||
updateCookie(dACookies != null ? deserialize(dACookies) : null);
|
||||
} catch (ClassNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
if (getDACookie() == null || !checkLogin()) {
|
||||
LOGGER.info("Do Login now");
|
||||
// Do login now
|
||||
|
||||
String s = d[d.length -1].split(" ")[0];
|
||||
LOGGER.info("2:" + s);
|
||||
// Load login page
|
||||
Response res = Http.url("https://www.deviantart.com/users/login").connection().method(Method.GET)
|
||||
.referrer(referer).userAgent(userAgent).execute();
|
||||
|
||||
if (s == null || s.equals("")) {
|
||||
LOGGER.error("Could not find full sized image at " + pageURL);
|
||||
}
|
||||
return s;
|
||||
} catch (IOException e) {
|
||||
LOGGER.error("Could not find full sized image at " + pageURL);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
updateCookie(res.cookies());
|
||||
|
||||
/**
|
||||
* Gets first page.
* Will determine if a login is supplied;
* if there is a login, then log in and add those login cookies.
* Otherwise, just bypass the age gate with an anonymous flag.
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public JSONObject getFirstPage() throws IOException {
|
||||
|
||||
// Base64 da login
|
||||
// username: Z3JhYnB5
|
||||
// password: ZmFrZXJz
|
||||
// Find tokens
|
||||
Document doc = res.parse();
|
||||
Element form = doc.getElementById("login");
|
||||
String token = form.select("input[name=\"validate_token\"]").first().attr("value");
|
||||
String key = form.select("input[name=\"validate_key\"]").first().attr("value");
|
||||
LOGGER.info("Token: " + token + " & Key: " + key);
|
||||
|
||||
// Build Login Data
|
||||
HashMap<String, String> loginData = new HashMap<String, String>();
|
||||
loginData.put("challenge", "");
|
||||
loginData.put("username", customUsername);
|
||||
loginData.put("password", customPassword);
|
||||
loginData.put("remember_me", "1");
|
||||
loginData.put("validate_token", token);
|
||||
loginData.put("validate_key", key);
|
||||
Map<String, String> cookies = res.cookies();
|
||||
|
||||
cookies = getDACookies();
|
||||
if (cookies.isEmpty()) {
|
||||
LOGGER.warn("Failed to get login cookies");
|
||||
cookies.put("agegate_state","1"); // Bypasses the age gate
|
||||
}
|
||||
cookies.put("agegate_state", "1");
|
||||
|
||||
Response res = Http.url(this.url)
|
||||
.cookies(cookies)
|
||||
.response();
|
||||
Document page = res.parse();
|
||||
// Log in using data. Handle redirect
|
||||
res = Http.url("https://www.deviantart.com/users/login").connection().referrer(referer).userAgent(userAgent)
|
||||
.method(Method.POST).data(loginData).cookies(cookies).followRedirects(false).execute();
|
||||
updateCookie(res.cookies());
|
||||
|
||||
JSONObject firstPageJSON = getFirstPageJSON(page);
|
||||
requestID = firstPageJSON.getJSONObject("dapx").getString("requestid");
|
||||
galleryID = getGalleryID(page);
|
||||
username = getUsernameFromURL(url.toExternalForm());
|
||||
csrf = firstPageJSON.getString("csrf");
|
||||
pageCookies = res.cookies();
|
||||
res = Http.url(res.header("location")).connection().referrer(referer).userAgent(userAgent)
|
||||
.method(Method.GET).cookies(cookies).followRedirects(false).execute();
|
||||
|
||||
return requestPage(0, galleryID, username, requestID, csrf, pageCookies);
|
||||
}
|
||||
// Store cookies
|
||||
updateCookie(res.cookies());
|
||||
|
||||
private JSONObject requestPage(int offset, String galleryID, String username, String requestID, String csfr, Map<String, String> c) {
|
||||
LOGGER.debug("offset: " + Integer.toString(offset));
|
||||
LOGGER.debug("galleryID: " + galleryID);
|
||||
LOGGER.debug("username: " + username);
|
||||
LOGGER.debug("requestID: " + requestID);
|
||||
String url = baseApiUrl + galleryID + "?iid=" + requestID;
|
||||
try {
|
||||
Document doc = Http.url(url).cookies(c).data("username", username).data("offset", Integer.toString(offset))
|
||||
.data("limit", "24").data("_csrf", csfr).data("id", requestID)
|
||||
.ignoreContentType().post();
|
||||
return new JSONObject(doc.body().text());
|
||||
} catch (IOException e) {
|
||||
LOGGER.error("Got error trying to get page: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
// Write Cookie to file for other RipMe Instances or later use
|
||||
Utils.setConfigString(utilsKey, serialize(new HashMap<String, String>(getDACookie())));
|
||||
Utils.saveConfig(); // save now because of other instances that might work simultaneously
|
||||
|
||||
} else {
|
||||
LOGGER.info("No new Login needed");
|
||||
}
|
||||
|
||||
}
|
||||
LOGGER.info("DA Cookies: " + getDACookie());
|
||||
}
|
||||
|
||||
private JSONObject getFirstPageJSON(Document doc) {
|
||||
for (Element js : doc.select("script")) {
|
||||
if (js.html().contains("requestid")) {
|
||||
String json = js.html().replaceAll("window.__initial_body_data=", "").replaceAll("\\);", "")
|
||||
.replaceAll(";__wake\\(.+", "");
|
||||
JSONObject j = new JSONObject(json);
|
||||
return j;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
/**
|
||||
* Returns next page Document using offset.
|
||||
*/
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
this.offset += 24;
|
||||
this.conn.url(urlWithParams(this.offset)).cookies(getDACookie());
|
||||
Response re = this.conn.execute();
|
||||
// Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer).userAgent(userAgent)
|
||||
// .response();
|
||||
updateCookie(re.cookies());
|
||||
Document docu = re.parse();
|
||||
Elements messages = docu.getElementsByClass("message");
|
||||
LOGGER.info("Current Offset: " + this.offset);
|
||||
|
||||
public String getGalleryID(Document doc) {
|
||||
// If the url contains catpath we return 0, as the DA api will provide all gallery images if you set the
// gallery id to 0
|
||||
if (url.toExternalForm().contains("catpath=")) {
|
||||
return "0";
|
||||
}
|
||||
Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/[a-zA-Z0-9\\-]+/gallery/([0-9]+)/?\\S+");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
for (Element el : doc.select("input[name=set]")) {
|
||||
try {
|
||||
String galleryID = el.attr("value");
|
||||
return galleryID;
|
||||
} catch (NullPointerException e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
LOGGER.error("Could not find gallery ID");
|
||||
return null;
|
||||
}
|
||||
if (messages.size() > 0) {
|
||||
|
||||
public String getUsername(Document doc) {
|
||||
return doc.select("meta[property=og:title]").attr("content")
|
||||
.replaceAll("'s DeviantArt gallery", "").replaceAll("'s DeviantArt Gallery", "");
|
||||
}
|
||||
|
||||
// if a message element exists -> last page
LOGGER.info("Messages amount: " + messages.size() + " - Next Page does not exist");
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromJSON(JSONObject json) {
|
||||
List<String> imageURLs = new ArrayList<>();
|
||||
JSONArray results = json.getJSONObject("content").getJSONArray("results");
|
||||
for (int i = 0; i < results.length(); i++) {
|
||||
Document doc = Jsoup.parseBodyFragment(results.getJSONObject(i).getString("html"));
|
||||
if (doc.html().contains("ismature")) {
|
||||
LOGGER.info("Downloading nsfw image");
|
||||
String nsfwImage = getFullsizedNSFWImage(doc.select("span").attr("href"));
|
||||
if (nsfwImage != null && nsfwImage.startsWith("http")) {
|
||||
imageURLs.add(nsfwImage);
|
||||
}
|
||||
}
|
||||
try {
|
||||
String imageURL = doc.select("span").first().attr("data-super-full-img");
|
||||
if (!imageURL.isEmpty() && imageURL.startsWith("http")) {
|
||||
imageURLs.add(imageURL);
|
||||
}
|
||||
} catch (NullPointerException e) {
|
||||
LOGGER.info(i + " does not contain any images");
|
||||
}
|
||||
return Http.url(urlWithParams(this.offset)).referrer(referer).userAgent(userAgent).cookies(getDACookie()).get();
|
||||
|
||||
}
|
||||
return imageURLs;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns list of Links to the Image pages. NOT links to fullsize image!!! e.g.
|
||||
* https://www.deviantart.com/kageuri/art/RUBY-568396655
|
||||
*/
|
||||
@Override
|
||||
protected List<String> getURLsFromPage(Document page) {
|
||||
|
||||
@Override
|
||||
public JSONObject getNextPage(JSONObject page) throws IOException {
|
||||
boolean hasMore = page.getJSONObject("content").getBoolean("has_more");
|
||||
if (hasMore) {
|
||||
return requestPage(page.getJSONObject("content").getInt("next_offset"), galleryID, username, requestID, csrf, pageCookies);
|
||||
}
|
||||
List<String> result = new ArrayList<String>();
|
||||
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
Element div;
|
||||
if (usingCatPath) {
|
||||
div = page.getElementById("gmi-");
|
||||
|
||||
@Override
|
||||
public boolean keepSortOrder() {
|
||||
// Don't keep sort order (do not add prefixes).
|
||||
// Causes file duplication, as outlined in https://github.com/4pr0n/ripme/issues/113
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
div = page.getElementsByClass("folderview-art").first().child(0);
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
|
||||
sleep(IMAGE_SLEEP_TIME);
|
||||
}
|
||||
}
|
||||
Elements links = div.select("a.torpedo-thumb-link");
|
||||
|
||||
/**
|
||||
* Tries to get full size image from thumbnail URL
|
||||
* @param thumb Thumbnail URL
|
||||
* @param throwException Whether or not to throw exception when full size image isn't found
|
||||
* @return Full-size image URL
|
||||
* @throws Exception If it can't find the full-size URL
|
||||
*/
|
||||
private static String thumbToFull(String thumb, boolean throwException) throws Exception {
|
||||
thumb = thumb.replace("http://th", "http://fc");
|
||||
List<String> fields = new ArrayList<>(Arrays.asList(thumb.split("/")));
|
||||
fields.remove(4);
|
||||
if (!fields.get(4).equals("f") && throwException) {
|
||||
// Not a full-size image
|
||||
throw new Exception("Can't get full size image from " + thumb);
|
||||
}
|
||||
StringBuilder result = new StringBuilder();
|
||||
for (int i = 0; i < fields.size(); i++) {
|
||||
if (i > 0) {
|
||||
result.append("/");
|
||||
}
|
||||
result.append(fields.get(i));
|
||||
}
|
||||
return result.toString();
|
||||
}
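// Worked example (hypothetical thumbnail URL, shown only to illustrate the transformation):
//
//     thumb: http://th05.deviantart.net/fs71/200H/f/2015/001/a/b/example.jpg
//     full:  http://fc05.deviantart.net/fs71/f/2015/001/a/b/example.jpg
//
// "http://th" becomes "http://fc" and the size segment ("200H", index 4 after splitting on "/")
// is removed; if the remaining index 4 is not "f", no full-size version exists for that thumb.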
|
||||
for (Element el : links) {
|
||||
result.add(el.attr("href"));
|
||||
|
||||
}
|
||||
|
||||
LOGGER.info("Amount of Images on Page: " + result.size());
|
||||
LOGGER.info(page.location());
|
||||
|
||||
/**
|
||||
* If largest resolution for image at 'thumb' is found, starts downloading
|
||||
* and returns null.
|
||||
* If it finds a larger resolution on another page, returns the image URL.
|
||||
* @param thumb Thumbnail URL
|
||||
* @param page Page the thumbnail is retrieved from
|
||||
* @return Highest-resolution version of the image based on thumbnail URL and the page.
|
||||
*/
|
||||
private String smallToFull(String thumb, String page) {
|
||||
try {
|
||||
// Fetch the image page
|
||||
Response resp = Http.url(page)
|
||||
.referrer(this.url)
|
||||
.cookies(cookies)
|
||||
.response();
|
||||
cookies.putAll(resp.cookies());
|
||||
Document doc = resp.parse();
|
||||
Elements els = doc.select("img.dev-content-full");
|
||||
String fsimage = null;
|
||||
// Get the largest resolution image on the page
|
||||
if (!els.isEmpty()) {
|
||||
// Large image
|
||||
fsimage = els.get(0).attr("src");
|
||||
LOGGER.info("Found large-scale: " + fsimage);
|
||||
if (fsimage.contains("//orig")) {
|
||||
return fsimage;
|
||||
}
|
||||
}
|
||||
// Try to find the download button
|
||||
els = doc.select("a.dev-page-download");
|
||||
if (!els.isEmpty()) {
|
||||
// Full-size image
|
||||
String downloadLink = els.get(0).attr("href");
|
||||
LOGGER.info("Found download button link: " + downloadLink);
|
||||
HttpURLConnection con = (HttpURLConnection) new URL(downloadLink).openConnection();
|
||||
con.setRequestProperty("Referer",this.url.toString());
|
||||
String cookieString = "";
|
||||
for (Map.Entry<String, String> entry : cookies.entrySet()) {
|
||||
cookieString = cookieString + entry.getKey() + "=" + entry.getValue() + "; ";
|
||||
}
|
||||
cookieString = cookieString.substring(0,cookieString.length() - 1);
|
||||
con.setRequestProperty("Cookie",cookieString);
|
||||
con.setRequestProperty("User-Agent", USER_AGENT);
|
||||
con.setInstanceFollowRedirects(true);
|
||||
con.connect();
|
||||
int code = con.getResponseCode();
|
||||
String location = con.getURL().toString();
|
||||
con.disconnect();
|
||||
if (location.contains("//orig")) {
|
||||
fsimage = location;
|
||||
LOGGER.info("Found image download: " + location);
|
||||
}
|
||||
}
|
||||
if (fsimage != null) {
|
||||
return fsimage;
|
||||
}
|
||||
throw new IOException("No download page found");
|
||||
} catch (IOException ioe) {
|
||||
try {
|
||||
LOGGER.info("Failed to get full size download image at " + page + " : '" + ioe.getMessage() + "'");
|
||||
String lessThanFull = thumbToFull(thumb, false);
|
||||
LOGGER.info("Falling back to less-than-full-size image " + lessThanFull);
|
||||
return lessThanFull;
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns DA cookies.
|
||||
* @return Map of cookies containing session data.
|
||||
*/
|
||||
private Map<String, String> getDACookies() {
|
||||
return RipUtils.getCookiesFromString(Utils.getConfigString("deviantart.cookies", loginCookies));
|
||||
}
|
||||
/**
|
||||
* Starts new Thread to find download link + filename + filetype
|
||||
*/
|
||||
@Override
|
||||
protected void downloadURL(URL url, int index) {
|
||||
this.downloadCount += 1;
|
||||
LOGGER.info("Downloading URL Number " + this.downloadCount);
|
||||
LOGGER.info("Deviant Art URL: " + url.toExternalForm());
|
||||
try {
|
||||
Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer)
|
||||
.userAgent(userAgent).response();
|
||||
updateCookie(re.cookies());
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
// Start Thread and add to pool.
|
||||
DeviantartImageThread t = new DeviantartImageThread(url);
|
||||
deviantartThreadPool.addThread(t);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String normalizeUrl(String url) {
|
||||
return (urlWithParams(this.offset).toExternalForm());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns name of album. Album name consists of 3 words: - Artist (owner of
|
||||
* gallery) - Type (gallery or favorites folder) - Name of the folder
|
||||
*
|
||||
* Returns artist_type_name
|
||||
*/
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
|
||||
String s = url.toExternalForm();
|
||||
String artist = "unknown";
|
||||
String what = "unknown";
|
||||
String albumname = "unknown";
|
||||
|
||||
if (url.toExternalForm().contains("catpath=/")) {
|
||||
this.usingCatPath = true;
|
||||
}
|
||||
|
||||
Pattern p = Pattern.compile("^https?://www.deviantart\\.com/([a-zA-Z0-9]+).*$");
|
||||
Matcher m = p.matcher(s);
|
||||
|
||||
// Artist
|
||||
if (m.matches()) {
|
||||
artist = m.group(1);
|
||||
} else {
|
||||
throw new MalformedURLException("Expected deviantart.com URL format: "
|
||||
+ "www.deviantart.com/<ARTIST>/gallery/<NUMBERS>/<NAME>\nOR\nwww.deviantart.com/<ARTIST>/favourites/<NUMBERS>/<NAME>\\nOr simply the gallery or favorites of some artist - got "
|
||||
+ url + " instead");
|
||||
}
|
||||
|
||||
// What is it
|
||||
if (s.contains("/gallery/")) {
|
||||
what = "gallery";
|
||||
} else if (s.contains("/favourites/")) {
|
||||
what = "favourites";
|
||||
} else {
|
||||
throw new MalformedURLException("Expected deviantart.com URL format: "
|
||||
+ "www.deviantart.com/<ARTIST>/gallery/<NUMBERS>/<NAME>\nOR\nwww.deviantart.com/<ARTIST>/favourites/<NUMBERS>/<NAME>\nOr simply the gallery or favorites of some artist - got "
|
||||
+ url + " instead");
|
||||
}
|
||||
|
||||
// Album Name
|
||||
Pattern artistP = Pattern
|
||||
.compile("^https?://www.deviantart\\.com/[a-zA-Z0-9]+/[a-zA-Z]+/[0-9]+/([a-zA-Z0-9-]+).*$");
|
||||
Matcher artistM = artistP.matcher(s);
|
||||
if (s.endsWith("?catpath=/")) {
|
||||
albumname = "all";
|
||||
} else if (s.endsWith("/favourites/") || s.endsWith("/gallery/")) {
|
||||
albumname = "featured";
|
||||
} else if (artistM.matches()) {
|
||||
albumname = artistM.group(1);
|
||||
}
|
||||
LOGGER.info("Album Name: " + artist + "_" + what + "_" + albumname);
|
||||
|
||||
return artist + "_" + what + "_" + albumname;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return Clean URL as String
|
||||
*/
|
||||
private String cleanURL() {
|
||||
return (this.url.toExternalForm().split("\\?"))[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Return correct url with params (catpath) and current offset
|
||||
*
|
||||
* @return URL to page with offset
|
||||
*/
|
||||
private URL urlWithParams(int offset) {
|
||||
try {
|
||||
String url = cleanURL();
|
||||
if (this.usingCatPath) {
|
||||
return (new URL(url + "?catpath=/&offset=" + offset));
|
||||
} else {
|
||||
return (new URL(url + "?offset=" + offset));
|
||||
}
|
||||
} catch (MalformedURLException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
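// Illustrative results (hypothetical gallery URL): for https://www.deviantart.com/someartist/gallery/
// and offset 24 this returns .../gallery/?offset=24, or .../gallery/?catpath=/&offset=24
// when the rip was started with a catpath URL.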
|
||||
|
||||
/**
|
||||
* Returns a HashMap usable as cookies for NSFW artworks. Not really needed but
* maybe useful later.
|
||||
*
|
||||
* @return Cookie Hashmap
|
||||
*/
|
||||
private Map<String, String> getDACookie() {
|
||||
return this.cookies;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates cookies
|
||||
*
|
||||
* @param m new Cookies
|
||||
*/
|
||||
private void updateCookie(Map<String, String> m) {
|
||||
|
||||
if (m == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
Iterator<String> iter = m.keySet().iterator();
|
||||
while (iter.hasNext()) {
|
||||
String current = iter.next();
|
||||
if (!this.allowedCookies.contains(current)) {
|
||||
// m.remove(current);
|
||||
iter.remove();
|
||||
}
|
||||
}
|
||||
|
||||
LOGGER.info("Updating Cookies");
|
||||
LOGGER.info("Old Cookies: " + getDACookie() + " ");
|
||||
LOGGER.info("New Cookies: " + m + " ");
|
||||
this.cookies.putAll(m);
|
||||
this.cookies.put("agegate_state", "1");
|
||||
LOGGER.info("Merged Cookies: " + getDACookie() + " ");
|
||||
|
||||
try {
|
||||
Utils.setConfigString(utilsKey, serialize(new HashMap<String, String>(getDACookie())));
|
||||
Utils.saveConfig();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Serializes an Object and returns a String ready to store. Used to store
* cookies in the config file because the DeviantArt cookies contain all sorts of
* special characters like ; , = : and so on.
|
||||
*
|
||||
* @param o Object to serialize
|
||||
* @return The serialized base64 encoded object
|
||||
* @throws IOException
|
||||
*/
|
||||
private String serialize(Serializable o) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
ObjectOutputStream oos = new ObjectOutputStream(baos);
|
||||
oos.writeObject(o);
|
||||
oos.close();
|
||||
return Base64.getEncoder().encodeToString(baos.toByteArray());
|
||||
}
|
||||
|
||||
/**
|
||||
* Recreates the object from the base64 encoded String. Used for Cookies
|
||||
*
|
||||
* @param s the Base64 encoded string
|
||||
* @return the Cookie Map
|
||||
* @throws IOException
|
||||
* @throws ClassNotFoundException
|
||||
*/
|
||||
private Map<String, String> deserialize(String s) throws IOException, ClassNotFoundException {
|
||||
byte[] data = Base64.getDecoder().decode(s);
|
||||
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data));
|
||||
HashMap<String, String> o = (HashMap<String, String>) ois.readObject(); // Unchecked cast here but should never
|
||||
// be something else
|
||||
ois.close();
|
||||
return o;
|
||||
}
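// Illustrative round trip (values assumed): the cookie map is persisted in the config as a single
// Base64 string so that values containing ';', ',', '=' or ':' survive the properties format:
//
//     String stored = serialize(new HashMap<String, String>(getDACookie()));
//     Map<String, String> restored = deserialize(stored);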
|
||||
|
||||
/**
|
||||
* Checks if the current cookies are still valid/usable. Also checks if agegate
|
||||
* is given.
|
||||
*
|
||||
* @return True when all is good.
|
||||
*/
|
||||
private boolean checkLogin() {
|
||||
if (!getDACookie().containsKey("agegate_state")) {
|
||||
LOGGER.info("No agegate key");
|
||||
return false;
|
||||
} else if (!getDACookie().get("agegate_state").equals("1")) {
|
||||
LOGGER.info("Wrong agegate value");
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
LOGGER.info("Login with Cookies: " + getDACookie());
|
||||
Response res = Http.url("https://www.deviantart.com/users/login").connection().followRedirects(true)
|
||||
.cookies(getDACookie()).referrer(this.referer).userAgent(this.userAgent).execute();
|
||||
if (!res.url().toExternalForm().equals("https://www.deviantart.com/users/login")) {
|
||||
LOGGER.info("Cookies are valid");
|
||||
LOGGER.info(res.url());
|
||||
return true;
|
||||
} else {
|
||||
LOGGER.info("Cookies invalid. Wrong URL: " + res.url());
|
||||
LOGGER.info(res.statusCode());
|
||||
LOGGER.info(res.parse());
|
||||
return false;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyzes an image page like
|
||||
* https://www.deviantart.com/kageuri/art/RUBY-568396655 .
|
||||
*
|
||||
* Looks for the download button, follows the authentication and redirects, and adds
* the image URL to the download queue. If no download button is present it will
* use the largest version of the image.
|
||||
*
|
||||
* Should work with all filetypes on Deviantart. Tested with .JPG .PNG and .PDF
|
||||
*
|
||||
* @author MrPlaygon
|
||||
*
|
||||
*/
|
||||
private class DeviantartImageThread extends Thread {
|
||||
private URL url;
|
||||
|
||||
public DeviantartImageThread(URL url) {
|
||||
this.url = url;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
getFullSizeURL();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get URL to Artwork and return fullsize URL with file ending.
|
||||
*
|
||||
* Reads the artwork page at {@code url}, e.g.
* https://www.deviantart.com/apofiss/art/warmest-of-the-days-455668450 ,
* and queues the full size image URL, e.g.
* https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/07f7a6bb-2d35-4630-93fc-be249af22b3e/d7jak0y-d20e5932-df72-4d13-b002-5e122037b373.jpg
*
|
||||
*
|
||||
*/
|
||||
private void getFullSizeURL() {
|
||||
|
||||
LOGGER.info("Searching max. Resolution for " + url);
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, "Searching max. resolution for " + url);
|
||||
try {
|
||||
Response re = Http.url(url).connection().referrer(referer).userAgent(userAgent).cookies(getDACookie())
|
||||
.execute();
|
||||
Document doc = re.parse();
|
||||
|
||||
// Artwork Title
|
||||
String title = doc.select("a.title").first().html();
|
||||
title = title.replaceAll("[^a-zA-Z0-9\\.\\-]", "_").toLowerCase();
|
||||
|
||||
int counter = 1;
|
||||
if (names.contains(title)) {
|
||||
while (names.contains(title + "_" + counter)) {
|
||||
counter++;
|
||||
}
|
||||
title = title + "_" + counter;
|
||||
}
|
||||
names.add(title);
|
||||
|
||||
// Check for download button
|
||||
Element downloadButton = null;
|
||||
|
||||
downloadButton = doc.select("a.dev-page-download").first();
|
||||
|
||||
// Download Button
|
||||
if (downloadButton != null) {
|
||||
LOGGER.info("Download Button found: " + downloadButton.attr("href"));
|
||||
|
||||
Response download = Http.url(downloadButton.attr("href")).connection().cookies(getDACookie())
|
||||
.method(Method.GET).referrer(referer).userAgent(userAgent).ignoreContentType(true)
|
||||
.followRedirects(true).execute();
|
||||
URL location = download.url();
|
||||
|
||||
System.out.println("----------------> " + url);
|
||||
String[] filetypePart = download.header("Content-Disposition").split("\\.");
|
||||
|
||||
LOGGER.info("Found Image URL");
|
||||
LOGGER.info(url);
|
||||
LOGGER.info(location);
|
||||
|
||||
addURLToDownload(location, "", "", "", getDACookie(),
|
||||
title + "." + filetypePart[filetypePart.length - 1]);
|
||||
return;
|
||||
}
|
||||
|
||||
// No Download Button
|
||||
Element div = doc.select("div.dev-view-deviation").first();
|
||||
|
||||
Element image = div.getElementsByTag("img").first();
|
||||
|
||||
String source = "";
|
||||
if (image == null) {
|
||||
LOGGER.error("ERROR on " + url);
|
||||
|
||||
LOGGER.error("Cookies: " + getDACookie() + " ");
|
||||
LOGGER.error(div);
|
||||
sendUpdate(STATUS.DOWNLOAD_ERRORED, "ERROR at\n" + url);
|
||||
return;
|
||||
}
|
||||
|
||||
// When it is text art (e.g. a story) the only image is the avatar (profile
// picture)
|
||||
if (image.hasClass("avatar")) {
|
||||
LOGGER.error("No Image found, probably text art");
|
||||
LOGGER.error(url);
|
||||
return;
|
||||
}
|
||||
|
||||
source = image.attr("src");
|
||||
|
||||
String[] parts = source.split("/v1/");
|
||||
|
||||
// Image page uses scaled down version. Split at /v1/ to receive max size.
|
||||
if (parts.length > 2) {
|
||||
LOGGER.error("Unexpected URL Format");
|
||||
sendUpdate(STATUS.DOWNLOAD_ERRORED, "Unexpected URL Format");
|
||||
return;
|
||||
}
|
||||
|
||||
String[] tmpParts = parts[0].split("\\.");
|
||||
|
||||
LOGGER.info("Found Image URL");
|
||||
LOGGER.info(url);
|
||||
LOGGER.info(parts[0]);
|
||||
while (Http.url(parts[0]).connection().execute().statusCode() == 404) {
|
||||
try {
|
||||
LOGGER.error("404 on " + url);
|
||||
Thread.sleep(1000);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
addURLToDownload(new URL(parts[0]), "", "", "", new HashMap<String, String>(),
|
||||
title + "." + tmpParts[tmpParts.length - 1]);
|
||||
return;
|
||||
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
LOGGER.error("No Full Size URL for: " + url);
|
||||
sendUpdate(STATUS.DOWNLOAD_ERRORED, "No image found for " + url);
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
}
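
// Minimal sketch of the "/v1/" trick used in getFullSizeURL() above: wixmp image
// URLs append a "/v1/fill/..." resize suffix, and splitting at "/v1/" keeps the
// unscaled original. The sample URL below is hypothetical.
private static String stripWixmpResizeExample(String scaledUrl) {
// e.g. https://images-wixmp-xxxx.wixmp.com/f/abc/d7jak0y.jpg/v1/fill/w_300,h_250/d7jak0y.jpg
// -> https://images-wixmp-xxxx.wixmp.com/f/abc/d7jak0y.jpg
return scaledUrl.split("/v1/")[0];
}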
|
||||
}
|
@@ -6,8 +6,6 @@ import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@@ -18,136 +16,154 @@ import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
public class E621Ripper extends AbstractHTMLRipper{
|
||||
private static final Logger logger = Logger.getLogger(E621Ripper.class);
|
||||
public class E621Ripper extends AbstractHTMLRipper {
|
||||
private static final Logger logger = Logger.getLogger(E621Ripper.class);
|
||||
|
||||
private static Pattern gidPattern=null;
|
||||
private static Pattern gidPattern2=null;
|
||||
private static Pattern gidPatternPool=null;
|
||||
private static Pattern gidPattern = null;
|
||||
private static Pattern gidPattern2 = null;
|
||||
private static Pattern gidPatternPool = null;
|
||||
|
||||
private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621");
|
||||
private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
|
||||
|
||||
public E621Ripper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DownloadThreadPool getThreadPool() {
|
||||
return e621ThreadPool;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "e621.net";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "e621";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if(url.getPath().startsWith("/pool/show/"))
|
||||
return Http.url("https://e621.net/pool/show/"+getTerm(url)).get();
|
||||
else
|
||||
return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get();
|
||||
}
|
||||
|
||||
private String getFullSizedImage(String url) {
|
||||
try {
|
||||
Document page = Http.url("https://e621.net" + url).get();
|
||||
Elements video = page.select("video > source");
|
||||
Elements flash = page.select("embed");
|
||||
Elements image = page.select("a#highres");
|
||||
if (video.size() > 0) {
|
||||
return video.attr("src");
|
||||
} else if (flash.size() > 0) {
|
||||
return flash.attr("src");
|
||||
} else if (image.size() > 0) {
|
||||
return image.attr("href");
|
||||
} else {
|
||||
throw new IOException();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
logger.error("Unable to get full sized image from " + url);
|
||||
return null;
|
||||
}
|
||||
public E621Ripper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
Elements elements = page.select("div > span.thumb > a");
|
||||
List<String> res = new ArrayList<>();
|
||||
@Override
|
||||
public DownloadThreadPool getThreadPool() {
|
||||
return e621ThreadPool;
|
||||
}
|
||||
|
||||
for(Element e:elements) {
|
||||
if (!e.attr("href").isEmpty()) {
|
||||
String fullSizedImage = getFullSizedImage(e.attr("href"));
|
||||
if (fullSizedImage != null && !fullSizedImage.equals("")) {
|
||||
res.add(getFullSizedImage(e.attr("href")));
|
||||
}
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "e621.net";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "e621";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (url.getPath().startsWith("/pool/show/"))
|
||||
return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
|
||||
else
|
||||
return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
Elements elements = page.select("div > span.thumb > a");
|
||||
List<String> res = new ArrayList<>();
|
||||
|
||||
for (Element e : elements) {
|
||||
if (!e.attr("href").isEmpty()) {
|
||||
res.add(e.attr("abs:href"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document page) throws IOException {
|
||||
if (page.select("a.next_page") != null) {
|
||||
return Http.url("https://e621.net" + page.select("a.next_page").attr("href")).get();
|
||||
@Override
|
||||
public Document getNextPage(Document page) throws IOException {
|
||||
if (!page.select("a.next_page").isEmpty()) {
|
||||
return Http.url(page.select("a.next_page").attr("abs:href")).get();
|
||||
} else {
|
||||
throw new IOException("No more pages");
|
||||
throw new IOException("No more pages.");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(final URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
@Override
|
||||
public void downloadURL(final URL url, int index) {
|
||||
// addURLToDownload(url, getPrefix(index));
|
||||
e621ThreadPool.addThread(new E621FileThread(url, getPrefix(index)));
|
||||
}
|
||||
|
||||
private String getTerm(URL url) throws MalformedURLException{
|
||||
if(gidPattern==null)
|
||||
gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$");
|
||||
if(gidPatternPool==null)
|
||||
gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
|
||||
private String getTerm(URL url) throws MalformedURLException {
|
||||
if (gidPattern == null)
|
||||
gidPattern = Pattern.compile(
|
||||
"^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$");
|
||||
if (gidPatternPool == null)
|
||||
gidPatternPool = Pattern.compile(
|
||||
"^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
|
||||
|
||||
Matcher m = gidPattern.matcher(url.toExternalForm());
|
||||
if(m.matches()) {
|
||||
Matcher m = gidPattern.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
LOGGER.info(m.group(2));
|
||||
return m.group(2);
|
||||
}
|
||||
|
||||
m = gidPatternPool.matcher(url.toExternalForm());
|
||||
if(m.matches()) {
|
||||
m = gidPatternPool.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(2);
|
||||
}
|
||||
|
||||
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
|
||||
}
|
||||
throw new MalformedURLException(
|
||||
"Expected e621.net URL format: e621.net/post/index/1/searchterm - got " + url + " instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
String prefix = "";
|
||||
if (url.getPath().startsWith("/pool/show/")) {
|
||||
prefix = "pool_";
|
||||
}
|
||||
return Utils.filesystemSafe(prefix + getTerm(url));
|
||||
}
|
||||
|
||||
String prefix="";
|
||||
if (url.getPath().startsWith("/pool/show/")) {
|
||||
prefix = "pool_";
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
if (gidPattern2 == null)
|
||||
gidPattern2 = Pattern.compile(
|
||||
"^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$");
|
||||
|
||||
Matcher m = gidPattern2.matcher(url.toExternalForm());
|
||||
if (m.matches())
|
||||
return new URL("https://e621.net/post/index/1/" + m.group(2).replace("+", "%20"));
|
||||
|
||||
return url;
|
||||
}
|
||||
|
||||
public class E621FileThread extends Thread {
|
||||
|
||||
private URL url;
|
||||
private String index;
|
||||
|
||||
public E621FileThread(URL url, String index) {
|
||||
this.url = url;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
String fullSizedImage = getFullSizedImage(url);
|
||||
if (fullSizedImage != null && !fullSizedImage.equals("")) {
|
||||
addURLToDownload(new URL(fullSizedImage), index);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
logger.error("Unable to get full sized image from " + url);
|
||||
}
|
||||
}
|
||||
|
||||
private String getFullSizedImage(URL imageURL) throws IOException {
|
||||
Document page = Http.url(imageURL).retries(3).get();
|
||||
Elements video = page.select("video > source");
|
||||
Elements flash = page.select("embed");
|
||||
Elements image = page.select("a#highres");
|
||||
if (video.size() > 0) {
|
||||
return video.attr("src");
|
||||
} else if (flash.size() > 0) {
|
||||
return flash.attr("src");
|
||||
} else if (image.size() > 0) {
|
||||
return image.attr("href");
|
||||
} else {
|
||||
throw new IOException();
|
||||
}
|
||||
|
||||
return Utils.filesystemSafe(prefix+getTerm(url));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
if(gidPattern2==null)
|
||||
gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$");
|
||||
|
||||
Matcher m = gidPattern2.matcher(url.toExternalForm());
|
||||
if(m.matches())
|
||||
return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20"));
|
||||
|
||||
return url;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
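
// Hedged sketch, separate from the ripper code above: which capture group the
// e621 getTerm() pattern yields for a search URL. The URL is an example only.
class E621TermExample {
public static void main(String[] args) {
java.util.regex.Pattern searchPattern = java.util.regex.Pattern.compile(
"^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$");
java.util.regex.Matcher m = searchPattern.matcher("https://e621.net/post/index/1/fox");
if (m.matches()) {
System.out.println(m.group(2)); // prints "fox", the term getFirstPage() appends to the index URL
}
}
}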
|
||||
|
@@ -115,8 +115,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
String image = null;
|
||||
if (thumb.hasAttr("data-cfsrc")) {
|
||||
image = thumb.attr("data-cfsrc");
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
// Deobfuscate the JSON data
|
||||
String rawJson = deobfuscateJSON(page.select("script#ractive-public").html()
|
||||
.replaceAll(">", ">").replaceAll("<", "<").replace("&", "&"));
|
||||
@@ -125,17 +124,16 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
for (int i = 0; i != json.getJSONArray("pictures").length(); i++) {
|
||||
image = "https://www.8muses.com/image/fl/" + json.getJSONArray("pictures").getJSONObject(i).getString("publicUri");
|
||||
URL imageUrl = new URL(image);
|
||||
if (Utils.getConfigBoolean("8muses.use_short_names", false)) {
|
||||
addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, "", null, true);
|
||||
} else {
|
||||
addURLToDownload(imageUrl, getPrefixLong(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, "", null, true);
|
||||
}
|
||||
addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, "", null, true);
|
||||
// X is our page index
|
||||
x++;
|
||||
if (isThisATest()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
continue;
|
||||
return imageURLs;
|
||||
} catch (MalformedURLException e) {
|
||||
LOGGER.error("\"" + image + "\" is malformed");
|
||||
}
|
||||
}
|
||||
if (!image.contains("8muses.com")) {
|
||||
|
@@ -0,0 +1,119 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class ErofusRipper extends AbstractHTMLRipper {
|
||||
|
||||
public ErofusRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasASAPRipping() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "erofus";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "erofus.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https://www.erofus.com/comics/([a-zA-Z0-9\\-_]+).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (!m.matches()) {
|
||||
throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
|
||||
}
|
||||
return m.group(m.groupCount());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
return Http.url(url).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
LOGGER.info(page);
|
||||
List<String> imageURLs = new ArrayList<>();
|
||||
int x = 1;
|
||||
if (pageContainsImages(page)) {
|
||||
LOGGER.info("Page contains images");
|
||||
ripAlbum(page);
|
||||
} else {
|
||||
// This contains the thumbnails of all images on the page
|
||||
Elements pageImages = page.select("a.a-click");
|
||||
for (Element pageLink : pageImages) {
|
||||
if (super.isStopped()) break;
|
||||
if (pageLink.attr("href").contains("comics")) {
|
||||
String subUrl = "https://erofus.com" + pageLink.attr("href");
|
||||
try {
|
||||
LOGGER.info("Retrieving " + subUrl);
|
||||
sendUpdate(RipStatusMessage.STATUS.LOADING_RESOURCE, subUrl);
|
||||
Document subPage = Http.url(subUrl).get();
|
||||
List<String> subalbumImages = getURLsFromPage(subPage);
|
||||
} catch (IOException e) {
|
||||
LOGGER.warn("Error while loading subalbum " + subUrl, e);
|
||||
}
|
||||
}
|
||||
if (isThisATest()) break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
public void ripAlbum(Document page) {
|
||||
int x = 1;
|
||||
Elements thumbs = page.select("a.a-click > div.thumbnail > img");
|
||||
for (Element thumb : thumbs) {
|
||||
String image = "https://www.erofus.com" + thumb.attr("src").replaceAll("thumb", "medium");
|
||||
try {
|
||||
Map<String,String> opts = new HashMap<String, String>();
|
||||
opts.put("subdirectory", page.title().replaceAll(" \\| Erofus - Sex and Porn Comics", "").replaceAll(" ", "_"));
|
||||
opts.put("prefix", getPrefix(x));
|
||||
addURLToDownload(new URL(image), opts);
|
||||
} catch (MalformedURLException e) {
|
||||
LOGGER.info(e.getMessage());
|
||||
}
|
||||
x++;
|
||||
}
|
||||
}
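
// Hedged sketch of the option-map download call ripAlbum() uses above; the keys
// ("subdirectory", "prefix") are the ones actually set there, the values here
// are placeholders.
private void queueImageExample(URL imageUrl) {
Map<String, String> opts = new HashMap<>();
opts.put("subdirectory", "Some_Album_Title");
opts.put("prefix", getPrefix(1));
addURLToDownload(imageUrl, opts);
}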
|
||||
|
||||
private boolean pageContainsImages(Document page) {
|
||||
Elements pageImages = page.select("a.a-click");
|
||||
for (Element pageLink : pageImages) {
|
||||
if (pageLink.attr("href").contains("/pic/")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
}
|
@@ -51,7 +51,7 @@ public class EromeRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public boolean pageContainsAlbums(URL url) {
|
||||
Pattern pa = Pattern.compile("https?://www.erome.com/([a-zA-Z0-9_-]*)/?");
|
||||
Pattern pa = Pattern.compile("https?://www.erome.com/([a-zA-Z0-9_\\-?=]*)/?");
|
||||
Matcher ma = pa.matcher(url.toExternalForm());
|
||||
return ma.matches();
|
||||
}
|
||||
@@ -111,7 +111,7 @@ public class EromeRipper extends AbstractHTMLRipper {
|
||||
return m.group(1);
|
||||
}
|
||||
|
||||
p = Pattern.compile("^https?://www.erome.com/([a-zA-Z0-9_-]+)/?$");
|
||||
p = Pattern.compile("^https?://www.erome.com/([a-zA-Z0-9_\\-?=]+)/?$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
|
||||
if (m.matches()) {
|
||||
|
@@ -219,16 +219,21 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern
|
||||
.compile("^https?://www\\.furaffinity\\.net/gallery/([-_.0-9a-zA-Z]+).*$");
|
||||
// Gallery
|
||||
Pattern p = Pattern.compile("^https?://www\\.furaffinity\\.net/gallery/([-_.0-9a-zA-Z]+).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
|
||||
throw new MalformedURLException("Expected furaffinity.net URL format: "
|
||||
+ "www.furaffinity.net/gallery/username - got " + url
|
||||
+ " instead");
|
||||
//Scraps
|
||||
p = Pattern.compile("^https?://www\\.furaffinity\\.net/scraps/([-_.0-9a-zA-Z]+).*$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
|
||||
throw new MalformedURLException("Unable to find images in" + url);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,23 +1,30 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Connection.Method;
|
||||
import org.jsoup.Connection.Response;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public class FuskatorRipper extends AbstractHTMLRipper {
|
||||
|
||||
private String jsonurl = "https://fuskator.com/ajax/gal.aspx";
|
||||
private String xAuthUrl = "https://fuskator.com/ajax/auth.aspx";
|
||||
private String xAuthToken;
|
||||
private Map<String, String> cookies;
|
||||
|
||||
public FuskatorRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
@@ -26,6 +33,7 @@ public class FuskatorRipper extends AbstractHTMLRipper {
|
||||
public String getHost() {
|
||||
return "fuskator";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "fuskator.com";
|
||||
@@ -37,45 +45,55 @@ public class FuskatorRipper extends AbstractHTMLRipper {
|
||||
if (u.contains("/thumbs/")) {
|
||||
u = u.replace("/thumbs/", "/full/");
|
||||
}
|
||||
if (u.contains("/expanded/")) {
|
||||
u = u.replaceAll("/expanded/", "/full/");
|
||||
}
|
||||
return new URL(u);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^.*fuskator.com/full/([a-zA-Z0-9\\-]+).*$");
|
||||
Pattern p = Pattern.compile("^.*fuskator.com/full/([a-zA-Z0-9\\-~]+).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
throw new MalformedURLException(
|
||||
"Expected fuskator.com gallery formats: "
|
||||
+ "fuskator.com/full/id/..."
|
||||
+ " Got: " + url);
|
||||
"Expected fuskator.com gallery formats: " + "fuskator.com/full/id/..." + " Got: " + url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
return Http.url(url).get();
|
||||
// return Http.url(url).get();
|
||||
Response res = Http.url(url).response();
|
||||
cookies = res.cookies();
|
||||
return res.parse();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> imageURLs = new ArrayList<>();
|
||||
String html = doc.html();
|
||||
// Get "baseUrl"
|
||||
String baseUrl = Utils.between(html, "unescape('", "'").get(0);
|
||||
JSONObject json;
|
||||
|
||||
try {
|
||||
baseUrl = URLDecoder.decode(baseUrl, "UTF-8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
LOGGER.warn("Error while decoding " + baseUrl, e);
|
||||
getXAuthToken();
|
||||
if (xAuthToken == null || xAuthToken.isEmpty()) {
|
||||
throw new IOException("No xAuthToken found.");
|
||||
}
|
||||
|
||||
// All good. Fetch JSON data from jsonUrl.
|
||||
json = Http.url(jsonurl).cookies(cookies).data("X-Auth", xAuthToken).data("hash", getGID(url))
|
||||
.data("_", Long.toString(System.currentTimeMillis())).getJSON();
|
||||
} catch (IOException e) {
|
||||
LOGGER.error("Couldnt fetch images.", e.getCause());
|
||||
return imageURLs;
|
||||
}
|
||||
if (baseUrl.startsWith("//")) {
|
||||
baseUrl = "http:" + baseUrl;
|
||||
}
|
||||
// Iterate over images
|
||||
for (String filename : Utils.between(html, "+'", "'")) {
|
||||
imageURLs.add(baseUrl + filename);
|
||||
|
||||
JSONArray imageArray = json.getJSONArray("images");
|
||||
for (int i = 0; i < imageArray.length(); i++) {
|
||||
imageURLs.add("https:" + imageArray.getJSONObject(i).getString("imageUrl"));
|
||||
}
|
||||
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@@ -83,4 +101,12 @@ public class FuskatorRipper extends AbstractHTMLRipper {
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
|
||||
private void getXAuthToken() throws IOException {
|
||||
if (cookies == null || cookies.isEmpty()) {
|
||||
throw new IOException("Null cookies or no cookies found.");
|
||||
}
|
||||
Response res = Http.url(xAuthUrl).cookies(cookies).method(Method.POST).response();
|
||||
xAuthToken = res.body();
|
||||
}
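
// Hedged sketch of the request sequence implemented above: getFirstPage() stores
// the session cookies from the gallery page, getURLsFromPage() calls
// getXAuthToken() (a POST to /ajax/auth.aspx with those cookies) and then fetches
// the image list as JSON from /ajax/gal.aspx using cookies + X-Auth + gallery hash.
private void fuskatorFlowExample() throws IOException {
Document gallery = getFirstPage(); // fills the cookies field
List<String> images = getURLsFromPage(gallery); // auth token + JSON fetch happen in here
LOGGER.debug("fuskator returned " + images.size() + " image URLs");
}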
|
||||
}
|
||||
|
@@ -9,16 +9,24 @@ import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
|
||||
public class GfycatRipper extends AbstractSingleFileRipper {
|
||||
public class GfycatRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final String HOST = "gfycat.com";
|
||||
String username = "";
|
||||
String cursor = "";
|
||||
String count = "30";
|
||||
|
||||
|
||||
|
||||
public GfycatRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
@@ -45,10 +53,20 @@ public class GfycatRipper extends AbstractSingleFileRipper {
|
||||
|
||||
return url;
|
||||
}
|
||||
public boolean isProfile() {
|
||||
Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9]+).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
return m.matches();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
return Http.url(url).get();
|
||||
if (!isProfile()) {
|
||||
return Http.url(url).get();
|
||||
} else {
|
||||
username = getGID(url);
|
||||
return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats")).ignoreContentType().get();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -58,7 +76,7 @@ public class GfycatRipper extends AbstractSingleFileRipper {
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/([a-zA-Z0-9]+).*$");
|
||||
Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@?([a-zA-Z0-9]+).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
@@ -70,15 +88,45 @@ public class GfycatRipper extends AbstractSingleFileRipper {
|
||||
+ " Got: " + url);
|
||||
}
|
||||
|
||||
private String stripHTMLTags(String t) {
|
||||
t = t.replaceAll("<html>\n" +
|
||||
" <head></head>\n" +
|
||||
" <body>", "");
|
||||
t.replaceAll("</body>\n" +
|
||||
"</html>", "");
|
||||
t = t.replaceAll("\n", "");
|
||||
t = t.replaceAll("=\"\"", "");
|
||||
return t;
|
||||
}
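
// Why stripHTMLTags() is needed, as a hedged sketch: the Gfycat API answers with
// raw JSON, but Jsoup wraps whatever it downloads in an <html><head></head><body>
// shell, so the wrapper has to be stripped before new JSONObject(...) can parse
// doc.html(). The payload below is made up.
private void stripHTMLTagsExample() {
String wrapped = "<html>\n <head></head>\n <body>{\"gfycats\":[],\"cursor\":\"\"}</body>\n</html>";
JSONObject page = new JSONObject(stripHTMLTags(wrapped));
LOGGER.debug("cursor: " + page.getString("cursor")); // empty cursor means no further pages
}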
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
if (cursor.equals("")) {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> result = new ArrayList<>();
|
||||
Elements videos = doc.select("source");
|
||||
String vidUrl = videos.first().attr("src");
|
||||
if (vidUrl.startsWith("//")) {
|
||||
vidUrl = "http:" + vidUrl;
|
||||
if (isProfile()) {
|
||||
JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
|
||||
JSONArray content = page.getJSONArray("gfycats");
|
||||
for (int i = 0; i < content.length(); i++) {
|
||||
result.add(content.getJSONObject(i).getString("mp4Url"));
|
||||
}
|
||||
cursor = page.getString("cursor");
|
||||
} else {
|
||||
Elements videos = doc.select("script");
|
||||
for (Element el : videos) {
|
||||
String json = el.html();
|
||||
if (json.startsWith("{")) {
|
||||
JSONObject page = new JSONObject(json);
|
||||
result.add(page.getJSONObject("video").getString("contentUrl"));
|
||||
}
|
||||
}
|
||||
}
|
||||
result.add(vidUrl);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -95,14 +143,14 @@ public class GfycatRipper extends AbstractSingleFileRipper {
|
||||
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
|
||||
|
||||
Document doc = Http.url(url).get();
|
||||
Elements videos = doc.select("source");
|
||||
if (videos.isEmpty()) {
|
||||
throw new IOException("Could not find source at " + url);
|
||||
Elements videos = doc.select("script");
|
||||
for (Element el : videos) {
|
||||
String json = el.html();
|
||||
if (json.startsWith("{")) {
|
||||
JSONObject page = new JSONObject(json);
|
||||
return page.getJSONObject("video").getString("contentUrl");
|
||||
}
|
||||
}
|
||||
String vidUrl = videos.first().attr("src");
|
||||
if (vidUrl.startsWith("//")) {
|
||||
vidUrl = "http:" + vidUrl;
|
||||
}
|
||||
return vidUrl;
|
||||
throw new IOException();
|
||||
}
|
||||
}
|
@@ -10,6 +10,7 @@ import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import org.jsoup.Connection.Method;
|
||||
import org.jsoup.Connection.Response;
|
||||
import org.jsoup.nodes.Document;
|
||||
@@ -53,8 +54,8 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
|
||||
Response resp;
|
||||
Document doc;
|
||||
|
||||
resp = Http.url("http://www.hentai-foundry.com/?enterAgree=1&size=1500")
|
||||
.referrer("http://www.hentai-foundry.com/")
|
||||
resp = Http.url("https://www.hentai-foundry.com/?enterAgree=1&size=1500")
|
||||
.referrer("https://www.hentai-foundry.com/")
|
||||
.cookies(cookies)
|
||||
.response();
|
||||
// The only cookie that seems to matter in getting around the age wall is the phpsession cookie
|
||||
@@ -86,11 +87,11 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
|
||||
data.put("rating_incest" , "1");
|
||||
data.put("rating_rape" , "1");
|
||||
data.put("filter_media" , "A");
|
||||
data.put("filter_order" , "date_new");
|
||||
data.put("filter_order" , Utils.getConfigString("hentai-foundry.filter_order","date_old"));
|
||||
data.put("filter_type" , "0");
|
||||
|
||||
resp = Http.url("http://www.hentai-foundry.com/site/filters")
|
||||
.referrer("http://www.hentai-foundry.com/")
|
||||
resp = Http.url("https://www.hentai-foundry.com/site/filters")
|
||||
.referrer("https://www.hentai-foundry.com/")
|
||||
.cookies(cookies)
|
||||
.data(data)
|
||||
.method(Method.POST)
|
||||
@@ -102,7 +103,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
|
||||
}
|
||||
|
||||
resp = Http.url(url)
|
||||
.referrer("http://www.hentai-foundry.com/")
|
||||
.referrer("https://www.hentai-foundry.com/")
|
||||
.cookies(cookies)
|
||||
.response();
|
||||
cookies.putAll(resp.cookies());
|
||||
@@ -119,7 +120,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
|
||||
Element first = els.first();
|
||||
try {
|
||||
String nextURL = first.attr("href");
|
||||
nextURL = "http://www.hentai-foundry.com" + nextURL;
|
||||
nextURL = "https://www.hentai-foundry.com" + nextURL;
|
||||
return Http.url(nextURL)
|
||||
.referrer(url)
|
||||
.cookies(cookies)
|
||||
@@ -135,8 +136,8 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
|
||||
// this if is for ripping pdf stories
|
||||
if (url.toExternalForm().contains("/stories/")) {
|
||||
for (Element pdflink : doc.select("a.pdfLink")) {
|
||||
LOGGER.info("grabbing " + "http://www.hentai-foundry.com" + pdflink.attr("href"));
|
||||
imageURLs.add("http://www.hentai-foundry.com" + pdflink.attr("href"));
|
||||
LOGGER.info("grabbing " + "https://www.hentai-foundry.com" + pdflink.attr("href"));
|
||||
imageURLs.add("https://www.hentai-foundry.com" + pdflink.attr("href"));
|
||||
}
|
||||
return imageURLs;
|
||||
}
|
||||
@@ -153,8 +154,8 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
|
||||
Document imagePage;
|
||||
try {
|
||||
|
||||
LOGGER.info("grabbing " + "http://www.hentai-foundry.com" + thumb.attr("href"));
|
||||
imagePage = Http.url("http://www.hentai-foundry.com" + thumb.attr("href")).cookies(cookies).get();
|
||||
LOGGER.info("grabbing " + "https://www.hentai-foundry.com" + thumb.attr("href"));
|
||||
imagePage = Http.url("https://www.hentai-foundry.com" + thumb.attr("href")).cookies(cookies).get();
|
||||
}
|
||||
|
||||
catch (IOException e) {
|
||||
@@ -164,10 +165,10 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
|
||||
}
|
||||
// This is here for when the image is resized to a thumbnail because ripme doesn't report a screensize
|
||||
if (imagePage.select("div.boxbody > img.center").attr("src").contains("thumbs.")) {
|
||||
imageURLs.add("http:" + imagePage.select("div.boxbody > img.center").attr("onclick").replace("this.src=", "").replace("'", "").replace("; $(#resize_message).hide();", ""));
|
||||
imageURLs.add("https:" + imagePage.select("div.boxbody > img.center").attr("onclick").replace("this.src=", "").replace("'", "").replace("; $(#resize_message).hide();", ""));
|
||||
}
|
||||
else {
|
||||
imageURLs.add("http:" + imagePage.select("div.boxbody > img.center").attr("src"));
|
||||
imageURLs.add("https:" + imagePage.select("div.boxbody > img.center").attr("src"));
|
||||
}
|
||||
}
|
||||
return imageURLs;
|
||||
@@ -179,7 +180,12 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
|
||||
if (url.toExternalForm().endsWith(".pdf")) {
|
||||
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
|
||||
} else {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
// If hentai-foundry.use_prefix is false the ripper will not add a numbered prefix to any images
|
||||
if (Utils.getConfigBoolean("hentai-foundry.use_prefix", true)) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
} else {
|
||||
addURLToDownload(url, "");
|
||||
}
|
||||
}
|
||||
}
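
// The two hentai-foundry config keys referenced above, read here as a hedged
// illustration; the defaults shown match the calls in this class and the
// rip.properties lines are examples only:
//   hentai-foundry.filter_order = date_old
//   hentai-foundry.use_prefix = true
private void logHentaiFoundryConfig() {
LOGGER.info("filter_order: " + Utils.getConfigString("hentai-foundry.filter_order", "date_old"));
LOGGER.info("use_prefix: " + Utils.getConfigBoolean("hentai-foundry.use_prefix", true));
}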
|
||||
|
||||
|
@@ -0,0 +1,78 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class HentaifoxRipper extends AbstractHTMLRipper {
|
||||
|
||||
public HentaifoxRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "hentaifox";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "hentaifox.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("https://hentaifox.com/gallery/([\\d]+)/?");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
throw new MalformedURLException("Expected hentaifox URL format: " +
|
||||
"https://hentaifox.com/gallery/ID - got " + url + " instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
// "url" is an instance field of the superclass
|
||||
return Http.url(url).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
LOGGER.info(doc);
|
||||
List<String> result = new ArrayList<>();
|
||||
for (Element el : doc.select("div.preview_thumb > a > img")) {
|
||||
String imageSource = "https:" + el.attr("data-src").replaceAll("t\\.jpg", ".jpg");
|
||||
result.add(imageSource);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
Document doc = getFirstPage();
|
||||
String title = doc.select("div.info > h1").first().text();
|
||||
return getHost() + "_" + title + "_" + getGID(url);
|
||||
} catch (Exception e) {
|
||||
// Fall back to default album naming convention
|
||||
LOGGER.warn("Failed to get album title from " + url, e);
|
||||
}
|
||||
return super.getAlbumTitle(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
}
|
@@ -145,15 +145,13 @@ public class ImagefapRipper extends AbstractHTMLRipper {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
String title = getFirstPage().title();
|
||||
Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$");
|
||||
Matcher m = p.matcher(title);
|
||||
if (m.matches()) {
|
||||
return getHost() + "_" + m.group(1) + "_" + getGID(url);
|
||||
}
|
||||
title = title.replace("Porn Pics & Porn GIFs", "");
|
||||
title = title.replace(" ", "_");
|
||||
String toReturn = getHost() + "_" + title + "_" + getGID(url);
|
||||
return toReturn.replaceAll("__", "_");
|
||||
} catch (IOException e) {
|
||||
// Fall back to default album naming convention
|
||||
return super.getAlbumTitle(url);
|
||||
}
|
||||
return super.getAlbumTitle(url);
|
||||
}
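
// Worked example of the renaming getAlbumTitle() now performs; the page title
// and gallery id are made up.
private static String imagefapAlbumNameExample() {
String title = "Holiday Pics Porn Pics & Porn GIFs";
title = title.replace("Porn Pics & Porn GIFs", "").replace(" ", "_");
return ("imagefap" + "_" + title + "_" + "1234567").replaceAll("__", "_");
// -> "imagefap_Holiday_Pics_1234567"
}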
|
||||
|
||||
private String getFullSizedImage(String pageURL) {
|
||||
|
@@ -39,9 +39,6 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
private String userID;
|
||||
private String rhx_gis = null;
|
||||
private String csrftoken;
|
||||
// Run into a weird issue with Jsoup cutting some json pages in half, this is a work around
|
||||
// see https://github.com/RipMeApp/ripme/issues/601
|
||||
private String workAroundJsonString;
|
||||
|
||||
|
||||
|
||||
@@ -192,6 +189,9 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
Document p = resp.parse();
|
||||
// Get the query hash so we can download the next page
|
||||
qHash = getQHash(p);
|
||||
if (qHash == null) {
|
||||
throw new IOException("Unable to extract qhash from page");
|
||||
}
|
||||
return getJSONFromPage(p);
|
||||
}
|
||||
|
||||
@@ -398,7 +398,6 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
}
|
||||
|
||||
private boolean pageHasImages(JSONObject json) {
|
||||
LOGGER.info(json);
|
||||
int numberOfImages = json.getJSONObject("data").getJSONObject("user")
|
||||
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
|
||||
if (numberOfImages == 0) {
|
||||
@@ -422,23 +421,36 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
|
||||
}
|
||||
in.close();
|
||||
workAroundJsonString = sb.toString();
|
||||
return new JSONObject(sb.toString());
|
||||
|
||||
} catch (MalformedURLException e) {
|
||||
LOGGER.info("Unable to get query_hash, " + url + " is a malformed URL");
|
||||
LOGGER.info("Unable to get page, " + url + " is a malformed URL");
|
||||
return null;
|
||||
} catch (IOException e) {
|
||||
LOGGER.info("Unable to get query_hash");
|
||||
LOGGER.info("Unable to get page");
|
||||
LOGGER.info(e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private String getQhashUrl(Document doc) {
|
||||
for(Element el : doc.select("link[rel=preload]")) {
|
||||
if (el.attr("href").contains("ProfilePageContainer")) {
|
||||
return el.attr("href");
|
||||
}
|
||||
}
|
||||
for(Element el : doc.select("link[rel=preload]")) {
|
||||
if (el.attr("href").contains("metro")) {
|
||||
return el.attr("href");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String getQHash(Document doc) {
|
||||
String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href");
|
||||
String jsFileURL = "https://www.instagram.com" + getQhashUrl(doc);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Document jsPage;
|
||||
LOGGER.info(jsFileURL);
|
||||
try {
|
||||
// We can't use Jsoup here because it won't download a non-html file larger than a MB
|
||||
// even if you set maxBodySize to 0
|
||||
@@ -454,7 +466,7 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
LOGGER.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL");
|
||||
return null;
|
||||
} catch (IOException e) {
|
||||
LOGGER.info("Unable to get query_hash");
|
||||
LOGGER.info("Unable to get query_hash from " + jsFileURL);
|
||||
LOGGER.info(e.getMessage());
|
||||
return null;
|
||||
}
|
||||
@@ -468,6 +480,12 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
m = jsP.matcher(sb.toString());
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
} else {
|
||||
jsP = Pattern.compile(",u=.([a-zA-Z0-9]+).");
|
||||
m = jsP.matcher(sb.toString());
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -477,6 +495,7 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
}
|
||||
|
||||
}
|
||||
LOGGER.error("Could not find query_hash on " + jsFileURL);
|
||||
return null;
|
||||
|
@@ -74,6 +74,6 @@ public class MulemaxRipper extends AbstractSingleFileRipper {
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
addURLToDownload(url, getPrefix(index), "", "mulemax.com", null);
|
||||
}
|
||||
}
|
@@ -51,7 +51,7 @@ public class PorncomixDotOneRipper extends AbstractHTMLRipper {
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> result = new ArrayList<>();
|
||||
// We have 2 loops here to cover all the different album types
|
||||
for (Element el : doc.select(".dgwt-jg-gallery > a")) {
|
||||
for (Element el : doc.select(".dgwt-jg-item > a")) {
|
||||
result.add(el.attr("href"));
|
||||
}
|
||||
for (Element el : doc.select(".unite-gallery > img")) {
|
||||
|
@@ -118,6 +118,12 @@ public class RedditRipper extends AlbumRipper {
|
||||
return nextURL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a representation of the specified reddit page as a JSONArray using the reddit API
|
||||
* @param url The url of the desired page
|
||||
* @return A JSONArray object representation of the desired page
|
||||
* @throws IOException If no response is received from the url
|
||||
*/
|
||||
private JSONArray getJsonArrayFromURL(URL url) throws IOException {
|
||||
// Wait 2 seconds before the next request
|
||||
long timeDiff = System.currentTimeMillis() - lastRequestTime;
|
||||
@@ -149,9 +155,30 @@ public class RedditRipper extends AlbumRipper {
|
||||
return jsonArray;
|
||||
}
|
||||
|
||||
/**
|
||||
* Turns child JSONObject's into usable URLs and hands them off for further processing
|
||||
* Performs filtering checks based on the reddit config settings (e.g. upvote score limits).
|
||||
* Only called from getAndParseAndReturnNext() while parsing the JSONArray returned from reddit's API
|
||||
* @param child The child to process
|
||||
*/
|
||||
private void parseJsonChild(JSONObject child) {
|
||||
String kind = child.getString("kind");
|
||||
JSONObject data = child.getJSONObject("data");
|
||||
|
||||
//Upvote filtering
|
||||
if (Utils.getConfigBoolean("reddit.rip_by_upvote", false)){
|
||||
int score = data.getInt("score");
|
||||
int maxScore = Utils.getConfigInteger("reddit.max_upvotes", Integer.MAX_VALUE);
|
||||
int minScore = Utils.getConfigInteger("reddit.min_upvotes", Integer.MIN_VALUE);
|
||||
|
||||
if (score > maxScore || score < minScore) {
|
||||
|
||||
String message = "Skipping post with score outside specified range of " + minScore + " to " + maxScore;
|
||||
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, message);
|
||||
return; //Outside specified range, do not download
|
||||
}
|
||||
}
|
||||
|
||||
if (kind.equals("t1")) {
|
||||
// Comment
|
||||
handleBody(data.getString("body"), data.getString("id"), "");
|
||||
|
@@ -7,13 +7,31 @@ import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.Connection.Response;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
public class ThechiveRipper extends AbstractHTMLRipper {
|
||||
private Pattern p1 = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
|
||||
private Pattern imagePattern = Pattern.compile("<img\\s(?:.|\\n)+?>");
|
||||
|
||||
// i.thechive.com specific variables.
|
||||
private Pattern p2 = Pattern.compile("^https?://i.thechive.com/([0-9a-zA-Z_]+)");
|
||||
private String jsonUrl = "https://i.thechive.com/rest/uploads";
|
||||
private Map<String, String> cookies = new HashMap<>();
|
||||
private String nextSeed = "";
|
||||
private String username = "";
|
||||
|
||||
public ThechiveRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
@@ -21,7 +39,12 @@ public class ThechiveRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "thechive";
|
||||
Matcher m1 = p1.matcher(url.toExternalForm());
|
||||
if (m1.matches()) {
|
||||
return "thechive";
|
||||
} else {
|
||||
return "i.thechive"; // for suitable album title.
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -31,14 +54,20 @@ public class ThechiveRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
boolean isTag = false;
|
||||
return m.group(1);
|
||||
|
||||
Matcher m1 = p1.matcher(url.toExternalForm());
|
||||
if (m1.matches()) {
|
||||
return m1.group(1);
|
||||
}
|
||||
|
||||
Matcher m2 = p2.matcher(url.toExternalForm());
|
||||
if (m2.matches()) {
|
||||
username = m2.group(1);
|
||||
return username;
|
||||
}
|
||||
|
||||
throw new MalformedURLException("Expected thechive.com URL format: "
|
||||
+ "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ - got " + url + " instead");
|
||||
+ "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ OR i.thechive.com/username, got " + url + " instead.");
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -49,27 +78,148 @@ public class ThechiveRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> result = new ArrayList<>();
|
||||
for (Element el : doc.select("img.attachment-gallery-item-full")) {
|
||||
String imageSource;
|
||||
if (el.attr("data-gifsrc").isEmpty()) { //If it's not a gif
|
||||
imageSource = el.attr("src");
|
||||
} else { //If it is a gif
|
||||
imageSource = el.attr("data-gifsrc") //from data-gifsrc attribute
|
||||
.replaceAll("\\?w=\\d{3}", ""); //remove the width modifier at the end to get highest resolution
|
||||
//May need to replace the regex's {3} later on if website starts giving higher-res photos by default.
|
||||
}
|
||||
List<String> result;
|
||||
Matcher matcher = p1.matcher(url.toExternalForm());
|
||||
|
||||
// We replace thumbs with resizes so we can the full sized images
|
||||
imageSource = imageSource.replace("thumbs", "resizes");
|
||||
result.add(imageSource);
|
||||
if (matcher.matches()) {
|
||||
// for url type: thechive.com/YEAR/MONTH/DAY/POSTTITLE/
|
||||
result = getUrlsFromThechive(doc);
|
||||
} else {
|
||||
// for url type: i.thechive.com/username
|
||||
result = getUrlsFromIDotThechive();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
Matcher matcher = p1.matcher(url.toExternalForm());
|
||||
|
||||
if (matcher.matches()) {
|
||||
// url type thechive.com/YEAR/MONTH/DAY/POSTTITLE/ has a single page.
|
||||
return null;
|
||||
} else {
|
||||
if (nextSeed == null) {
|
||||
throw new IOException("No more pages.");
|
||||
}
|
||||
}
|
||||
|
||||
// Following try block checks if the next JSON object has images or not.
|
||||
// This is done to avoid IOException in rip() method, caused when
|
||||
// getURLsFromPage() returns empty list.
|
||||
JSONArray imgList;
|
||||
try {
|
||||
Response response = Http.url(jsonUrl).data("seed", nextSeed).data("queryType", "by-username")
|
||||
.data("username", username).ignoreContentType().cookies(cookies).response();
|
||||
cookies = response.cookies();
|
||||
JSONObject json = new JSONObject(response.body());
|
||||
imgList = json.getJSONArray("uploads");
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Error fetching next page.", e);
|
||||
}
|
||||
|
||||
if (imgList != null && imgList.length() > 0) {
|
||||
// Pass empty document as it is of no use for thechive.com/userName url type.
|
||||
return new Document(url.toString());
|
||||
} else {
|
||||
// Return null as this is last page.
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
|
||||
private List<String> getUrlsFromThechive(Document doc) {
|
||||
/*
|
||||
* The image urls are stored in a <script> tag of the document. This script
|
||||
* contains a single array var by name CHIVE_GALLERY_ITEMS.
|
||||
*
|
||||
* We grab all the <img> tags from the particular script, combine them in a
|
||||
* string, parse it, and grab all the img/gif urls.
|
||||
*
|
||||
*/
|
||||
List<String> result = new ArrayList<>();
|
||||
Elements scripts = doc.getElementsByTag("script");
|
||||
|
||||
for (Element script : scripts) {
|
||||
String data = script.data();
|
||||
|
||||
if (!data.contains("CHIVE_GALLERY_ITEMS")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* We add all the <img/> tags in a single StringBuilder and parse as HTML for
|
||||
* easy sorting of img/ gifs.
|
||||
*/
|
||||
StringBuilder allImgTags = new StringBuilder();
|
||||
Matcher matcher = imagePattern.matcher(data);
|
||||
while (matcher.find()) {
|
||||
// Unescape '\' from the img tags, which also unescape's img url as well.
|
||||
allImgTags.append(matcher.group(0).replaceAll("\\\\", ""));
|
||||
}
|
||||
|
||||
// Now we parse and sort links.
|
||||
Document imgDoc = Jsoup.parse(allImgTags.toString());
|
||||
Elements imgs = imgDoc.getElementsByTag("img");
|
||||
for (Element img : imgs) {
|
||||
if (img.hasAttr("data-gifsrc")) {
|
||||
// For gifs.
|
||||
result.add(img.attr("data-gifsrc"));
|
||||
} else {
|
||||
// For jpeg images.
|
||||
result.add(img.attr("src"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// strip all GET parameters from the links( such as quality, width, height as to
|
||||
// get the original image.).
|
||||
result.replaceAll(s -> s.substring(0, s.indexOf("?")));
|
||||
|
||||
return result;
|
||||
}
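
// Hedged illustration of the extraction above: CHIVE_GALLERY_ITEMS embeds
// backslash-escaped <img> markup inside a script tag; imagePattern pulls the
// tags out, the backslashes are removed, and Jsoup re-parses the fragment so
// src/data-gifsrc can be read. The markup below is a made-up sample.
private void chiveGalleryItemsExample() {
String scriptData = "var CHIVE_GALLERY_ITEMS = [\"<img src=\\\"https://img.example.com/a.jpg?quality=85\\\">\"];";
StringBuilder allImgTags = new StringBuilder();
Matcher matcher = imagePattern.matcher(scriptData);
while (matcher.find()) {
allImgTags.append(matcher.group(0).replaceAll("\\\\", ""));
}
String src = Jsoup.parse(allImgTags.toString()).getElementsByTag("img").first().attr("src");
LOGGER.debug(src.substring(0, src.indexOf("?"))); // GET params stripped, as in getUrlsFromThechive()
}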
|
||||
|
||||
private List<String> getUrlsFromIDotThechive() {
|
||||
/*
|
||||
* Image urls for i.thechive.com/someUserName are fetched via JSON requests. Each
* JSON request uses the cookies from the previous response (which contains the next
* CSRF token).
|
||||
*
|
||||
* JSON request parameters:
|
||||
* 1. seed: activityId of the last url.
|
||||
* 2. queryType: 'by-username' always.
|
||||
* 3. username: username from the url itself.
|
||||
*/
|
||||
List<String> result = new ArrayList<>();
|
||||
try {
|
||||
Response response = Http.url(jsonUrl).data("seed", nextSeed).data("queryType", "by-username")
|
||||
.data("username", username).ignoreContentType().cookies(cookies).response();
|
||||
cookies = response.cookies();
|
||||
JSONObject json = new JSONObject(response.body());
|
||||
JSONArray imgList = json.getJSONArray("uploads");
|
||||
nextSeed = null; // if no more images, nextSeed stays null
|
||||
|
||||
for (int i = 0; i < imgList.length(); i++) {
|
||||
JSONObject img = imgList.getJSONObject(i);
|
||||
if (img.getString("mediaType").equals("gif")) {
|
||||
result.add("https:" + img.getString("mediaUrlOverlay"));
|
||||
} else {
|
||||
result.add("https:" + img.getString("mediaGifFrameUrl"));
|
||||
}
|
||||
nextSeed = img.getString("activityId");
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
LOGGER.error("Unable to fetch JSON data for url: " + url);
|
||||
} catch (JSONException e) {
|
||||
LOGGER.error("JSON error while parsing data for url: " + url);
|
||||
}
|
||||
return result;
|
||||
}
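
// Paging sketch for the i.thechive JSON endpoint used above: the last upload's
// activityId becomes the "seed" of the next request, and a response with no
// uploads leaves nextSeed null, which ends the rip in getNextPage(). Values are
// whatever the previous response supplied.
private Response nextSeedRequestExample() throws IOException {
Response response = Http.url(jsonUrl)
.data("seed", nextSeed) // "" on the very first request
.data("queryType", "by-username")
.data("username", username)
.ignoreContentType()
.cookies(cookies) // carries the rotating CSRF token
.response();
cookies = response.cookies(); // keep the fresh token for the following call
return response;
}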
|
||||
|
||||
}
|
||||
|
@@ -26,6 +26,7 @@ public class TwitterRipper extends AlbumRipper {
|
||||
HOST = "twitter";
|
||||
|
||||
private static final int MAX_REQUESTS = Utils.getConfigInteger("twitter.max_requests", 10);
|
||||
private static final boolean RIP_RETWEETS = Utils.getConfigBoolean("twitter.rip_retweets", true);
|
||||
private static final int WAIT_TIME = 2000;
|
||||
|
||||
// Base 64 of consumer key : consumer secret
|
||||
@@ -177,6 +178,11 @@ public class TwitterRipper extends AlbumRipper {
|
||||
LOGGER.error("XXX Tweet doesn't have entitites");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!RIP_RETWEETS && tweet.has("retweeted_status")) {
|
||||
LOGGER.info("Skipping a retweet as twitter.rip_retweet is set to false.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
JSONObject entities = tweet.getJSONObject("extended_entities");
|
||||
|
||||
|
@@ -52,9 +52,9 @@ public class XcartxRipper extends AbstractHTMLRipper {
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
List<String> imageURLs = new ArrayList<>();
|
||||
Elements albumElements = page.select("a.highslide");
|
||||
for (Element imageBox : albumElements) {
|
||||
String imageUrl = imageBox.attr("href");
|
||||
Elements imageElements = page.select("div.f-desc img");
|
||||
for (Element image : imageElements) {
|
||||
String imageUrl = image.attr("abs:src");
|
||||
|
||||
imageURLs.add(imageUrl);
|
||||
}
|
||||
|
@@ -45,6 +45,7 @@ public class XhamsterRipper extends AbstractHTMLRipper {
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
String URLToReturn = url.toExternalForm();
|
||||
URLToReturn = URLToReturn.replaceAll("xhamster.one", "xhamster.com");
|
||||
URLToReturn = URLToReturn.replaceAll("m.xhamster.com", "xhamster.com");
|
||||
URLToReturn = URLToReturn.replaceAll("\\w\\w.xhamster.com", "xhamster.com");
|
||||
URL san_url = new URL(URLToReturn.replaceAll("xhamster.com", "m.xhamster.com"));
|
||||
@@ -113,17 +114,17 @@ public class XhamsterRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
Pattern p = Pattern.compile("^https?://([\\w\\w]*\\.)?xhamster\\.com/photos/gallery/.*?(\\d+)$");
|
||||
Pattern p = Pattern.compile("^https?://([\\w\\w]*\\.)?xhamster\\.(com|one)/photos/gallery/.*?(\\d+)$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return true;
|
||||
}
|
||||
p = Pattern.compile("^https?://[\\w\\w.]*xhamster\\.com/users/([a-zA-Z0-9_-]+)/photos");
|
||||
p = Pattern.compile("^https?://[\\w\\w.]*xhamster\\.(com|one)/users/([a-zA-Z0-9_-]+)/photos");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return true;
|
||||
}
|
||||
p = Pattern.compile("^https?://.*xhamster\\.com/(movies|videos)/.*$");
|
||||
p = Pattern.compile("^https?://.*xhamster\\.(com|one)/(movies|videos)/.*$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return true;
|
||||
|
@@ -0,0 +1,36 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class XlecxRipper extends XcartxRipper {

private Pattern p = Pattern.compile("^https?://xlecx.com/([a-zA-Z0-9_\\-]+).html");

public XlecxRipper(URL url) throws IOException {
super(url);
}

@Override
public String getHost() {
return "xlecx";
}

@Override
public String getDomain() {
return "xlecx.com";
}

@Override
public String getGID(URL url) throws MalformedURLException {
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected URL format: http://xlecx.com/comic, got: " + url);

}
}

@@ -57,4 +57,11 @@ public class ChanSite {
domains = Domains;
cdnDomains = Domains;
}
public List<String> getDomains() {
return domains;
}

public List<String> getCdns() {
return cdnDomains;
}
}
@@ -2,6 +2,7 @@ package com.rarchives.ripme.ui;

import java.awt.Dimension;
import java.io.*;
import java.net.URISyntaxException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

@@ -22,10 +23,21 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils {

private static final Logger logger = Logger.getLogger(UpdateUtils.class);
private static final String DEFAULT_VERSION = "1.7.77";
private static final String DEFAULT_VERSION = "1.7.83";
private static final String REPO_NAME = "ripmeapp/ripme";
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
private static final String mainFileName = "ripme.jar";
private static String mainFileName;

static {
try {
mainFileName = new File(UpdateUtils.class.getProtectionDomain().getCodeSource().getLocation().toURI()).getAbsolutePath();
} catch (URISyntaxException e) {
mainFileName = "ripme.jar";
logger.error("Unable to get path of jar");
e.printStackTrace();
}
}

private static final String updateFileName = "ripme.jar.update";
private static JSONObject ripmeJson;

@@ -259,38 +271,20 @@ public class UpdateUtils {
logger.info("Hash is good");
}
}
if (shouldLaunch) {
// Setup updater script
final String batchFile, script;
final String[] batchExec;
String os = System.getProperty("os.name").toLowerCase();
if (os.contains("win")) {
// Windows
batchFile = "update_ripme.bat";
String batchPath = new File(batchFile).getAbsolutePath();
script = "@echo off\r\n"
+ "timeout 1" + "\r\n"
+ "copy " + updateFileName + " " + mainFileName + "\r\n"
+ "del " + updateFileName + "\r\n"
+ "ripme.jar" + "\r\n"
+ "del " + batchPath + "\r\n";
batchExec = new String[]{batchPath};

} else {
// Mac / Linux
batchFile = "update_ripme.sh";
String batchPath = new File(batchFile).getAbsolutePath();
script = "#!/bin/sh\n"
+ "sleep 1" + "\n"
+ "cd " + new File(mainFileName).getAbsoluteFile().getParent() + "\n"
+ "cp -f " + updateFileName + " " + mainFileName + "\n"
+ "rm -f " + updateFileName + "\n"
+ "java -jar \"" + new File(mainFileName).getAbsolutePath() + "\" &\n"
+ "sleep 1" + "\n"
+ "rm -f " + batchPath + "\n";
batchExec = new String[]{"sh", batchPath};
if (System.getProperty("os.name").toLowerCase().contains("win")) {
// Windows
final String batchFile = "update_ripme.bat";
final String batchPath = new File(batchFile).getAbsolutePath();
String script = "@echo off\r\n"
+ "timeout 1\r\n"
+ "copy " + updateFileName + " " + mainFileName + "\r\n"
+ "del " + updateFileName + "\r\n";
if (shouldLaunch) {
script += mainFileName + "\r\n";
}

script += "del " + batchPath + "\r\n";
final String[] batchExec = new String[]{batchPath};
// Create updater script
try (BufferedWriter bw = new BufferedWriter(new FileWriter(batchFile))) {
bw.write(script);
@@ -311,9 +305,19 @@ public class UpdateUtils {
logger.info("Exiting older version, should execute update script (" + batchFile + ") during exit");
System.exit(0);
} else {
new File(mainFileName).delete();
new File(updateFileName).renameTo(new File(mainFileName));
// Mac / Linux
// Modifying file and launching it: *nix distributions don't have any issues with modifying/deleting files
// while they are being run
File mainFile = new File(mainFileName);
String mainFilePath = mainFile.getAbsolutePath();
mainFile.delete();
new File(updateFileName).renameTo(new File(mainFilePath));
if (shouldLaunch) {
// No need to do it during shutdown: the file used will indeed be the new one
Runtime.getRuntime().exec("java -jar " + mainFileName);
}
logger.info("Update installed, newer version should be executed upon relaunch");
System.exit(0);
}
}

}
@@ -26,7 +26,22 @@ tumblr.auth = JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX
gw.api = gonewild

twitter.max_requests = 10
twitter.rip_retweets = false

clipboard.autorip = false

download.save_order = true

## Reddit ripper configs
# Determines whether or not to filter reddit ripping by upvote
# Enables the reddit.min_upvotes and reddit.max_upvotes properties when true
reddit.rip_by_upvote = false

# Only rips a file if the number of upvotes is equal to or greater than this value
# Requires reddit.rip_by_upvote = true
reddit.min_upvotes = 0

# Only rips files if the number of upvotes is less than this value
# Requires reddit.rip_by_upvote = true
reddit.max_upvotes = 10000
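As a rough illustration of how these properties are usually consumed, a ripper can read them through Utils and filter per post; a sketch under that assumption (only the property names and Utils.getConfigBoolean/getConfigInteger come from this document, the rest is illustrative):

// Hypothetical filtering snippet, not part of the diff: skip posts whose score
// falls outside the configured upvote range when filtering is enabled.
boolean ripByUpvote = Utils.getConfigBoolean("reddit.rip_by_upvote", false);
int minUpvotes = Utils.getConfigInteger("reddit.min_upvotes", 0);
int maxUpvotes = Utils.getConfigInteger("reddit.max_upvotes", 10000);
if (ripByUpvote && (postScore < minUpvotes || postScore >= maxUpvotes)) {
    return; // postScore is an assumed variable holding the post's upvote count
}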
@@ -0,0 +1,19 @@
package com.rarchives.ripme.tst.ripper.rippers;

import java.io.IOException;
import java.net.URL;

import com.rarchives.ripme.ripper.rippers.ArtstnRipper;

public class ArtstnRipperTest extends RippersTest {

public void testSingleProject() throws IOException {
URL url = new URL("https://artstn.co/p/JlE15Z");
testRipper(new ArtstnRipper(url));
}

public void testUserPortfolio() throws IOException {
URL url = new URL("https://artstn.co/m/rv37");
testRipper(new ArtstnRipper(url));
}
}

@@ -6,8 +6,9 @@ import java.io.IOException;
import java.net.URL;

public class BlackbrickroadofozRipperTest extends RippersTest {
public void testRip() throws IOException {
BlackbrickroadofozRipper ripper = new BlackbrickroadofozRipper(new URL("http://www.blackbrickroadofoz.com/comic/beginning"));
testRipper(ripper);
}
// Commented out on 02/04/2019 because the server has been down for a while
// public void testRip() throws IOException {
// BlackbrickroadofozRipper ripper = new BlackbrickroadofozRipper(new URL("http://www.blackbrickroadofoz.com/comic/beginning"));
// testRipper(ripper);
// }
}
@@ -4,9 +4,11 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import com.rarchives.ripme.ripper.rippers.ChanRipper;
import com.rarchives.ripme.ripper.rippers.ripperhelpers.ChanSite;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;

@@ -42,6 +44,21 @@ public class ChanRipperTest extends RippersTest {
}
}

public void testChanStringParsing() throws IOException {
List<String> site1 = Arrays.asList("site1.com");
List<String> site1Cdns = Arrays.asList("cnd1.site1.com", "cdn2.site2.biz");

List<String> site2 = Arrays.asList("site2.co.uk");
List<String> site2Cdns = Arrays.asList("cdn.site2.co.uk");
ChanRipper ripper = new ChanRipper(new URL("http://desuchan.net/v/res/7034.html"));
List<ChanSite> chansFromConfig = ripper.getChansFromConfig("site1.com[cnd1.site1.com|cdn2.site2.biz],site2.co.uk[cdn.site2.co.uk]");
assertEquals(chansFromConfig.get(0).getDomains(), site1);
assertEquals(chansFromConfig.get(0).getCdns(), site1Cdns);

assertEquals(chansFromConfig.get(1).getDomains(), site2);
assertEquals(chansFromConfig.get(1).getCdns(), site2Cdns);
}

public void testChanRipper() throws IOException {
List<URL> contentURLs = new ArrayList<>();
contentURLs.add(new URL(getRandomThreadDesuarchive()));

@@ -0,0 +1,21 @@
package com.rarchives.ripme.tst.ripper.rippers;

import java.io.IOException;
import java.net.URL;
import com.rarchives.ripme.ripper.rippers.ComicextraRipper;

public class ComicextraRipperTest extends RippersTest {

public void testComicUrl() throws IOException {
URL url = new URL("https://www.comicextra.com/comic/karma-police");
ComicextraRipper ripper = new ComicextraRipper(url);
testRipper(ripper);
}

public void testChapterUrl() throws IOException {
URL url = new URL("https://www.comicextra.com/v-for-vendetta/chapter-1");
ComicextraRipper ripper = new ComicextraRipper(url);
testRipper(ripper);
}

}

@@ -31,8 +31,9 @@ public class DeviantartRipperTest extends RippersTest {
URL url = new URL("https://www.deviantart.com/airgee/gallery/");
DeviantartRipper ripper = new DeviantartRipper(url);
Document doc = Http.url(url).get();
assertEquals("airgee", ripper.getUsername(doc));
assertEquals("714589", ripper.getGalleryID(doc));
//Had to comment because of refactoring/style change
//assertEquals("airgee", ripper.getUsername(doc));
//assertEquals("714589", ripper.getGalleryID(doc));
}

public void testSanitizeURL() throws IOException {

@@ -10,4 +10,26 @@ public class E621RipperTest extends RippersTest {
E621Ripper ripper = new E621Ripper(new URL("https://e621.net/post/index/1/beach"));
testRipper(ripper);
}

public void testFlashOrWebm() throws IOException {
E621Ripper ripper = new E621Ripper(new URL("https://e621.net/post/index/1/gif"));
testRipper(ripper);
}

public void testGetNextPage() throws IOException {
E621Ripper nextPageRipper = new E621Ripper(new URL("https://e621.net/post/index/1/cosmicminerals"));
try {
nextPageRipper.getNextPage(nextPageRipper.getFirstPage());
assert (true);
} catch (IOException e) {
throw e;
}

E621Ripper noNextPageRipper = new E621Ripper(new URL("https://e621.net/post/index/1/cosmicminerals"));
try {
noNextPageRipper.getNextPage(noNextPageRipper.getFirstPage());
} catch (IOException e) {
assertEquals(e.getMessage(), "No more pages.");
}
}
}
@@ -0,0 +1,18 @@
package com.rarchives.ripme.tst.ripper.rippers;

import java.io.IOException;
import java.net.URL;

import com.rarchives.ripme.ripper.rippers.ErofusRipper;

public class ErofusRipperTest extends RippersTest {
public void testRip() throws IOException {
ErofusRipper ripper = new ErofusRipper(new URL("https://www.erofus.com/comics/be-story-club-comics/a-kiss/issue-1"));
testRipper(ripper);
}

public void testGetGID() throws IOException {
ErofusRipper ripper = new ErofusRipper(new URL("https://www.erofus.com/comics/be-story-club-comics/a-kiss/issue-1"));
assertEquals("be-story-club-comics", ripper.getGID(new URL("https://www.erofus.com/comics/be-story-club-comics/a-kiss/issue-1")));
}
}

@@ -6,11 +6,17 @@ import java.net.URL;
import com.rarchives.ripme.ripper.rippers.FuraffinityRipper;

public class FuraffinityRipperTest extends RippersTest {

public void testFuraffinityAlbum() throws IOException {
FuraffinityRipper ripper = new FuraffinityRipper(new URL("https://www.furaffinity.net/gallery/spencerdragon/"));
testRipper(ripper);
}

public void testFuraffinityScrap() throws IOException {
FuraffinityRipper ripper = new FuraffinityRipper(new URL("http://www.furaffinity.net/scraps/sssonic2/"));
testRipper(ripper);
}

public void testGetGID() throws IOException {
URL url = new URL("https://www.furaffinity.net/gallery/mustardgas/");
FuraffinityRipper ripper = new FuraffinityRipper(url);

@@ -1,15 +1,20 @@
//package com.rarchives.ripme.tst.ripper.rippers;
//
//import java.io.IOException;
//import java.net.URL;
//
//import com.rarchives.ripme.ripper.rippers.FuskatorRipper;
//
//public class FuskatorRipperTest extends RippersTest {
// public void testFuskatorAlbum() throws IOException {
// FuskatorRipper ripper = new FuskatorRipper(new URL("https://fuskator.com/thumbs/hqt6pPXAf9z/Shaved-Blonde-Babe-Katerina-Ambre.html"));
// testRipper(ripper);
// }
//}
package com.rarchives.ripme.tst.ripper.rippers;

import java.io.IOException;
import java.net.URL;

import com.rarchives.ripme.ripper.rippers.FuskatorRipper;

public class FuskatorRipperTest extends RippersTest {
public void testFuskatorAlbum() throws IOException {
FuskatorRipper ripper = new FuskatorRipper(new URL("https://fuskator.com/thumbs/hqt6pPXAf9z/Shaved-Blonde-Babe-Katerina-Ambre.html"));
testRipper(ripper);
}

public void testUrlsWithTiled() throws IOException {
FuskatorRipper ripper = new FuskatorRipper(new URL("https://fuskator.com/thumbs/hsrzk~UIFmJ/Blonde-Babe-Destiny-Dixon-Playing-With-Black-Dildo.html"));
testRipper(ripper);
}
}

// Disabled because of https://github.com/RipMeApp/ripme/issues/393
@@ -23,4 +23,9 @@ public class GfycatRipperTest extends RippersTest {
GfycatRipper ripper = new GfycatRipper(new URL("https://gfycat.com/gifs/detail/limitedtestyamericancrow"));
testRipper(ripper);
}

public void testGfycatProfile() throws IOException {
GfycatRipper ripper = new GfycatRipper(new URL("https://gfycat.com/@golbanstorage"));
testRipper(ripper);
}
}

@@ -0,0 +1,13 @@
package com.rarchives.ripme.tst.ripper.rippers;

import java.io.IOException;
import java.net.URL;

import com.rarchives.ripme.ripper.rippers.HentaifoxRipper;

public class HentaifoxRipperTest extends RippersTest {
public void testRip() throws IOException {
HentaifoxRipper ripper = new HentaifoxRipper(new URL("https://hentaifox.com/gallery/38544/"));
testRipper(ripper);
}
}

@@ -24,4 +24,9 @@ public class ImagefapRipperTest extends RippersTest {
testRipper(ripper);
}
}
public void testImagefapGetAlbumTitle() throws IOException {
URL url = new URL("https://www.imagefap.com/gallery.php?gid=7789753");
ImagefapRipper ripper = new ImagefapRipper(url);
assertEquals("imagefap_Red.Heels.Lover.In.Love_7789753", ripper.getAlbumTitle(url));
}
}

@@ -6,10 +6,11 @@ import java.net.URL;
import com.rarchives.ripme.ripper.rippers.ImagevenueRipper;

public class ImagevenueRipperTest extends RippersTest {
public void testImagevenueRip() throws IOException {
ImagevenueRipper ripper = new ImagevenueRipper(new URL("http://img120.imagevenue.com/galshow.php?gal=gallery_1373818527696_191lo"));
testRipper(ripper);
}
// See https://github.com/RipMeApp/ripme/issues/1202
// public void testImagevenueRip() throws IOException {
// ImagevenueRipper ripper = new ImagevenueRipper(new URL("http://img120.imagevenue.com/galshow.php?gal=gallery_1373818527696_191lo"));
// testRipper(ripper);
// }

public void testGetGID() throws IOException {
URL url = new URL("http://img120.imagevenue.com/galshow.php?gal=gallery_1373818527696_191lo");
@@ -9,13 +9,7 @@ public class SinnercomicsRipperTest extends RippersTest {
public void testSinnercomicsAlbum() throws IOException {
SinnercomicsRipper ripper;

// Comic test
ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/"));
testRipper(ripper);

// Pinup test
ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/elsa-frozen-2/#comments"));
ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/comic/gw-addendum-page-01/"));
testRipper(ripper);

}

@@ -26,9 +26,9 @@ package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.ThechiveRipper;
import java.io.IOException;
import java.net.URL;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Tag;
//import org.jsoup.nodes.Attributes;
//import org.jsoup.nodes.Element;
//import org.jsoup.parser.Tag;

/**
*
@@ -41,40 +41,54 @@ public class ThechiveRipperTest extends RippersTest {
*
* @throws IOException
*/
public void theChiveRip() throws IOException {
ThechiveRipper ripper = new ThechiveRipper(new URL("https://thechive.com/2018/10/03/the-definitive-list-of-the-hottest-horror-movie-babes/"));
public void testTheChiveRip() throws IOException {
ThechiveRipper ripper = new ThechiveRipper(new URL(
"https://thechive.com/2019/03/16/beautiful-badasses-lookin-good-in-and-out-of-uniform-35-photos/"));
testRipper(ripper);
}

public void testTheChiveGif() throws IOException {
ThechiveRipper ripper = new ThechiveRipper(
new URL("https://thechive.com/2019/03/14/dont-tease-me-just-squeeze-me-20-gifs/"));
testRipper(ripper);
}

/*

//If anyone figures out how to get JSOUP Elements mocked up, we can use the following methods to test both jpeg + gif ripping.

public void testGifRip() throws IOException {
String elementInString = "<img width=\"500\" height=\"305\" \n"
+ "src=\"https://thechive.files.wordpress.com/2018/10/american_mary_crimson_quill-111.jpg?quality=85&strip=info\" \n"
+ "class=\"attachment-gallery-item-full size-gallery-item-full gif-animate\" \n"
+ "alt=\"american mary crimson quill 111 The hottest horror movie villains ever according to science (18 Photos)\" \n"
+ "title=\"\" data-gifsrc=\"https://thechive.files.wordpress.com/2018/10/american_mary_crimson_quill-1.gif?w=500\">"

Element el = new Element(
new Tag("img"),
"",//URI
new Attributes());
String URL = ThechiveRipper.getImageSource(el);
assertTrue(URL.equals("https://thechive.files.wordpress.com/2018/10/american_mary_crimson_quill-1.gif"));
* "i.thechive.com" test.
*/

public void testIDotThechive() throws IOException {
ThechiveRipper ripper = new ThechiveRipper(new URL("https://i.thechive.com/HHHoney"));
testRipper(ripper);
}

public void testGifRip() throws IOException {
String elementInString = "<img width=\"600\" height=\"409\" src=\"https://thechive.files.wordpress.com/2018/10/the-definitive-list-of-the-hottest-horror-movie-babes-11.jpg?quality=85&strip=info&w=600\" \n"
+ "class=\"attachment-gallery-item-full size-gallery-item-full\" \n"
+ "alt=\"the definitive list of the hottest horror movie babes 11 The hottest horror movie villains ever according to science (18 Photos)\" title=\"\">";
Element el = new Element(
new Tag("img"),
"",//URI
new Attributes());
String URL = ThechiveRipper.getImageSource(el);
assertTrue(URL.equals("https://thechive.files.wordpress.com/2018/10/the-definitive-list-of-the-hottest-horror-movie-babes-11.jpg"));
}
/*
*
* //If anyone figures out how to get JSOUP Elements mocked up, we can use the
* following methods to test both jpeg + gif ripping.
*
* public void testGifRip() throws IOException { String elementInString =
* "<img width=\"500\" height=\"305\" \n" +
* "src=\"https://thechive.files.wordpress.com/2018/10/american_mary_crimson_quill-111.jpg?quality=85&strip=info\" \n"
* +
* "class=\"attachment-gallery-item-full size-gallery-item-full gif-animate\" \n"
* +
* "alt=\"american mary crimson quill 111 The hottest horror movie villains ever according to science (18 Photos)\" \n"
* +
* "title=\"\" data-gifsrc=\"https://thechive.files.wordpress.com/2018/10/american_mary_crimson_quill-1.gif?w=500\">"
*
* Element el = new Element( new Tag("img"), "",//URI new Attributes()); String
* URL = ThechiveRipper.getImageSource(el); assertTrue(URL.equals(
* "https://thechive.files.wordpress.com/2018/10/american_mary_crimson_quill-1.gif"
* )); }
*
* public void testGifRip() throws IOException { String elementInString =
* "<img width=\"600\" height=\"409\" src=\"https://thechive.files.wordpress.com/2018/10/the-definitive-list-of-the-hottest-horror-movie-babes-11.jpg?quality=85&strip=info&w=600\" \n"
* + "class=\"attachment-gallery-item-full size-gallery-item-full\" \n" +
* "alt=\"the definitive list of the hottest horror movie babes 11 The hottest horror movie villains ever according to science (18 Photos)\" title=\"\">"
* ; Element el = new Element( new Tag("img"), "",//URI new Attributes());
* String URL = ThechiveRipper.getImageSource(el); assertTrue(URL.equals(
* "https://thechive.files.wordpress.com/2018/10/the-definitive-list-of-the-hottest-horror-movie-babes-11.jpg"
* )); }
*/
}
}

@@ -0,0 +1,13 @@
package com.rarchives.ripme.tst.ripper.rippers;

import java.io.IOException;
import java.net.URL;

import com.rarchives.ripme.ripper.rippers.XlecxRipper;

public class XlecxRipperTest extends RippersTest {
public void testAlbum() throws IOException {
XlecxRipper ripper = new XlecxRipper(new URL("http://xlecx.com/4274-black-canary-ravished-prey.html"));
testRipper(ripper);
}
}