mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-27 15:49:50 +02:00

Merge branch 'master' of github.com:RipMeApp/ripme

Dominik Sitar
2018-11-27 12:25:46 +01:00
7 changed files with 314 additions and 174 deletions

pom.xml

@@ -4,7 +4,7 @@
     <groupId>com.rarchives.ripme</groupId>
     <artifactId>ripme</artifactId>
     <packaging>jar</packaging>
-    <version>1.7.69</version>
+    <version>1.7.70</version>
     <name>ripme</name>
     <url>http://rip.rarchives.com</url>
     <properties>

ripme.json

@@ -1,6 +1,6 @@
 {
-  "latestVersion": "1.7.69",
   "changeList": [
+    "1.7.70: Added arabic translation; Updater now works on java 10; Fixed mangadex ripper",
     "1.7.69: Fixes TheChive bug so that it can now rip gifs; e621 ripper now rips all media types; Upgraded org.apache.httpcomponents to 4.3.6; Added ripper for Mangadex.org; Added ripper for various duckmovie frontends; reddit ripper no longer freezes when ripping certain links",
     "1.7.68: Added support for 55chan.org; Now limits file name length to 255 chars; fixed Tsumino ripper",
     "1.7.67: Added yuki.la ripper; Fixed xhamster ripper; Fixed instagram ripper; Added porncomix.one ripper; Fixed bug which caused large files to be download when running tests",
@@ -241,5 +241,6 @@
     "1.0.3: Added VK.com ripper",
     "1.0.1: Added auto-update functionality"
   ],
-  "currentHash": "5c312c50aed4a33112d3c77cf9cae68be1793b167ba2a741c33453e556a66c73"
+  "latestVersion": "1.7.70",
+  "currentHash": "d838bc3a6ed86bb422dd53dbd58f11e28001cd844dc1f2fdee98fe004d1bc237"
 }
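Aside: currentHash is 64 hex digits, i.e. a SHA-256 digest, which the updater presumably verifies against the downloaded ripme.jar. A minimal sketch of that kind of check (HashCheck is a hypothetical name, not part of ripme; it assumes currentHash is the SHA-256 of the jar):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class HashCheck {
    // Returns the lowercase hex SHA-256 of a file, in the same format as "currentHash"
    static String sha256Of(String path) throws IOException, NoSuchAlgorithmException {
        byte[] bytes = Files.readAllBytes(Paths.get(path));
        byte[] digest = MessageDigest.getInstance("SHA-256").digest(bytes);
        StringBuilder hex = new StringBuilder();
        for (byte b : digest) {
            hex.append(String.format("%02x", b));
        }
        return hex.toString();
    }

    public static void main(String[] args) throws Exception {
        String expected = "d838bc3a6ed86bb422dd53dbd58f11e28001cd844dc1f2fdee98fe004d1bc237";
        System.out.println(sha256Of("ripme.jar").equals(expected) ? "hash ok" : "hash mismatch");
    }
}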

ChanRipper.java

@@ -18,6 +18,7 @@ import org.jsoup.nodes.Element;
 public class ChanRipper extends AbstractHTMLRipper {
     private static List<ChanSite> explicit_domains = Arrays.asList(
             new ChanSite("boards.4chan.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
+            new ChanSite("boards.4channel.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
             new ChanSite("4archive.org", "imgur.com"),
             new ChanSite("archive.4plebs.org", "img.4plebs.org"),
             new ChanSite("yuki.la", "ii.yuki.la"),

DerpiRipper.java

@@ -0,0 +1,149 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import org.json.JSONObject;
import org.json.JSONArray;

public class DerpiRipper extends AbstractJSONRipper {

    private URL currUrl;
    private Integer currPage;

    public DerpiRipper(URL url) throws IOException {
        super(url);
    }

    private String apiUrl;

    @Override
    public String getHost() {
        return "DerpiBooru";
    }

    @Override
    public String getDomain() {
        return "derpibooru.org";
    }

    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        // Rewrite the human-facing URL into its JSON API equivalent:
        // drop a trailing slash, insert ".json" before the query string.
        String u = url.toExternalForm();
        String[] uu = u.split("\\?", 2);
        String newU = uu[0];
        if (newU.substring(newU.length() - 1).equals("/")) {
            newU = newU.substring(0, newU.length() - 1);
        }
        newU += ".json?";
        if (uu.length > 1) {
            newU += uu[1];
        }
        // An optional user-supplied API key ("derpi.key" in the config)
        String key = Utils.getConfigString("derpi.key", "");
        if (!key.equals("")) {
            newU += "&key=" + key;
        }
        return new URL(newU);
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        currUrl = url;
        currPage = 1;

        // search
        Pattern p = Pattern.compile("^https?://derpibooru\\.org/search\\.json\\?q=([^&]+).*?$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return "search_" + m.group(1);
        }

        // tags
        p = Pattern.compile("^https?://derpibooru\\.org/tags/([^.]+)\\.json.*?$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return "tags_" + m.group(1);
        }

        // galleries
        p = Pattern.compile("^https?://derpibooru\\.org/galleries/([^/]+)/(\\d+)\\.json.*?$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return "galleries_" + m.group(1) + "_" + m.group(2);
        }

        // single image
        p = Pattern.compile("^https?://derpibooru\\.org/(\\d+)\\.json.*?$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return "image_" + m.group(1);
        }

        throw new MalformedURLException("Unable to find image in " + url);
    }

    @Override
    public JSONObject getFirstPage() throws IOException {
        return Http.url(url).getJSON();
    }

    @Override
    public JSONObject getNextPage(JSONObject doc) throws IOException {
        currPage++;
        String u = currUrl.toExternalForm() + "&page=" + Integer.toString(currPage);
        JSONObject json = Http.url(new URL(u)).getJSON();
        // Search results come back under "search"; everything else under "images"
        JSONArray arr;
        if (json.has("images")) {
            arr = json.getJSONArray("images");
        } else if (json.has("search")) {
            arr = json.getJSONArray("search");
        } else {
            throw new IOException("No more images");
        }
        if (arr.length() == 0) {
            throw new IOException("No more images");
        }
        return json;
    }

    private String getImageUrlFromJson(JSONObject json) {
        // "representations.full" holds a protocol-relative link to the original file
        return "https:" + json.getJSONObject("representations").getString("full");
    }

    @Override
    public List<String> getURLsFromJSON(JSONObject json) {
        List<String> imageURLs = new ArrayList<>();
        JSONArray arr = null;
        if (json.has("images")) {
            arr = json.getJSONArray("images");
        } else if (json.has("search")) {
            arr = json.getJSONArray("search");
        }
        if (arr != null) {
            for (int i = 0; i < arr.length(); i++) {
                imageURLs.add(this.getImageUrlFromJson(arr.getJSONObject(i)));
            }
        } else {
            // A single-image URL returns one object rather than an array
            imageURLs.add(this.getImageUrlFromJson(json));
        }
        return imageURLs;
    }

    @Override
    public void downloadURL(URL url, int index) {
        // We don't set an index prefix here as derpibooru already prefixes their images with their unique IDs
        addURLToDownload(url, "");
    }
}
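The sanitizeURL override above maps every supported derpibooru page onto its .json API twin. A standalone sketch of that rewrite (DerpiUrlDemo and toApiUrl are hypothetical names, not part of ripme):

import java.net.MalformedURLException;
import java.net.URL;

public class DerpiUrlDemo {
    // Same transformation as DerpiRipper.sanitizeURL: strip trailing slash,
    // append ".json", carry over the query string, optionally add an API key.
    static URL toApiUrl(URL url, String key) throws MalformedURLException {
        String[] parts = url.toExternalForm().split("\\?", 2);
        String base = parts[0].endsWith("/")
                ? parts[0].substring(0, parts[0].length() - 1)
                : parts[0];
        String out = base + ".json?" + (parts.length > 1 ? parts[1] : "");
        if (!key.isEmpty()) {
            out += "&key=" + key;
        }
        return new URL(out);
    }

    public static void main(String[] args) throws MalformedURLException {
        // Prints: https://derpibooru.org/search.json?q=cute
        System.out.println(toApiUrl(new URL("https://derpibooru.org/search?q=cute"), ""));
    }
}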

FlickrRipper.java

@@ -3,32 +3,22 @@ package com.rarchives.ripme.ripper.rippers;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

-import org.jsoup.Connection.Method;
-import org.jsoup.Connection.Response;
-import org.jsoup.Jsoup;
+import com.rarchives.ripme.ui.RipStatusMessage;
+import org.json.JSONArray;
+import org.json.JSONObject;
 import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.ripper.DownloadThreadPool;
-import com.rarchives.ripme.utils.Base64;
 import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.Utils;
+import org.jsoup.nodes.Element;

 public class FlickrRipper extends AbstractHTMLRipper {

-    private int page = 1;
-    private Set<String> attempted = new HashSet<>();
     private Document albumDoc = null;
     private final DownloadThreadPool flickrThreadPool;
     @Override
@@ -36,6 +26,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
         return flickrThreadPool;
     }

+    @Override
+    public boolean hasASAPRipping() {
+        return true;
+    }
+
     public FlickrRipper(URL url) throws IOException {
         super(url);
         flickrThreadPool = new DownloadThreadPool();
@@ -50,6 +45,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
         return "flickr.com";
     }

+    @Override
     public URL sanitizeURL(URL url) throws MalformedURLException {
         String sUrl = url.toExternalForm();
         // Strip out https
@@ -63,7 +59,87 @@
         }
         return new URL(sUrl);
     }

+    // Flickr is one of those sites that includes an API key in the site's javascript
+    // TODO let the user provide their own api key
+    private String getAPIKey(Document doc) {
+        Pattern p;
+        Matcher m;
+        p = Pattern.compile("root.YUI_config.flickr.api.site_key = \"([a-zA-Z0-9]*)\";");
+        for (Element e : doc.select("script")) {
+            // You have to use .html here as .text will strip most of the javascript
+            m = p.matcher(e.html());
+            if (m.find()) {
+                LOGGER.info("Found api key:" + m.group(1));
+                return m.group(1);
+            }
+        }
+        LOGGER.error("Unable to get api key");
+        // A nice error message to tell our users what went wrong
+        sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Unable to extract api key from flickr");
+        sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Using hardcoded api key");
+        return "935649baf09b2cc50628e2b306e4da5d";
+    }
+
+    // The flickr api is a monster of weird settings so we just request everything that the webview does
+    private String apiURLBuilder(String photoset, String pageNumber, String apiKey) {
+        String apiURL = "https://api.flickr.com/services/rest?extras=can_addmeta," +
+                "can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
+                "date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
+                "owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
+                "url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
+                "is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page=" + pageNumber + "&" +
+                "get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
+                ",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
+                "photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" +
+                "api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1";
+        LOGGER.info(apiURL);
+        return apiURL;
+    }
+
+    private JSONObject getJSON(String page, String apiKey) {
+        URL pageURL = null;
+        String apiURL = null;
+        try {
+            apiURL = apiURLBuilder(getPhotosetID(url.toExternalForm()), page, apiKey);
+            pageURL = new URL(apiURL);
+        } catch (MalformedURLException e) {
+            LOGGER.error("Unable to get api link " + apiURL + " is malformed");
+        }
+        try {
+            // Request the page once and reuse the body for both logging and parsing
+            String body = Http.url(pageURL).ignoreContentType().get().text();
+            LOGGER.info(body);
+            return new JSONObject(body);
+        } catch (IOException e) {
+            LOGGER.error("Unable to fetch api page " + apiURL);
+            return null;
+        }
+    }
+
+    private String getPhotosetID(String url) {
+        Pattern p;
+        Matcher m;
+        // Root: https://www.flickr.com/photos/115858035@N04/
+        // Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/
+        final String domainRegex = "https?://[wm.]*flickr.com";
+        final String userRegex = "[a-zA-Z0-9@_-]+";
+        // Album
+        p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/(sets|albums)/([0-9]+)/?.*$");
+        m = p.matcher(url);
+        if (m.matches()) {
+            return m.group(3);
+        }
+        return null;
+    }
+
     @Override
     public String getAlbumTitle(URL url) throws MalformedURLException {
         if (!url.toExternalForm().contains("/sets/")) {
             return super.getAlbumTitle(url);
@@ -92,7 +168,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
         // Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/
         final String domainRegex = "https?://[wm.]*flickr.com";
-        final String userRegex = "[a-zA-Z0-9@]+";
+        final String userRegex = "[a-zA-Z0-9@_-]+";
         // Album
         p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/sets/([0-9]+)/?.*$");
         m = p.matcher(url.toExternalForm());
@@ -129,167 +205,49 @@ public class FlickrRipper extends AbstractHTMLRipper {
     }

     @Override
-    public Document getNextPage(Document doc) throws IOException {
-        if (isThisATest()) {
-            return null;
-        }
-        // Find how many pages there are
-        int lastPage = 0;
-        for (Element apage : doc.select("a[data-track^=page-]")) {
-            String lastPageStr = apage.attr("data-track").replace("page-", "");
-            lastPage = Integer.parseInt(lastPageStr);
-        }
-        // If we're at the last page, stop.
-        if (page >= lastPage) {
-            throw new IOException("No more pages");
-        }
-        // Load the next page
-        page++;
-        albumDoc = null;
-        String nextURL = this.url.toExternalForm();
-        if (!nextURL.endsWith("/")) {
-            nextURL += "/";
-        }
-        nextURL += "page" + page + "/";
-        // Wait a bit
-        try {
-            Thread.sleep(1000);
-        } catch (InterruptedException e) {
-            throw new IOException("Interrupted while waiting to load next page " + nextURL);
-        }
-        return Http.url(nextURL).get();
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document page) {
+    public List<String> getURLsFromPage(Document doc) {
         List<String> imageURLs = new ArrayList<>();
-        for (Element thumb : page.select("a[data-track=photo-click]")) {
-            /* TODO find a way to persist the image title
-            String imageTitle = null;
-            if (thumb.hasAttr("title")) {
-                imageTitle = thumb.attr("title");
-            }
-            */
-            String imagePage = thumb.attr("href");
-            if (imagePage.startsWith("/")) {
-                imagePage = "http://www.flickr.com" + imagePage;
-            }
-            if (imagePage.contains("/in/")) {
-                imagePage = imagePage.substring(0, imagePage.indexOf("/in/") + 1);
-            }
-            if (!imagePage.endsWith("/")) {
-                imagePage += "/";
-            }
-            imagePage += "sizes/o/";
-            // Check for duplicates
-            if (attempted.contains(imagePage)) {
-                continue;
-            }
-            attempted.add(imagePage);
-            imageURLs.add(imagePage);
-            if (isThisATest()) {
-                break;
+        int x = 1;
+        while (true) {
+            JSONObject jsonData = getJSON(String.valueOf(x), getAPIKey(doc));
+            if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
+                break;
+            } else {
+                int totalPages = jsonData.getJSONObject("photoset").getInt("pages");
+                LOGGER.info(jsonData);
+                JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
+                for (int i = 0; i < pictures.length(); i++) {
+                    LOGGER.info(i);
+                    JSONObject data = (JSONObject) pictures.get(i);
+                    // TODO this is a total hack; we should loop over all image sizes and
+                    // pick the biggest one, not just assume
+                    List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
+                    for (String imageSize : imageSizes) {
+                        try {
+                            addURLToDownload(new URL(data.getString("url_" + imageSize)));
+                            LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
+                            break;
+                        } catch (org.json.JSONException ignore) {
+                            // TODO warn the user when we hit a malformed url
+                        } catch (MalformedURLException e) {}
+                    }
+                }
+                if (x >= totalPages) {
+                    // The rip is done
+                    break;
+                }
+                // We have more pages to download, so rerun the loop
+                x++;
             }
         }
         return imageURLs;
     }

     @Override
     public void downloadURL(URL url, int index) {
-        // Add image page to threadpool to grab the image & download it
-        FlickrImageThread mit = new FlickrImageThread(url, index);
-        flickrThreadPool.addThread(mit);
+        addURLToDownload(url, getPrefix(index));
     }
-
-    /**
-     * Login to Flickr.
-     * @return Cookies for logged-in session
-     * @throws IOException
-     */
-    @SuppressWarnings("unused")
-    private Map<String,String> signinToFlickr() throws IOException {
-        Response resp = Jsoup.connect("http://www.flickr.com/signin/")
-                .userAgent(USER_AGENT)
-                .followRedirects(true)
-                .method(Method.GET)
-                .execute();
-        Document doc = resp.parse();
-        Map<String,String> postData = new HashMap<>();
-        for (Element input : doc.select("input[type=hidden]")) {
-            postData.put(input.attr("name"), input.attr("value"));
-        }
-        postData.put("passwd_raw", "");
-        postData.put(".save", "");
-        postData.put("login", new String(Base64.decode("bGVmYWtlZGVmYWtl")));
-        postData.put("passwd", new String(Base64.decode("MUZha2V5ZmFrZQ==")));
-        String action = doc.select("form[method=post]").get(0).attr("action");
-        resp = Jsoup.connect(action)
-                .cookies(resp.cookies())
-                .data(postData)
-                .method(Method.POST)
-                .execute();
-        return resp.cookies();
-    }
-
-    /**
-     * Helper class to find and download images found on "image" pages
-     */
-    private class FlickrImageThread extends Thread {
-        private URL url;
-        private int index;
-
-        FlickrImageThread(URL url, int index) {
-            super();
-            this.url = url;
-            this.index = index;
-        }
-
-        @Override
-        public void run() {
-            try {
-                Document doc = getLargestImagePageDocument(this.url);
-                Elements fullsizeImages = doc.select("div#allsizes-photo img");
-                if (fullsizeImages.isEmpty()) {
-                    LOGGER.error("Could not find flickr image at " + doc.location() + " - missing 'div#allsizes-photo img'");
-                }
-                else {
-                    String prefix = "";
-                    if (Utils.getConfigBoolean("download.save_order", true)) {
-                        prefix = String.format("%03d_", index);
-                    }
-                    synchronized (flickrThreadPool) {
-                        addURLToDownload(new URL(fullsizeImages.first().attr("src")), prefix);
-                    }
-                }
-            } catch (IOException e) {
-                LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
-            }
-        }
-
-        private Document getLargestImagePageDocument(URL url) throws IOException {
-            // Get current page
-            Document doc = Http.url(url).get();
-            // Look for larger image page
-            String largestImagePage = this.url.toExternalForm();
-            for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) {
-                Elements ola = olSize.select("a");
-                if (ola.isEmpty()) {
-                    largestImagePage = this.url.toExternalForm();
-                }
-                else {
-                    String candImage = ola.get(0).attr("href");
-                    if (candImage.startsWith("/")) {
-                        candImage = "http://www.flickr.com" + candImage;
-                    }
-                    largestImagePage = candImage;
-                }
-            }
-            if (!largestImagePage.equals(this.url.toExternalForm())) {
-                // Found larger image page, get it.
-                doc = Http.url(largestImagePage).get();
-            }
-            return doc;
-        }
-    }
 }
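For contrast with the kitchen-sink URL that apiURLBuilder assembles, the ripper only consumes photoset.pages, photoset.photo, and the url_* size fields from the response. A minimal sketch of the same flickr.photosets.getPhotos call with just those parameters (FlickrApiDemo is a hypothetical name; the API key and photoset id are placeholders):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;

public class FlickrApiDemo {
    // Builds a minimal flickr.photosets.getPhotos request; only the url_* size
    // extras that the ripper reads (k, h, l, n, c, z, t) are requested.
    static String photosUrl(String apiKey, String photosetId, int page) {
        return "https://api.flickr.com/services/rest?method=flickr.photosets.getPhotos"
                + "&api_key=" + apiKey
                + "&photoset_id=" + photosetId
                + "&extras=url_k,url_h,url_l,url_n,url_c,url_z,url_t"
                + "&per_page=100&page=" + page
                + "&format=json&nojsoncallback=1";
    }

    public static void main(String[] args) throws IOException {
        // Placeholder key/photoset; a real key would come from getAPIKey(doc)
        URL url = new URL(photosUrl("YOUR_API_KEY", "72157644042355643", 1));
        try (BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream()))) {
            System.out.println(in.readLine()); // raw JSON response
        }
    }
}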

HqpornerRipper.java

@@ -39,6 +39,26 @@ public class HqpornerRipper extends AbstractSingleFileRipper {
         return null;
     }

+    private String getVideoFromFlyFlv(String url) {
+        try {
+            logger.info("Downloading " + url);
+            Document page = Http.url(url).referrer(url).get();
+            // Prefer the highest quality the page actually offers
+            String[] videoSizes = {"1080p", "720p", "360p"};
+            for (String videoSize : videoSizes) {
+                String urlToReturn = page.select("video > source[label=" + videoSize + "]").attr("src");
+                if (urlToReturn != null && !urlToReturn.equals("")) {
+                    return urlToReturn;
+                }
+            }
+        } catch (IOException e) {
+            logger.error("Unable to get page with video");
+        }
+        return null;
+    }
+
     private String getVideoName() {
         try {
             String filename = getGID(url);
@@ -77,8 +97,19 @@ public class HqpornerRipper extends AbstractSingleFileRipper {
     @Override
     public List<String> getURLsFromPage(Document doc) {
+        String videoUrl = null;
         List<String> result = new ArrayList<>();
-        result.add("https:" + getVideoFromMyDaddycc("https:" + doc.select("div.videoWrapper > iframe").attr("src")));
+        String videoPageUrl = "https:" + doc.select("div.videoWrapper > iframe").attr("src");
+        if (videoPageUrl.contains("mydaddy")) {
+            videoUrl = getVideoFromMyDaddycc(videoPageUrl);
+        } else if (videoPageUrl.contains("flyflv")) {
+            videoUrl = getVideoFromFlyFlv(videoPageUrl);
+        }
+        if (videoUrl != null) {
+            result.add("https:" + videoUrl);
+        }
         return result;
     }

UpdateUtils.java

@@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Utils;
 public class UpdateUtils {
     private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.7.69";
+    private static final String DEFAULT_VERSION = "1.7.70";
     private static final String REPO_NAME = "ripmeapp/ripme";
     private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
     private static final String mainFileName = "ripme.jar";
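For context, the version check that DEFAULT_VERSION feeds into boils down to fetching ripme.json and comparing latestVersion. A rough, simplified sketch (UpdateCheckDemo is a hypothetical name; the real UpdateUtils presumably also shows the changeList and verifies the downloaded jar against currentHash):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import org.json.JSONObject;

public class UpdateCheckDemo {
    private static final String DEFAULT_VERSION = "1.7.70";
    private static final String updateJsonURL =
            "https://raw.githubusercontent.com/ripmeapp/ripme/master/ripme.json";

    public static void main(String[] args) throws IOException {
        // Fetch ripme.json and read latestVersion out of it
        StringBuilder body = new StringBuilder();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new URL(updateJsonURL).openStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                body.append(line);
            }
        }
        JSONObject json = new JSONObject(body.toString());
        String latest = json.getString("latestVersion");
        // A plain equality check is enough for this sketch; a real updater
        // would compare the dotted version components numerically.
        if (!latest.equals(DEFAULT_VERSION)) {
            System.out.println("Update available: " + latest);
        } else {
            System.out.println("Already on " + DEFAULT_VERSION);
        }
    }
}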