Mirror of https://github.com/RipMeApp/ripme.git (synced 2025-08-26 07:14:38 +02:00)

Commit: Merge branch 'master' into master

Changed files: pom.xml, ripme.json, FlickrRipper.java, UpdateUtils.java; new files: MeituriRipper.java, NewgroundsRipper.java, MeituriRipperTest.java, NewgroundsRipperTest.java
pom.xml
@@ -4,7 +4,7 @@
     <groupId>com.rarchives.ripme</groupId>
     <artifactId>ripme</artifactId>
     <packaging>jar</packaging>
-    <version>1.7.84</version>
+    <version>1.7.85</version>
     <name>ripme</name>
     <url>http://rip.rarchives.com</url>
     <properties>
ripme.json
@@ -1,7 +1,7 @@
 {
-  "currentHash": "ff04585ca5a2d136174b959cf7652fd4149feceaf4071ae57f25b50d607d7370",
-  "latestVersion": "1.7.84",
+  "latestVersion": "1.7.85",
   "changeList": [
+    "1.7.85: Fixed instagram ripper; Flickr ripper now downloads largest image",
     "1.7.84: Fixed instagram ripper; xhamster ripper now accepts urls with page numbers; Fixed Deviantart Ripper",
     "1.7.83: Added a ripper for hentaifox.com; Added ripper for Erofus.com; Fixed fsktr not ripping some images; Added support for Gfycat profiles; Added opt to disable prefix for HentaifoundryRipper ",
     "1.7.82: Hentai foundry now rips oldest first by default; 8muses ripper no longer makes unneeded requests; Added support for i.thechive.com",
@@ -256,5 +256,6 @@
     "1.0.4: Fixed spaces-in-directory bug",
     "1.0.3: Added VK.com ripper",
     "1.0.1: Added auto-update functionality"
-  ]
+  ],
+  "currentHash": "874aceffdad02ab8147b588641229a9743b8e78b3681b3ff5a733cbd2faa9009"
 }
FlickrRipper.java
@@ -207,10 +207,10 @@ public class FlickrRipper extends AbstractHTMLRipper {
     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> imageURLs = new ArrayList<>();
-
+        String apiKey = getAPIKey(doc);
         int x = 1;
         while (true) {
-            JSONObject jsonData = getJSON(String.valueOf(x), getAPIKey(doc));
+            JSONObject jsonData = getJSON(String.valueOf(x), apiKey);
             if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
                 break;
             } else {
@@ -220,18 +220,12 @@ public class FlickrRipper extends AbstractHTMLRipper {
                 for (int i = 0; i < pictures.length(); i++) {
                     LOGGER.info(i);
                     JSONObject data = (JSONObject) pictures.get(i);
-                    // TODO this is a total hack, we should loop over all image sizes and pick the biggest one and not
-                    // just assume
-                    List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
-                    for ( String imageSize : imageSizes) {
-                        try {
-                            addURLToDownload(new URL(data.getString("url_" + imageSize)));
-                            LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
-                            break;
-                        } catch (org.json.JSONException ignore) {
-                            // TODO warn the user when we hit a Malformed url
-                        } catch (MalformedURLException e) {}
+                    try {
+                        addURLToDownload(getLargestImageURL(data.getString("id"), apiKey));
+                    } catch (MalformedURLException e) {
+                        LOGGER.error("Flickr MalformedURLException: " + e.getMessage());
+                    }

                 }
                 if (x >= totalPages) {
                     // The rips done
@@ -250,4 +244,26 @@ public class FlickrRipper extends AbstractHTMLRipper {
     public void downloadURL(URL url, int index) {
         addURLToDownload(url, getPrefix(index));
     }
+
+    private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException {
+        TreeMap<Integer, String> imageURLMap = new TreeMap<>();
+
+        try {
+            URL imageAPIURL = new URL("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1");
+            JSONArray imageSizes = new JSONObject(Http.url(imageAPIURL).ignoreContentType().get().text()).getJSONObject("sizes").getJSONArray("size");
+            for (int i = 0; i < imageSizes.length(); i++) {
+                JSONObject imageInfo = imageSizes.getJSONObject(i);
+                imageURLMap.put(imageInfo.getInt("width") * imageInfo.getInt("height"), imageInfo.getString("source"));
+            }
+
+        } catch (org.json.JSONException e) {
+            LOGGER.error("Error in parsing of Flickr API: " + e.getMessage());
+        } catch (MalformedURLException e) {
+            LOGGER.error("Malformed URL returned by API");
+        } catch (IOException e) {
+            LOGGER.error("IOException while looking at image sizes: " + e.getMessage());
+        }
+
+        return new URL(imageURLMap.lastEntry().getValue());
+    }
 }
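For context on the FlickrRipper change above: rather than guessing a size suffix, the ripper now asks the flickr.photos.getSizes endpoint for each photo and keeps the rendition with the largest pixel area. Below is a minimal, self-contained sketch of that selection step; the JSON string is illustrative (not a real API response) and only uses the fields the committed code reads (width, height, source).

import java.util.TreeMap;
import org.json.JSONArray;
import org.json.JSONObject;

public class LargestSizeDemo {
    public static void main(String[] args) {
        // Illustrative stand-in for a flickr.photos.getSizes response.
        String json = "{\"sizes\":{\"size\":["
                + "{\"width\":240,\"height\":180,\"source\":\"https://example.com/small.jpg\"},"
                + "{\"width\":2048,\"height\":1536,\"source\":\"https://example.com/large.jpg\"}]}}";

        JSONArray sizes = new JSONObject(json).getJSONObject("sizes").getJSONArray("size");

        // Same idea as getLargestImageURL: key each rendition by pixel area so that
        // the TreeMap's lastEntry() is the largest one.
        TreeMap<Integer, String> byArea = new TreeMap<>();
        for (int i = 0; i < sizes.length(); i++) {
            JSONObject size = sizes.getJSONObject(i);
            byArea.put(size.getInt("width") * size.getInt("height"), size.getString("source"));
        }
        System.out.println(byArea.lastEntry().getValue()); // prints .../large.jpg
    }
}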
MeituriRipper.java (new file)
@@ -0,0 +1,92 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class MeituriRipper extends AbstractHTMLRipper {
    public MeituriRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "meituri";
    }

    @Override
    public String getDomain() {
        return "meituri.com";
    }

    // To use in getting URLs
    String albumID = "";

    @Override
    public String getGID(URL url) throws MalformedURLException {
        // without escape
        // ^https?://[w.]*meituri\.com/a/([0-9]+)/([0-9\.html]+)*$
        // https://www.meituri.com/a/14449/
        // also matches https://www.meituri.com/a/14449/3.html etc.
        // group 1 is 14449
        Pattern p = Pattern.compile("^https?://[w.]*meituri\\.com/a/([0-9]+)/([0-9\\.html]+)*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            albumID = m.group(1);
            return m.group(1);
        }
        throw new MalformedURLException(
                "Expected meituri.com URL format: " + "meituri.com/a/albumid/ - got " + url + "instead");
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> imageURLs = new ArrayList<>();
        // Get number of images from the page
        // Then generate links according to that
        String numOfImages = "";
        // A very ugly way of getting "图片数量: 55P" from paragraphs
        // 3rd p in div.tuji
        int n = 0;
        for (Element para : doc.select("div.tuji > p")) {
            // 图片数量: 55P
            if (n == 2) {
                numOfImages = para.toString();
            }
            n++;
        }
        // ["<p>图片数量:", "55P</p>"]
        String[] splitNumOfImages = numOfImages.split(" ");
        // "55P</p>" -> "55" -> 55
        int actualNumOfImages = Integer.parseInt(splitNumOfImages[1].replace("P</p>", ""));

        // Base URL: http://ii.hywly.com/a/1/albumid/imgnum.jpg
        String baseURL = "http://ii.hywly.com/a/1/" + albumID + "/";

        // Loop through and add images to the URL list
        for (int i = 1; i <= actualNumOfImages; i++) {
            imageURLs.add(baseURL + i + ".jpg");
        }
        return imageURLs;
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }
}
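A note on the count parsing in MeituriRipper above: the third <p> inside div.tuji reads "图片数量: 55P" ("number of images: 55P"), and the ripper recovers the integer with a plain split and replace. A tiny sketch with that sample value (the string is illustrative; the live page markup may differ):

public class MeituriCountDemo {
    public static void main(String[] args) {
        // Sample of Element.toString() for the third <p> in div.tuji.
        String para = "<p>图片数量: 55P</p>";
        // ["<p>图片数量:", "55P</p>"] -> "55P</p>" -> 55
        String[] parts = para.split(" ");
        int numOfImages = Integer.parseInt(parts[1].replace("P</p>", ""));
        // The ripper then generates http://ii.hywly.com/a/1/<albumID>/<n>.jpg for n = 1..numOfImages.
        System.out.println(numOfImages); // 55
    }
}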
NewgroundsRipper.java (new file)
@@ -0,0 +1,134 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class NewgroundsRipper extends AbstractHTMLRipper {

    private String username = ""; // Name of artist

    // Extensions supported by Newgrounds
    private List<String> ALLOWED_EXTENSIONS = Arrays.asList("png", "gif", "jpeg", "jpg");

    // Images are pulled 60 at a time; a new page request is needed when count == 60
    private int pageNumber = 1;
    private int count = 0;

    public NewgroundsRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "newgrounds";
    }

    @Override
    protected String getDomain() {
        return "newgrounds.com";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p = Pattern.compile("^https?://(.+).newgrounds.com/?.*");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            this.username = m.group(1);
            return m.group(1);
        }
        throw new MalformedURLException("Expected newgrounds.com URL format: " +
                "username.newgrounds.com/art - got " + url + " instead");
    }

    @Override
    protected Document getFirstPage() throws IOException {
        return Http.url("https://" + this.username + ".newgrounds.com/art").get();
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
        if (this.count < 60) {
            throw new IOException("No more pages");
        }
        this.count = 0; // New page found, so reset count
        return Http.url("https://" + this.username + ".newgrounds.com/art/page/" + this.pageNumber)
                .header("X-Requested-With", "XMLHttpRequest").get(); // Send header to imitate scrolling
    }

    @Override
    protected List<String> getURLsFromPage(Document page) {

        List<String> imageURLs = new ArrayList<>();
        String documentHTMLString = page.toString().replaceAll("&quot;", "");
        String findStr = "newgrounds.com\\/art\\/view\\/" + this.username;
        int lastIndex = 0;

        // Index where findStr is found; each occurrence contains the link to an image
        ArrayList<Integer> indices = new ArrayList<>();

        while (lastIndex != -1) {
            lastIndex = documentHTMLString.indexOf(findStr, lastIndex);
            if (lastIndex != -1) {
                this.count++;
                lastIndex += findStr.length();
                indices.add(lastIndex);
            }
        }

        // Retrieve direct URL for each image
        for (int i = 0; i < indices.size(); i++) {
            String imageUrl = "https://art.ngfiles.com/images/";

            String inLink = "https://www.newgrounds.com/art/view/" + this.username + "/";
            String s;
            if (i == indices.size() - 1) {
                s = documentHTMLString.substring(indices.get(i) + 2);
            } else {
                s = documentHTMLString.substring(indices.get(i) + 2, indices.get(i + 1));
            }

            s = s.replaceAll("\n", "").replaceAll("\t", "")
                    .replaceAll("\\\\", "");

            Pattern p = Pattern.compile("(.*?)\" class.*/thumbnails/(.*?)/(.*?)\\.");
            Matcher m = p.matcher(s);

            if (m.lookingAt()) {
                String testURL = m.group(3) + "_" + this.username + "_" + m.group(1);

                // Open new document to get full sized image
                try {
                    Document imagePage = Http.url(inLink + m.group(1)).get();
                    for (String extensions : this.ALLOWED_EXTENSIONS) {
                        if (imagePage.toString().contains(testURL + "." + extensions)) {
                            imageUrl += m.group(2) + "/" + m.group(3) + "_" + this.username + "_" + m.group(1) + "." + extensions;
                            imageURLs.add(imageUrl);
                            break;
                        }
                    }

                } catch (IOException e) {
                    LOGGER.error("IO Error on trying to check extension: " + inLink + m.group(1));
                }
            }
        }
        this.pageNumber += 1;
        return imageURLs;
    }

    @Override
    protected void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }
}
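The GID extraction in NewgroundsRipper above is just the artist subdomain; for example, the URL used in NewgroundsRipperTest further down resolves to zone-sama. A quick check of that pattern in isolation:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class NewgroundsGidDemo {
    public static void main(String[] args) {
        // Same pattern as NewgroundsRipper.getGID.
        Pattern p = Pattern.compile("^https?://(.+).newgrounds.com/?.*");
        Matcher m = p.matcher("https://zone-sama.newgrounds.com/art");
        if (m.matches()) {
            System.out.println(m.group(1)); // zone-sama (used as both the username and the GID)
        }
    }
}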
UpdateUtils.java
@@ -23,7 +23,7 @@ import com.rarchives.ripme.utils.Utils;
 public class UpdateUtils {

     private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.7.84";
+    private static final String DEFAULT_VERSION = "1.7.85";
     private static final String REPO_NAME = "ripmeapp/ripme";
     private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
     private static String mainFileName;
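The DEFAULT_VERSION bump above is the counterpart of the latestVersion change in ripme.json: the updater fetches updateJsonURL and compares the two values. A rough, hypothetical sketch of that comparison (the class and method names are illustrative; the actual UpdateUtils logic is not part of this diff):

import org.json.JSONObject;

public class UpdateCheckSketch {
    private static final String DEFAULT_VERSION = "1.7.85";

    // Hypothetical helper: returns true if the fetched ripme.json advertises a newer release.
    static boolean updateAvailable(String ripmeJsonText) {
        String latest = new JSONObject(ripmeJsonText).getString("latestVersion");
        return !DEFAULT_VERSION.equals(latest);
    }
}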
MeituriRipperTest.java (new file)
@@ -0,0 +1,19 @@
package com.rarchives.ripme.tst.ripper.rippers;

import java.io.IOException;
import java.net.URL;

import com.rarchives.ripme.ripper.rippers.MeituriRipper;

public class MeituriRipperTest extends RippersTest {
    public void testMeituriRip() throws IOException {
        MeituriRipper ripper = new MeituriRipper(new URL("https://www.meituri.com/a/14449/"));
        testRipper(ripper);
    }

    public void testGetGID() throws IOException {
        URL url = new URL("https://www.meituri.com/a/14449/");
        MeituriRipper ripper = new MeituriRipper(url);
        assertEquals("14449", ripper.getGID(url));
    }
}
NewgroundsRipperTest.java (new file)
@@ -0,0 +1,22 @@
package com.rarchives.ripme.tst.ripper.rippers;

import com.rarchives.ripme.ripper.rippers.NewgroundsRipper;

import java.io.IOException;
import java.net.URL;

public class NewgroundsRipperTest extends RippersTest {

    public void testNewgroundsRip() throws IOException {
        NewgroundsRipper ripper = new NewgroundsRipper(new URL("https://zone-sama.newgrounds.com/art"));
        testRipper(ripper);
    }

    public void testGetGID() throws IOException {
        URL url = new URL("https://zone-sama.newgrounds.com/art");
        NewgroundsRipper ripper = new NewgroundsRipper(url);
        assertEquals("zone-sama", ripper.getGID(url));
    }

}