
Merge branch 'master' into master

Committed by cyian-1756 on 2019-07-05 12:28:34 -05:00 (via GitHub)
8 changed files with 302 additions and 18 deletions

pom.xml

@@ -4,7 +4,7 @@
<groupId>com.rarchives.ripme</groupId>
<artifactId>ripme</artifactId>
<packaging>jar</packaging>
-<version>1.7.84</version>
+<version>1.7.85</version>
<name>ripme</name>
<url>http://rip.rarchives.com</url>
<properties>

ripme.json

@@ -1,7 +1,7 @@
{
"currentHash": "ff04585ca5a2d136174b959cf7652fd4149feceaf4071ae57f25b50d607d7370",
"latestVersion": "1.7.84",
"latestVersion": "1.7.85",
"changeList": [
"1.7.85: Fixed instagram ripper; Flickr ripper now downloads largest image",
"1.7.84: Fixed instagram ripper; xhamster ripper now accepts urls with page numbers; Fixed Deviantart Ripper",
"1.7.83: Added a ripper for hentaifox.com; Added ripper for Erofus.com; Fixed fsktr not ripping some images; Added support for Gfycat profiles; Added opt to disable prefix for HentaifoundryRipper ",
"1.7.82: Hentai foundry now rips oldest first by default; 8muses ripper no longer makes unneeded requests; Added support for i.thechive.com",
@@ -256,5 +256,6 @@
"1.0.4: Fixed spaces-in-directory bug",
"1.0.3: Added VK.com ripper",
"1.0.1: Added auto-update functionality"
-]
+],
+"currentHash": "874aceffdad02ab8147b588641229a9743b8e78b3681b3ff5a733cbd2faa9009"
}
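
The 64-hex-digit currentHash values are SHA-256 digests. Assuming they are the expected hash of the released jar, a client-side verification sketch could look like the following; the file path is hypothetical, not the updater's actual code.

import java.nio.file.Files;
import java.nio.file.Paths;
import java.security.MessageDigest;

public class HashCheckDemo {
    public static void main(String[] args) throws Exception {
        // Hash the downloaded jar (path is illustrative) ...
        byte[] jar = Files.readAllBytes(Paths.get("ripme.jar"));
        byte[] digest = MessageDigest.getInstance("SHA-256").digest(jar);
        StringBuilder hex = new StringBuilder();
        for (byte b : digest) {
            hex.append(String.format("%02x", b));
        }
        // ... then compare against "currentHash" from ripme.json.
        System.out.println(hex);
    }
}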

src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java

@@ -207,10 +207,10 @@ public class FlickrRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
+String apiKey = getAPIKey(doc);
int x = 1;
while (true) {
-JSONObject jsonData = getJSON(String.valueOf(x), getAPIKey(doc));
+JSONObject jsonData = getJSON(String.valueOf(x), apiKey);
if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
break;
} else {
@@ -220,18 +220,12 @@ public class FlickrRipper extends AbstractHTMLRipper {
for (int i = 0; i < pictures.length(); i++) {
LOGGER.info(i);
JSONObject data = (JSONObject) pictures.get(i);
-// TODO this is a total hack, we should loop over all image sizes and pick the biggest one and not
-// just assume
-List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
-for ( String imageSize : imageSizes) {
-try {
-addURLToDownload(new URL(data.getString("url_" + imageSize)));
-LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
-break;
-} catch (org.json.JSONException ignore) {
-// TODO warn the user when we hit a Malformed url
-} catch (MalformedURLException e) {}
+try {
+addURLToDownload(getLargestImageURL(data.getString("id"), apiKey));
+} catch (MalformedURLException e) {
+LOGGER.error("Flickr MalformedURLException: " + e.getMessage());
+}
}
if (x >= totalPages) {
// The rip is done
@@ -250,4 +244,26 @@ public class FlickrRipper extends AbstractHTMLRipper {
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
+private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException {
+TreeMap<Integer, String> imageURLMap = new TreeMap<>();
+try {
+URL imageAPIURL = new URL("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1");
+JSONArray imageSizes = new JSONObject(Http.url(imageAPIURL).ignoreContentType().get().text()).getJSONObject("sizes").getJSONArray("size");
+for (int i = 0; i < imageSizes.length(); i++) {
+JSONObject imageInfo = imageSizes.getJSONObject(i);
+imageURLMap.put(imageInfo.getInt("width") * imageInfo.getInt("height"), imageInfo.getString("source"));
+}
+} catch (org.json.JSONException e) {
+LOGGER.error("Error in parsing of Flickr API: " + e.getMessage());
+} catch (MalformedURLException e) {
+LOGGER.error("Malformed URL returned by API");
+} catch (IOException e) {
+LOGGER.error("IOException while looking at image sizes: " + e.getMessage());
+}
+return new URL(imageURLMap.lastEntry().getValue());
+}
}
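
For context, a minimal, self-contained sketch of the size-selection idea behind getLargestImageURL, run against a hand-written flickr.photos.getSizes-style response (the URLs and dimensions below are made up). Note that if every size entry fails to parse, imageURLMap above stays empty and lastEntry() returns null, so a null check before the final new URL(...) would make the method safer.

import java.util.TreeMap;
import org.json.JSONArray;
import org.json.JSONObject;

public class LargestSizeDemo {
    public static void main(String[] args) {
        // Hand-written stand-in for a flickr.photos.getSizes response.
        String sample = "{\"sizes\":{\"size\":["
                + "{\"label\":\"Square\",\"width\":75,\"height\":75,\"source\":\"https://example.com/s.jpg\"},"
                + "{\"label\":\"Large\",\"width\":1024,\"height\":768,\"source\":\"https://example.com/b.jpg\"},"
                + "{\"label\":\"Original\",\"width\":4000,\"height\":3000,\"source\":\"https://example.com/o.jpg\"}"
                + "]},\"stat\":\"ok\"}";
        TreeMap<Integer, String> byArea = new TreeMap<>();
        JSONArray sizes = new JSONObject(sample).getJSONObject("sizes").getJSONArray("size");
        for (int i = 0; i < sizes.length(); i++) {
            JSONObject s = sizes.getJSONObject(i);
            // Key on pixel area; TreeMap keeps keys sorted ascending.
            byArea.put(s.getInt("width") * s.getInt("height"), s.getString("source"));
        }
        // lastEntry() is therefore the largest available size.
        System.out.println(byArea.lastEntry().getValue()); // https://example.com/o.jpg
    }
}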

src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java (new file)

@@ -0,0 +1,92 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class MeituriRipper extends AbstractHTMLRipper {
public MeituriRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "meituri";
}
@Override
public String getDomain() {
return "meituri.com";
}
// To use in getting URLs
String albumID = "";
@Override
public String getGID(URL url) throws MalformedURLException {
// without escape
// ^https?://[w.]*meituri\.com/a/([0-9]+)/([0-9\.html]+)*$
// https://www.meituri.com/a/14449/
// also matches https://www.meituri.com/a/14449/3.html etc.
// group 1 is 14449
Pattern p = Pattern.compile("^https?://[w.]*meituri\\.com/a/([0-9]+)/([0-9\\.html]+)*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
albumID = m.group(1);
return m.group(1);
}
throw new MalformedURLException(
"Expected meituri.com URL format: " + "meituri.com/a/albumid/ - got " + url + "instead");
}
@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
// Get number of images from the page
// Then generate links according to that
String numOfImages = "";
// A very ugly way of getting "图片数量: 55P" ("image count: 55P") from paragraphs
// 3rd p in div.tuji
int n = 0;
for (Element para : doc.select("div.tuji > p")) {
// 图片数量: 55P
if (n == 2) {
numOfImages = para.toString();
}
n++;
}
// ["<p>图片数量:", "55P</p>"]
String[] splitNumOfImages = numOfImages.split(" ");
// "55P</p>" -> "55" -> 55
int actualNumOfImages = Integer.parseInt(splitNumOfImages[1].replace("P</p>", ""));
// Base URL: http://ii.hywly.com/a/1/albumid/imgnum.jpg
String baseURL = "http://ii.hywly.com/a/1/" + albumID + "/";
// Loop through and add images to the URL list
for (int i = 1; i <= actualNumOfImages; i++) {
imageURLs.add(baseURL + i + ".jpg");
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
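
The split-on-space parsing above is brittle (it breaks if the spacing around the 图片数量 label changes). A regex-based sketch of the same count extraction, using hypothetical sample markup, would be:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MeituriCountDemo {
    public static void main(String[] args) {
        // Hypothetical stand-in for the 3rd <p> inside div.tuji.
        String para = "<p>图片数量: 55P</p>";
        // Grab the digits directly instead of splitting on a space.
        Matcher m = Pattern.compile("(\\d+)P").matcher(para);
        int count = m.find() ? Integer.parseInt(m.group(1)) : 0;
        System.out.println(count); // 55
    }
}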

src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java (new file)

@@ -0,0 +1,134 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class NewgroundsRipper extends AbstractHTMLRipper {
private String username = ""; // Name of artist
// Extensions supported by Newgrounds
private List<String> ALLOWED_EXTENSIONS = Arrays.asList("png", "gif", "jpeg", "jpg");
// Images are pulled 60 at a time, a new page request is needed when count == 60
private int pageNumber = 1;
private int count = 0;
public NewgroundsRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "newgrounds";
}
@Override
protected String getDomain() {
return "newgrounds.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(.+).newgrounds.com/?.*");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
this.username = m.group(1);
return m.group(1);
}
throw new MalformedURLException("Expected newgrounds.com URL format: " +
"username.newgrounds.com/art - got " + url + " instead");
}
@Override
protected Document getFirstPage() throws IOException {
return Http.url("https://" + this.username + ".newgrounds.com/art").get();
}
@Override
public Document getNextPage(Document doc) throws IOException {
if(this.count < 60) {
throw new IOException("No more pages");
}
this.count = 0; // New page found so reset count
return Http.url("https://" + this.username + ".newgrounds.com/art/page/" + this.pageNumber)
.header("X-Requested-With", "XMLHttpRequest").get(); // Send header to imitate scrolling
}
@Override
protected List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<>();
String documentHTMLString = page.toString().replaceAll("&quot;", "");
String findStr = "newgrounds.com\\/art\\/view\\/" + this.username;
int lastIndex = 0;
// Index where findStr is found; each occurrence contains the link to an image
ArrayList<Integer> indices = new ArrayList<>();
while(lastIndex != -1){
lastIndex = documentHTMLString.indexOf(findStr, lastIndex);
if(lastIndex != -1){
this.count ++;
lastIndex += findStr.length();
indices.add(lastIndex);
}
}
// Retrieve direct URL for image
for(int i = 0; i < indices.size(); i++){
String imageUrl = "https://art.ngfiles.com/images/";
String inLink = "https://www.newgrounds.com/art/view/" + this.username + "/";
String s;
if(i == indices.size() - 1){
s = documentHTMLString.substring(indices.get(i) + 2);
} else{
s = documentHTMLString.substring(indices.get(i) + 2, indices.get(i + 1));
}
s = s.replaceAll("\n", "").replaceAll("\t", "")
.replaceAll("\\\\", "");
Pattern p = Pattern.compile("(.*?)\" class.*/thumbnails/(.*?)/(.*?)\\.");
Matcher m = p.matcher(s);
if (m.lookingAt()) {
String testURL = m.group(3) + "_" + this.username + "_" + m.group(1);
// Open new document to get full sized image
try {
Document imagePage = Http.url(inLink + m.group(1)).get();
for(String extensions: this.ALLOWED_EXTENSIONS){
if(imagePage.toString().contains(testURL + "." + extensions)){
imageUrl += m.group(2) + "/" + m.group(3) + "_" + this.username + "_" + m.group(1) + "." + extensions;
imageURLs.add(imageUrl);
break;
}
}
} catch (IOException e) {
LOGGER.error("IO Error on trying to check extension: " + inLink + m.group(1));
}
}
}
this.pageNumber += 1;
return imageURLs;
}
@Override
protected void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
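
For context, a minimal sketch of the indexOf scan used in getURLsFromPage: it records the offset just past every occurrence of the marker string, and the art-view slug follows each recorded offset (after an escaped slash). The sample input is made up; the marker contains literal \/ sequences because the page embeds JSON-escaped URLs.

import java.util.ArrayList;
import java.util.List;

public class MarkerScanDemo {
    public static void main(String[] args) {
        // Made-up page fragment with two JSON-escaped art links.
        String html = "x newgrounds.com\\/art\\/view\\/zone-sama\\/pic-1 y"
                + " newgrounds.com\\/art\\/view\\/zone-sama\\/pic-2 z";
        String findStr = "newgrounds.com\\/art\\/view\\/" + "zone-sama";
        List<Integer> indices = new ArrayList<>();
        int lastIndex = 0;
        while ((lastIndex = html.indexOf(findStr, lastIndex)) != -1) {
            lastIndex += findStr.length(); // move past this occurrence
            indices.add(lastIndex);        // the slug follows the escaped '/'
        }
        System.out.println(indices.size()); // 2
    }
}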

UpdateUtils.java

@@ -23,7 +23,7 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils {
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
private static final String DEFAULT_VERSION = "1.7.84";
private static final String DEFAULT_VERSION = "1.7.85";
private static final String REPO_NAME = "ripmeapp/ripme";
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
private static String mainFileName;
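
DEFAULT_VERSION tracks the version being released, and the update check reduces to comparing the running version against latestVersion from the ripme.json fetched via updateJsonURL. A hedged sketch of a dotted-version comparison follows; the helper below is illustrative, not UpdateUtils' actual API.

public class VersionCompareDemo {
    // Illustrative helper, not UpdateUtils' real method: compares
    // dotted numeric version strings segment by segment.
    static boolean isNewer(String remote, String local) {
        String[] r = remote.split("\\.");
        String[] l = local.split("\\.");
        for (int i = 0; i < Math.max(r.length, l.length); i++) {
            int ri = i < r.length ? Integer.parseInt(r[i]) : 0;
            int li = i < l.length ? Integer.parseInt(l[i]) : 0;
            if (ri != li) {
                return ri > li;
            }
        }
        return false; // versions are equal
    }

    public static void main(String[] args) {
        System.out.println(isNewer("1.7.85", "1.7.84")); // true
    }
}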

src/test/java/com/rarchives/ripme/tst/ripper/rippers/MeituriRipperTest.java (new file)

@@ -0,0 +1,19 @@
package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URL;
import com.rarchives.ripme.ripper.rippers.MeituriRipper;
public class MeituriRipperTest extends RippersTest {
public void testMeituriRip() throws IOException {
MeituriRipper ripper = new MeituriRipper(new URL("https://www.meituri.com/a/14449/"));
testRipper(ripper);
}
public void testGetGID() throws IOException {
URL url = new URL("https://www.meituri.com/a/14449/");
MeituriRipper ripper = new MeituriRipper(url);
assertEquals("14449", ripper.getGID(url));
}
}

src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java (new file)

@@ -0,0 +1,22 @@
package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.NewgroundsRipper;
import java.io.IOException;
import java.net.URL;
public class NewgroundsRipperTest extends RippersTest {
public void testNewgroundsRip() throws IOException {
NewgroundsRipper ripper = new NewgroundsRipper(new URL("https://zone-sama.newgrounds.com/art"));
testRipper(ripper);
}
public void testGetGID() throws IOException {
URL url = new URL("https://zone-sama.newgrounds.com/art");
NewgroundsRipper ripper = new NewgroundsRipper(url);
assertEquals("zone-sama", ripper.getGID(url));
}
}