From 3c20953f660f0336c8f62bf9c1d1657bbca1e142 Mon Sep 17 00:00:00 2001
From: Trey W
Date: Thu, 2 May 2019 01:04:53 -0500
Subject: [PATCH 01/10] A few changes and fixes to ensure the
instagram.download_images_only property is always honored. Fixes issue #1284
---
.../ripme/ripper/rippers/InstagramRipper.java | 137 +++++++++++-------
1 file changed, 85 insertions(+), 52 deletions(-)
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index d0f8dd9a..b53dd04c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -77,36 +77,6 @@ public class InstagramRipper extends AbstractJSONRipper {
return true;
}
- private List<String> getPostsFromSinglePage(JSONObject json) {
- List<String> imageURLs = new ArrayList<>();
- JSONArray datas;
- if (json.getJSONObject("entry_data").getJSONArray("PostPage")
- .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
- .has("edge_sidecar_to_children")) {
- datas = json.getJSONObject("entry_data").getJSONArray("PostPage")
- .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
- .getJSONObject("edge_sidecar_to_children").getJSONArray("edges");
- for (int i = 0; i < datas.length(); i++) {
- JSONObject data = (JSONObject) datas.get(i);
- data = data.getJSONObject("node");
- if (data.has("is_video") && data.getBoolean("is_video")) {
- imageURLs.add(data.getString("video_url"));
- } else {
- imageURLs.add(data.getString("display_url"));
- }
- }
- } else {
- JSONObject data = json.getJSONObject("entry_data").getJSONArray("PostPage")
- .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media");
- if (data.getBoolean("is_video")) {
- imageURLs.add(data.getString("video_url"));
- } else {
- imageURLs.add(data.getString("display_url"));
- }
- }
- return imageURLs;
- }
-
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
@@ -280,36 +250,25 @@ public class InstagramRipper extends AbstractJSONRipper {
if (data.getString("__typename").equals("GraphSidecar")) {
try {
Document slideShowDoc = Http.url(new URL("https://www.instagram.com/p/" + data.getString("shortcode"))).get();
- List<String> toAdd = getPostsFromSinglePage(getJSONFromPage(slideShowDoc));
+ List<JSONObject> toAdd = getPostsFromSinglePage(getJSONFromPage(slideShowDoc));
for (int slideShowInt = 0; slideShowInt < toAdd.size(); slideShowInt++) {
- addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("shortcode"));
+ addDownloadFromData(toAdd.get(slideShowInt), image_date + data.getString("shortcode"));
}
- } catch (MalformedURLException e) {
- LOGGER.error("Unable to download slide show, URL was malformed");
} catch (IOException e) {
LOGGER.error("Unable to download slide show");
}
}
}
- try {
- if (!data.getBoolean("is_video")) {
- if (imageURLs.isEmpty()) {
- // We add this one item to the array because either wise
- // the ripper will error out because we returned an empty array
- imageURLs.add(getOriginalUrl(data.getString("display_url")));
- }
- addURLToDownload(new URL(data.getString("display_url")), image_date);
- } else {
- if (!Utils.getConfigBoolean("instagram.download_images_only", false)) {
- addURLToDownload(new URL(getVideoFromPage(data.getString("shortcode"))), image_date);
- } else {
- sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("shortcode"));
- }
+
+ if (!data.getBoolean("is_video")) {
+ if (imageURLs.isEmpty()) {
+ // We add this one item to the array because otherwise
+ // the ripper will error out because we returned an empty array
+ imageURLs.add(getOriginalUrl(data.getString("display_url")));
}
- } catch (MalformedURLException e) {
- LOGGER.info("Got MalformedURLException");
- return imageURLs;
}
+ // add download from data, which handles image vs. video
+ addDownloadFromData(data, image_date);
if (isThisATest()) {
break;
@@ -318,12 +277,86 @@ public class InstagramRipper extends AbstractJSONRipper {
} else { // We're ripping from a single page
LOGGER.info("Ripping from single page");
- imageURLs = getPostsFromSinglePage(json);
+ List<JSONObject> posts = getPostsFromSinglePage(json);
+ imageURLs = getImageURLsFromPosts(posts);
}
return imageURLs;
}
+ private List<JSONObject> getPostsFromSinglePage(JSONObject json) {
+ List<JSONObject> posts = new ArrayList<>();
+ JSONArray datas;
+ if (json.getJSONObject("entry_data").getJSONArray("PostPage")
+ .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
+ .has("edge_sidecar_to_children")) {
+ datas = json.getJSONObject("entry_data").getJSONArray("PostPage")
+ .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
+ .getJSONObject("edge_sidecar_to_children").getJSONArray("edges");
+ for (int i = 0; i < datas.length(); i++) {
+ JSONObject data = (JSONObject) datas.get(i);
+ data = data.getJSONObject("node");
+ posts.add(data);
+ }
+ } else {
+ JSONObject data = json.getJSONObject("entry_data").getJSONArray("PostPage")
+ .getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media");
+ posts.add(data);
+ }
+ return posts;
+ }
+
+ private List<String> getImageURLsFromPosts(List<JSONObject> posts) {
+ List<String> imageURLs = new ArrayList<>();
+ if (posts == null) {
+ LOGGER.error("Failed to get image urls from null posts");
+ return imageURLs;
+ }
+
+ for (int i = 0; i < posts.size(); i++) {
+ JSONObject post = posts.get(i);
+ if (post.getBoolean("is_video")) {
+ // always check if videos are being ignored
+ if (isIgnoringVideos()) {
+ sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + post.getString("shortcode"));
+ } else {
+ imageURLs.add(post.getString("video_url"));
+ }
+ } else {
+ imageURLs.add(post.getString("display_url"));
+ }
+ }
+ return imageURLs;
+ }
+
+ // attempt to add download from data, checking for video vs images
+ private void addDownloadFromData(JSONObject data, String prefix) {
+ if (data == null) {
+ LOGGER.error("Failed to add download: null data");
+ return;
+ }
+
+ try {
+ if (data.getBoolean("is_video")) {
+ // always check if videos are being ignored to honor the setting
+ if (isIgnoringVideos()) {
+ sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("shortcode"));
+ } else {
+ addURLToDownload(new URL(getVideoFromPage(data.getString("shortcode"))), prefix);
+ }
+ } else {
+ addURLToDownload(new URL(data.getString("display_url")), prefix);
+ }
+
+ } catch (MalformedURLException e) {
+ LOGGER.error("Malformed URL from data", e);
+ }
+ }
+
+ private boolean isIgnoringVideos() {
+ return Utils.getConfigBoolean("instagram.download_images_only", false);
+ }
+
private String getIGGis(String variables) {
String stringToMD5 = rhx_gis + ":" + variables;
LOGGER.debug("String to md5 is \"" + stringToMD5 + "\"");
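For reference, the refactored gate reads its flag via Utils.getConfigBoolean, so videos are still downloaded unless the key is explicitly set. To exercise the fix, the entry would go in rip.properties, RipMe's standard config file (the key name is taken from the patch; everything else is ordinary properties syntax):

    # rip.properties
    instagram.download_images_only = true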
From 788e0b14e81a65b77d82d687fdc07cc437a99f85 Mon Sep 17 00:00:00 2001
From: Jesse Bate
Date: Tue, 4 Jun 2019 22:10:58 +1000
Subject: [PATCH 02/10] Added method to determine best quality image for
download [#1324]
---
.../ripme/ripper/rippers/FlickrRipper.java | 33 ++++++++++++-------
1 file changed, 21 insertions(+), 12 deletions(-)
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
index 6ad75003..20514bb9 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
@@ -220,18 +220,9 @@ public class FlickrRipper extends AbstractHTMLRipper {
for (int i = 0; i < pictures.length(); i++) {
LOGGER.info(i);
JSONObject data = (JSONObject) pictures.get(i);
- // TODO this is a total hack, we should loop over all image sizes and pick the biggest one and not
- // just assume
- List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
- for ( String imageSize : imageSizes) {
- try {
- addURLToDownload(new URL(data.getString("url_" + imageSize)));
- LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
- break;
- } catch (org.json.JSONException ignore) {
- // TODO warn the user when we hit a Malformed url
- } catch (MalformedURLException e) {}
- }
+ try {
+ addURLToDownload(getLargestImageURL(data.getString("id"), getAPIKey(doc)));
+ } catch (IOException ignore) { }
}
if (x >= totalPages) {
// The rips done
@@ -250,4 +241,22 @@ public class FlickrRipper extends AbstractHTMLRipper {
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
+
+ private URL getLargestImageURL(String imageID, String apiKey) throws IOException {
+ URL imageAPIURL = new URL("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1");
+ TreeMap<Integer, String> imageURLMap = new TreeMap<>();
+
+ try {
+ JSONArray imageSizes = new JSONObject(Http.url(imageAPIURL).ignoreContentType().get().text()).getJSONObject("sizes").getJSONArray("size");
+ for (int i = 0; i < imageSizes.length(); i++) {
+ JSONObject imageInfo = imageSizes.getJSONObject(i);
+ imageURLMap.put(imageInfo.getInt("width") * imageInfo.getInt("height"), imageInfo.getString("source"));
+ }
+
+ } catch (org.json.JSONException ignore) {
+
+ } catch (MalformedURLException e) {}
+
+ return new URL(imageURLMap.lastEntry().getValue());
+ }
}
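A note on the TreeMap in getLargestImageURL: its keys are width * height pixel areas, and TreeMap keeps keys sorted, so lastEntry() always yields the biggest size the getSizes call reported. A minimal sketch of that selection idea (the URLs are made-up stand-ins for the API's "source" values):

    import java.util.TreeMap;

    // Keys are pixel areas; TreeMap sorts them ascending,
    // so the last entry is always the largest image.
    TreeMap<Integer, String> sizes = new TreeMap<>();
    sizes.put(100 * 75, "https://example.com/thumb.jpg");    // thumbnail
    sizes.put(1024 * 768, "https://example.com/large.jpg");  // large
    sizes.put(2048 * 1536, "https://example.com/2048.jpg");  // largest
    String largest = sizes.lastEntry().getValue();           // .../2048.jpg

One caveat: if the API call fails, imageURLMap stays empty, lastEntry() returns null, and the trailing new URL(...) dies with a NullPointerException rather than the IOException the caller catches. Patch 06 below adds logging but keeps that shape.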
From a2b8183c6db47dd52eaf2a375f0c01439a875aee Mon Sep 17 00:00:00 2001
From: Zopyrion <21994296+Zopyrion@users.noreply.github.com>
Date: Mon, 17 Jun 2019 03:00:35 -0400
Subject: [PATCH 03/10] Add Newgrounds ripper
---
.../ripper/rippers/NewgroundsRipper.java | 134 ++++++++++++++++++
.../ripper/rippers/NewgroundsRipperTest.java | 22 +++
2 files changed, 156 insertions(+)
create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
new file mode 100644
index 00000000..b3ededc4
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
@@ -0,0 +1,134 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class NewgroundsRipper extends AbstractHTMLRipper {
+
+ private String username = ""; // Name of artist
+
+ // Extensions supported by Newgrounds
+ private List<String> ALLOWED_EXTENSIONS = Arrays.asList("png", "gif", "jpeg", "jpg");
+
+ // Images are pulled 60 at a time, a new page request is needed when count == 60
+ private int pageNumber = 1;
+ private int count = 0;
+
+
+ public NewgroundsRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "newgrounds";
+ }
+
+ @Override
+ protected String getDomain() {
+ return "newgrounds.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p = Pattern.compile("^https?://(.+).newgrounds.com/?.*");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ this.username = m.group(1);
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected newgrounds.com URL format: " +
+ "username.newgrounds.com/art - got " + url + " instead");
+ }
+
+ @Override
+ protected Document getFirstPage() throws IOException {
+ return Http.url("https://" + this.username + ".newgrounds.com/art").get();
+ }
+
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ if(this.count < 60) {
+ throw new IOException("No more pages");
+ }
+ this.count = 0; // New page found so reset count
+ return Http.url("https://" + this.username + ".newgrounds.com/art/page/" + this.pageNumber)
+ .header("X-Requested-With", "XMLHttpRequest").get(); // Send header to imitate scrolling
+ }
+
+ @Override
+ protected List<String> getURLsFromPage(Document page) {
+
+ List<String> imageURLs = new ArrayList<>();
+ String documentHTMLString = page.toString().replaceAll("&quot;", "");
+ String findStr = "newgrounds.com\\/art\\/view\\/" + this.username;
+ int lastIndex = 0;
+
+ // Index where findStr is found; each occasion contains the link to an image
+ ArrayList<Integer> indices = new ArrayList<>();
+
+ while(lastIndex != -1){
+ lastIndex = documentHTMLString.indexOf(findStr, lastIndex);
+ if(lastIndex != -1){
+ this.count ++;
+ lastIndex += findStr.length();
+ indices.add(lastIndex);
+ }
+ }
+
+ // Retrieve direct URL for image
+ for(int i = 0; i < indices.size(); i++){
+ String imageUrl = "https://art.ngfiles.com/images/";
+
+ String inLink = "https://www.newgrounds.com/art/view/" + this.username + "/";
+ String s;
+ if(i == indices.size() - 1){
+ s = documentHTMLString.substring(indices.get(i) + 2);
+ } else{
+ s = documentHTMLString.substring(indices.get(i) + 2, indices.get(i + 1));
+ }
+
+ s = s.replaceAll("\n", "").replaceAll("\t", "")
+ .replaceAll("\\\\", "");
+
+ Pattern p = Pattern.compile("(.*?)\" class.*/thumbnails/(.*?)/(.*?)\\.");
+ Matcher m = p.matcher(s);
+
+ if (m.lookingAt()) {
+ String testURL = m.group(3) + "_" + this.username + "_" + m.group(1);
+
+ // Open new document to get full sized image
+ try {
+ Document imagePage = Http.url(inLink + m.group(1)).get();
+ for(String extensions: this.ALLOWED_EXTENSIONS){
+ if(imagePage.toString().contains(testURL + "." + extensions)){
+ imageUrl += m.group(2) + "/" + m.group(3) + "_" + this.username + "_" + m.group(1) + "." + extensions;
+ imageURLs.add(imageUrl);
+ break;
+ }
+ }
+
+ } catch (IOException e) {
+ LOGGER.error("IO Error on trying to check extension: " + inLink + m.group(1));
+ }
+ }
+ }
+ this.pageNumber += 1;
+ return imageURLs;
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java
new file mode 100644
index 00000000..9b4ab92c
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java
@@ -0,0 +1,22 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import com.rarchives.ripme.ripper.rippers.NewgroundsRipper;
+
+import java.io.IOException;
+import java.net.URL;
+
+public class NewgroundsRipperTest extends RippersTest {
+
+ public void testImgboxRip() throws IOException {
+ NewgroundsRipper ripper = new NewgroundsRipper(new URL("https://zone-sama.newgrounds.com/art"));
+ testRipper(ripper);
+ }
+
+ public void testGetGID() throws IOException {
+ URL url = new URL("https://zone-sama.newgrounds.com/art");
+ NewgroundsRipper ripper = new NewgroundsRipper(url);
+ assertEquals("zone-sama", ripper.getGID(url));
+ }
+
+
+}
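The regex in getURLsFromPage carries most of this ripper's logic, so here is a self-contained sketch of what its three groups capture, run against a hypothetical string shaped like the slice the loop cuts out (real Newgrounds markup may differ):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    // Hypothetical slice following "/art/view/<username>/", illustrative only:
    String s = "my-art\" class=\"item\" src=\"//art.ngfiles.com/thumbnails/123000/123456.png\"";
    Pattern p = Pattern.compile("(.*?)\" class.*/thumbnails/(.*?)/(.*?)\\.");
    Matcher m = p.matcher(s);
    if (m.lookingAt()) {
        m.group(1); // "my-art"  -> post slug, appended to /art/view/<username>/
        m.group(2); // "123000"  -> thumbnail folder on art.ngfiles.com
        m.group(3); // "123456"  -> numeric id used to build the full-size filename
    }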
From 8d398593a62ea195735271e1883496e78bb55c2f Mon Sep 17 00:00:00 2001
From: Zopyrion <21994296+Zopyrion@users.noreply.github.com>
Date: Mon, 17 Jun 2019 03:02:29 -0400
Subject: [PATCH 04/10] Change NewgroundsRipper test name
---
.../ripme/tst/ripper/rippers/NewgroundsRipperTest.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java
index 9b4ab92c..5486df15 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java
@@ -7,7 +7,7 @@ import java.net.URL;
public class NewgroundsRipperTest extends RippersTest {
- public void testImgboxRip() throws IOException {
+ public void testNewgroundsRip() throws IOException {
NewgroundsRipper ripper = new NewgroundsRipper(new URL("https://zone-sama.newgrounds.com/art"));
testRipper(ripper);
}
From 4a54a085fb2df03819a60e1e982f88930443855b Mon Sep 17 00:00:00 2001
From: Rihannakitten <44688685+Rihannakitten@users.noreply.github.com>
Date: Tue, 18 Jun 2019 14:25:35 +0000
Subject: [PATCH 05/10] quick dirty fix for instagram ripper
---
.../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index 50fc9e65..a05df206 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -475,7 +475,7 @@ public class InstagramRipper extends AbstractJSONRipper {
private String getQhashUrl(Document doc) {
for(Element el : doc.select("link[rel=preload]")) {
- if (el.attr("href").contains("ProfilePageContainer")) {
+ if (el.attr("href").contains("ProfilePageContainer") && el.attr("href").contains("js")) {
return el.attr("href");
}
}
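For context on the extra contains("js") guard: the profile page preloads more than one asset named ProfilePageContainer, and only the JS bundle embeds the query hash, so the old check could return a stylesheet href. Illustrative hrefs (not captured from Instagram) of what doc.select("link[rel=preload]") can yield:

    /static/bundles/.../ProfilePageContainer.css/abc123.css   <- matched the old check, contains no qhash
    /static/bundles/.../ProfilePageContainer.js/abc123.js     <- what getQhashUrl() should return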
From 81293b53ed1bfbf5e5d984ec7364c676ebe751cb Mon Sep 17 00:00:00 2001
From: Jesse Bate
Date: Thu, 20 Jun 2019 18:38:45 +1000
Subject: [PATCH 06/10] Added error logging instead of throwing away errors
blindly.
---
.../ripme/ripper/rippers/FlickrRipper.java | 25 ++++++++++++-------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
index 20514bb9..e56cb4a1 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
@@ -207,10 +207,10 @@ public class FlickrRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
-
+ String apiKey = getAPIKey(doc);
int x = 1;
while (true) {
- JSONObject jsonData = getJSON(String.valueOf(x), getAPIKey(doc));
+ JSONObject jsonData = getJSON(String.valueOf(x), apiKey);
if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
break;
} else {
@@ -221,8 +221,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
LOGGER.info(i);
JSONObject data = (JSONObject) pictures.get(i);
try {
- addURLToDownload(getLargestImageURL(data.getString("id"), getAPIKey(doc)));
- } catch (IOException ignore) { }
+ addURLToDownload(getLargestImageURL(data.getString("id"), apiKey));
+ } catch (MalformedURLException e) {
+ LOGGER.error("Flickr MalformedURLException: " + e.getMessage());
+ }
+
}
if (x >= totalPages) {
// The rips done
@@ -242,20 +245,24 @@ public class FlickrRipper extends AbstractHTMLRipper {
addURLToDownload(url, getPrefix(index));
}
- private URL getLargestImageURL(String imageID, String apiKey) throws IOException {
- URL imageAPIURL = new URL("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1");
+ private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException {
TreeMap<Integer, String> imageURLMap = new TreeMap<>();
try {
+ URL imageAPIURL = new URL("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1");
JSONArray imageSizes = new JSONObject(Http.url(imageAPIURL).ignoreContentType().get().text()).getJSONObject("sizes").getJSONArray("size");
for (int i = 0; i < imageSizes.length(); i++) {
JSONObject imageInfo = imageSizes.getJSONObject(i);
imageURLMap.put(imageInfo.getInt("width") * imageInfo.getInt("height"), imageInfo.getString("source"));
}
- } catch (org.json.JSONException ignore) {
-
- } catch (MalformedURLException e) {}
+ } catch (org.json.JSONException e) {
+ LOGGER.error("Error in parsing of Flickr API: " + e.getMessage());
+ } catch (MalformedURLException e) {
+ LOGGER.error("Malformed URL returned by API");
+ } catch (IOException e) {
+ LOGGER.error("IOException while looking at image sizes: " + e.getMessage());
+ }
return new URL(imageURLMap.lastEntry().getValue());
}
From c141ff9da78e5eef975e5dac714285e855779d0f Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Sat, 22 Jun 2019 03:56:26 -0400
Subject: [PATCH 07/10] Instagram ripper no longer throws an error when it
can't find the qhash on single posts
---
.../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index a05df206..e40d97d3 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -159,7 +159,8 @@ public class InstagramRipper extends AbstractJSONRipper {
Document p = resp.parse();
// Get the query hash so we can download the next page
qHash = getQHash(p);
- if (qHash == null) {
+ // The qHash is not needed if ripping a single post
+ if (qHash == null && !url.toExternalForm().contains("/p/")) {
throw new IOException("Unable to extract qhash from page");
}
return getJSONFromPage(p);
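The /p/ test separates the two URL shapes this ripper accepts (paths illustrative):

    https://www.instagram.com/someuser/        -> profile rip: paginated, so the qHash is required
    https://www.instagram.com/p/Bxxxxxxxxxx/   -> single post: one embedded JSON blob, no qHash needed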
From f63d8fe6828bebf4a6f9685f211669d3057ee362 Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Sat, 22 Jun 2019 06:51:00 -0400
Subject: [PATCH 08/10] 1.7.85: Fixed instagram ripper; Flickr ripper now
downloads largest image
---
pom.xml | 2 +-
ripme.json | 7 ++++---
src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +-
3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/pom.xml b/pom.xml
index ee7f3d69..e979e2dc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
com.rarchives.ripme
ripme
jar
- 1.7.84
+ 1.7.85
ripme
http://rip.rarchives.com
diff --git a/ripme.json b/ripme.json
index e722dac6..429aa562 100644
--- a/ripme.json
+++ b/ripme.json
@@ -1,7 +1,7 @@
{
- "currentHash": "ff04585ca5a2d136174b959cf7652fd4149feceaf4071ae57f25b50d607d7370",
- "latestVersion": "1.7.84",
+ "latestVersion": "1.7.85",
"changeList": [
+ "1.7.85: Fixed instagram ripper; Flickr ripper now downloads largest image",
"1.7.84: Fixed instagram ripper; xhamster ripper now accepts urls with page numbers; Fixed Deviantart Ripper",
"1.7.83: Added a ripper for hentaifox.com; Added ripper for Erofus.com; Fixed fsktr not ripping some images; Added support for Gfycat profiles; Added opt to disable prefix for HentaifoundryRipper ",
"1.7.82: Hentai foundry now rips oldest first by default; 8muses ripper no longer makes unneeded requests; Added support for i.thechive.com",
@@ -256,5 +256,6 @@
"1.0.4: Fixed spaces-in-directory bug",
"1.0.3: Added VK.com ripper",
"1.0.1: Added auto-update functionality"
- ]
+ ],
+ "currentHash": "874aceffdad02ab8147b588641229a9743b8e78b3681b3ff5a733cbd2faa9009"
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
index b86fe867..3e1ea3b6 100644
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@@ -23,7 +23,7 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils {
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
- private static final String DEFAULT_VERSION = "1.7.84";
+ private static final String DEFAULT_VERSION = "1.7.85";
private static final String REPO_NAME = "ripmeapp/ripme";
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
private static String mainFileName;
From 71652ecb00bb5ae1ffaf6ea76897a9b4f873c0b1 Mon Sep 17 00:00:00 2001
From: Edvin Boul
Date: Mon, 1 Jul 2019 13:19:54 +0300
Subject: [PATCH 09/10] Added Meituri Ripper [NSFW]
---
.../ripme/ripper/rippers/MeituriRipper.java | 91 +++++++++++++++++++
.../tst/ripper/rippers/MeituriRipperTest.java | 19 ++++
2 files changed, 110 insertions(+)
create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/MeituriRipperTest.java
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
new file mode 100644
index 00000000..8855846a
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
@@ -0,0 +1,91 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class MeituriRipper extends AbstractHTMLRipper {
+ public MeituriRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "meituri";
+ }
+
+ @Override
+ public String getDomain() {
+ return "meituri.com";
+ }
+
+ // To use in getting URLs
+ String albumID = "";
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ // without escape
+ // ^https?://[w.]*meituri\.com/a/([0-9]+)/$
+ // https://www.meituri.com/a/14449/
+ // group 1 is 14449
+ Pattern p = Pattern.compile("^https?://[w.]*meituri\\.com/a/([0-9]+)/$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ albumID = m.group(1);
+ return m.group(1);
+ }
+ throw new MalformedURLException(
+ "Expected meituri.com URL format: " + "meituri.com/a/albumid/ - got " + url + "instead");
+ }
+
+ @Override
+ public Document getFirstPage() throws IOException {
+ return Http.url(url).get();
+ }
+
+ @Override
+ public List<String> getURLsFromPage(Document doc) {
+ List<String> imageURLs = new ArrayList<>();
+ // Get number of images from the page
+ // Then generate links according to that
+ String numOfImages = "";
+ // A very ugly way of getting "图片数量: 55P" from paragraphs
+ // 3rd p in div.tuji
+ int n = 0;
+ for (Element para : doc.select("div.tuji > p")) {
+ // 图片数量: 55P
+ if (n == 2) {
+ numOfImages = para.toString();
+ }
+ n++;
+ }
+ // ["图片数量:", "55P
"]
+ String[] splitNumOfImages = numOfImages.split(" ");
+ // "55P
" -> "55" -> 55
+ int actualNumOfImages = Integer.parseInt(splitNumOfImages[1].replace("P", ""));
+
+ // Base URL: http://ii.hywly.com/a/1/albumid/imgnum.jpg
+ String baseURL = "http://ii.hywly.com/a/1/" + albumID + "/";
+
+ // Loop through and add images to the URL list
+ for (int i = 1; i <= actualNumOfImages; i++) {
+ imageURLs.add(baseURL + i + ".jpg");
+ }
+ return imageURLs;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MeituriRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MeituriRipperTest.java
new file mode 100644
index 00000000..a8505590
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MeituriRipperTest.java
@@ -0,0 +1,19 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+import com.rarchives.ripme.ripper.rippers.MeituriRipper;
+
+public class MeituriRipperTest extends RippersTest {
+ public void testMeituriRip() throws IOException {
+ MeituriRipper ripper = new MeituriRipper(new URL("https://www.meituri.com/a/14449/"));
+ testRipper(ripper);
+ }
+
+ public void testGetGID() throws IOException {
+ URL url = new URL("https://www.meituri.com/a/14449/");
+ MeituriRipper ripper = new MeituriRipper(url);
+ assertEquals("14449", ripper.getGID(url));
+ }
+}
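The image-count scrape in getURLsFromPage leans on para.toString() and manual "</p>" stripping. A less brittle sketch, assuming jsoup's Element.text() (which drops tags) and that the count is the only digit run in that paragraph:

    import org.jsoup.nodes.Element;

    // Same "3rd <p> in div.tuji" assumption the ripper itself makes.
    Element para = doc.select("div.tuji > p").get(2);    // "图片数量: 55P"
    String digits = para.text().replaceAll("\\D+", "");  // drop non-digits -> "55"
    int actualNumOfImages = Integer.parseInt(digits);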
From ec22b13cc3c4e6a3cabeba1e4631eca1fc679063 Mon Sep 17 00:00:00 2001
From: Edvin Boul
Date: Mon, 1 Jul 2019 13:28:40 +0300
Subject: [PATCH 10/10] Regex change to also match pagination
---
.../com/rarchives/ripme/ripper/rippers/MeituriRipper.java | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
index 8855846a..4e39a985 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
@@ -35,10 +35,11 @@ public class MeituriRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
// without escape
- // ^https?://[w.]*meituri\.com/a/([0-9]+)/$
+ // ^https?://[w.]*meituri\.com/a/([0-9]+)/([0-9\.html]+)*$
// https://www.meituri.com/a/14449/
+ // also matches https://www.meituri.com/a/14449/3.html etc.
// group 1 is 14449
- Pattern p = Pattern.compile("^https?://[w.]*meituri\\.com/a/([0-9]+)/$");
+ Pattern p = Pattern.compile("^https?://[w.]*meituri\\.com/a/([0-9]+)/([0-9\\.html]+)*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
albumID = m.group(1);