List<String> imageURLs = new ArrayList<>();
- Elements thumbs = page.select("#gdt > .gdtm a");
+ Elements thumbs = page.select("#gdt > a");
// Iterate over images on page
for (Element thumb : thumbs) {
imageURLs.add(thumb.attr("href"));
@@ -193,27 +187,26 @@ public class EHentaiRipper extends AbstractHTMLRipper {
@Override
public void downloadURL(URL url, int index) {
- EHentaiImageThread t = new EHentaiImageThread(url, index, this.workingDir);
+ EHentaiImageThread t = new EHentaiImageThread(url, index, this.workingDir.toPath());
ehentaiThreadPool.addThread(t);
try {
Thread.sleep(IMAGE_SLEEP_TIME);
- }
- catch (InterruptedException e) {
+ } catch (InterruptedException e) {
LOGGER.warn("Interrupted while waiting to load next image", e);
}
}
/**
* Helper class to find and download images found on "image" pages
- *
+ *
* Handles case when site has IP-banned the user.
*/
- private class EHentaiImageThread extends Thread {
- private URL url;
- private int index;
- private File workingDir;
+ private class EHentaiImageThread implements Runnable {
+ private final URL url;
+ private final int index;
+ private final Path workingDir;
- EHentaiImageThread(URL url, int index, File workingDir) {
+ EHentaiImageThread(URL url, int index, Path workingDir) {
super();
this.url = url;
this.index = index;
@@ -246,22 +239,21 @@ public class EHentaiRipper extends AbstractHTMLRipper {
Matcher m = p.matcher(imgsrc);
if (m.matches()) {
// Manually discover filename from URL
- String savePath = this.workingDir + File.separator;
+ String savePath = this.workingDir + "/";
if (Utils.getConfigBoolean("download.save_order", true)) {
savePath += String.format("%03d_", index);
}
savePath += m.group(1);
- addURLToDownload(new URL(imgsrc), new File(savePath));
- }
- else {
+ addURLToDownload(new URI(imgsrc).toURL(), Paths.get(savePath));
+ } else {
// Provide prefix and let the AbstractRipper "guess" the filename
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
- addURLToDownload(new URL(imgsrc), prefix);
+ addURLToDownload(new URI(imgsrc).toURL(), prefix);
}
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
index 22968216..7cfd568f 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
@@ -1,8 +1,7 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
+import java.net.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -10,8 +9,6 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import com.rarchives.ripme.utils.Utils;
-import org.json.JSONObject;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
@@ -23,13 +20,7 @@ import com.rarchives.ripme.utils.Http;
public class EightmusesRipper extends AbstractHTMLRipper {
- private Document albumDoc = null;
- private Map<String, String> cookies = new HashMap<>();
- // TODO put up a wiki page on using maps to store titles
- // the map for storing the title of each album when downloading sub albums
- private Map<URL, String> urlTitles = new HashMap<>();
-
- private Boolean rippingSubalbums = false;
+ private Map<String, String> cookies = new HashMap<>();
public EightmusesRipper(URL url) throws IOException {
super(url);
@@ -61,10 +52,10 @@ public class EightmusesRipper extends AbstractHTMLRipper {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
- Element titleElement = getFirstPage().select("meta[name=description]").first();
+ Element titleElement = getCachedFirstPage().select("meta[name=description]").first();
String title = titleElement.attr("content");
title = title.replace("A huge collection of free porn comics for adults. Read", "");
title = title.replace("online for free at 8muses.com", "");
@@ -78,21 +69,18 @@ public class EightmusesRipper extends AbstractHTMLRipper {
@Override
public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- Response resp = Http.url(url).response();
- cookies.putAll(resp.cookies());
- albumDoc = resp.parse();
- }
- return albumDoc;
+ Response resp = Http.url(url).response();
+ cookies.putAll(resp.cookies());
+ return resp.parse();
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<>();
- int x = 1;
// This contains the thumbnails of all images on the page
Elements pageImages = page.getElementsByClass("c-tile");
- for (Element thumb : pageImages) {
+ for (int i = 0; i < pageImages.size(); i++) {
+ Element thumb = pageImages.get(i);
// If true this link is a sub album
if (thumb.attr("href").contains("/comics/album/")) {
String subUrl = "https://www.8muses.com" + thumb.attr("href");
@@ -116,24 +104,14 @@ public class EightmusesRipper extends AbstractHTMLRipper {
if (thumb.hasAttr("data-cfsrc")) {
image = thumb.attr("data-cfsrc");
} else {
- // Deobfustace the json data
- String rawJson = deobfuscateJSON(page.select("script#ractive-public").html()
- .replaceAll("&gt;", ">").replaceAll("&lt;", "<").replace("&amp;", "&"));
- JSONObject json = new JSONObject(rawJson);
+ Element imageElement = thumb.select("img").first();
+ image = "https://comics.8muses.com" + imageElement.attr("data-src").replace("/th/", "/fl/");
try {
- for (int i = 0; i != json.getJSONArray("pictures").length(); i++) {
- image = "https://www.8muses.com/image/fl/" + json.getJSONArray("pictures").getJSONObject(i).getString("publicUri");
- URL imageUrl = new URL(image);
- addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, "", null, true);
- // X is our page index
- x++;
- if (isThisATest()) {
- break;
- }
- }
- return imageURLs;
- } catch (MalformedURLException e) {
+ URL imageUrl = new URI(image).toURL();
+ addURLToDownload(imageUrl, getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, getPrefixShort(i), "", null, true);
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.error("\"" + image + "\" is malformed");
+ LOGGER.error(e.getMessage());
}
}
if (!image.contains("8muses.com")) {
@@ -173,25 +151,4 @@ public class EightmusesRipper extends AbstractHTMLRipper {
public String getPrefixShort(int index) {
return String.format("%03d", index);
}
-
- private String deobfuscateJSON(String obfuscatedString) {
- StringBuilder deobfuscatedString = new StringBuilder();
- // The first char in one of 8muses obfuscated strings is always ! so we replace it
- for (char ch : obfuscatedString.replaceFirst("!", "").toCharArray()){
- deobfuscatedString.append(deobfuscateChar(ch));
- }
- return deobfuscatedString.toString();
- }
-
- private String deobfuscateChar(char c) {
- if ((int) c == 32) {
- return fromCharCode(32);
- }
- return fromCharCode(33 + (c + 14) % 94);
-
- }
-
- private static String fromCharCode(int... codePoints) {
- return new String(codePoints, 0, codePoints.length);
- }
}
\ No newline at end of file
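A note on the new thumbnail handling in EightmusesRipper: the full-size image is derived from the lazy-loaded data-src by swapping the "/th/" path segment for "/fl/" and prefixing the comics host. A minimal sketch of that mapping follows; the data-src value is hypothetical, only the swap and the host come from the patch:

    // Hypothetical data-src value; only the "/th/" -> "/fl/" swap and host come from the patch.
    public class EightmusesUrlSketch {
        public static void main(String[] args) {
            String dataSrc = "/image/th/abc123.jpg"; // assumed lazy-load thumbnail path
            String full = "https://comics.8muses.com" + dataSrc.replace("/th/", "/fl/");
            System.out.println(full); // https://comics.8muses.com/image/fl/abc123.jpg
        }
    }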
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
index d64e9600..0f77e03c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java
@@ -7,6 +7,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -93,11 +95,11 @@ public class EroShareRipper extends AbstractHTMLRipper {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
if (!is_profile(url)) {
try {
// Attempt to use album title as GID
- Element titleElement = getFirstPage().select("meta[property=og:title]").first();
+ Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
String title = titleElement.attr("content");
title = title.substring(title.lastIndexOf('/') + 1);
return getHost() + "_" + getGID(url) + "_" + title.trim();
@@ -119,7 +121,6 @@ public class EroShareRipper extends AbstractHTMLRipper {
for (Element img : imgs) {
if (img.hasClass("album-image")) {
String imageURL = img.attr("src");
- imageURL = imageURL;
URLs.add(imageURL);
}
}
@@ -195,7 +196,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album or eroshae.com/album");
}
- public static List<URL> getURLs(URL url) throws IOException{
+ public static List<URL> getURLs(URL url) throws IOException, URISyntaxException {
Response resp = Http.url(url)
.ignoreContentType()
@@ -209,7 +210,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
for (Element img : imgs) {
if (img.hasClass("album-image")) {
String imageURL = img.attr("src");
- URLs.add(new URL(imageURL));
+ URLs.add(new URI(imageURL).toURL());
}
}
//Videos
@@ -218,7 +219,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
if (vid.hasClass("album-video")) {
Elements source = vid.getElementsByTag("source");
String videoURL = source.first().attr("src");
- URLs.add(new URL(videoURL));
+ URLs.add(new URI(videoURL).toURL());
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java
index dc535dea..95528470 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ErofusRipper.java
@@ -9,6 +9,8 @@ import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
@@ -48,11 +50,6 @@ public class ErofusRipper extends AbstractHTMLRipper {
return m.group(m.groupCount());
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document page) {
LOGGER.info(page);
@@ -94,8 +91,8 @@ public class ErofusRipper extends AbstractHTMLRipper {
Map<String, String> opts = new HashMap<String, String>();
opts.put("subdirectory", page.title().replaceAll(" \\| Erofus - Sex and Porn Comics", "").replaceAll(" ", "_"));
opts.put("prefix", getPrefix(x));
- addURLToDownload(new URL(image), opts);
- } catch (MalformedURLException e) {
+ addURLToDownload(new URI(image).toURL(), opts);
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.info(e.getMessage());
}
x++;
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java
index 9b586b9a..3035d746 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EromeRipper.java
@@ -2,16 +2,19 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.rarchives.ripme.utils.Utils;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
@@ -23,7 +26,7 @@ import com.rarchives.ripme.utils.Http;
public class EromeRipper extends AbstractHTMLRipper {
boolean rippingProfile;
-
+ private HashMap<String, String> cookies = new HashMap<>();
public EromeRipper (URL url) throws IOException {
super(url);
@@ -31,17 +34,17 @@ public class EromeRipper extends AbstractHTMLRipper {
@Override
public String getDomain() {
- return "erome.com";
+ return "erome.com";
}
@Override
public String getHost() {
- return "erome";
+ return "erome";
}
@Override
public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index));
+ addURLToDownload(url, getPrefix(index), "", "erome.com", this.cookies);
}
@Override
@@ -66,39 +69,40 @@ public class EromeRipper extends AbstractHTMLRipper {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
- try {
- // Attempt to use album title as GID
- Element titleElement = getFirstPage().select("meta[property=og:title]").first();
- String title = titleElement.attr("content");
- title = title.substring(title.lastIndexOf('/') + 1);
- return getHost() + "_" + getGID(url) + "_" + title.trim();
- } catch (IOException e) {
- // Fall back to default album naming convention
- LOGGER.info("Unable to find title at " + url);
- } catch (NullPointerException e) {
- return getHost() + "_" + getGID(url);
- }
- return super.getAlbumTitle(url);
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
+ try {
+ // Attempt to use album title as GID
+ Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
+ String title = titleElement.attr("content");
+ title = title.substring(title.lastIndexOf('/') + 1);
+ return getHost() + "_" + getGID(url) + "_" + title.trim();
+ } catch (IOException e) {
+ // Fall back to default album naming convention
+ LOGGER.info("Unable to find title at " + url);
+ } catch (NullPointerException e) {
+ return getHost() + "_" + getGID(url);
+ }
+ return super.getAlbumTitle(url);
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- return new URL(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com"));
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
+ return new URI(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com")).toURL();
}
@Override
public List<String> getURLsFromPage(Document doc) {
- List<String> URLs = new ArrayList<>();
return getMediaFromPage(doc);
}
@Override
public Document getFirstPage() throws IOException {
+ this.setAuthCookie();
Response resp = Http.url(this.url)
- .ignoreContentType()
- .response();
+ .cookies(cookies)
+ .ignoreContentType()
+ .response();
return resp.parse();
}
@@ -124,18 +128,17 @@ public class EromeRipper extends AbstractHTMLRipper {
private List<String> getMediaFromPage(Document doc) {
List<String> results = new ArrayList<>();
for (Element el : doc.select("img.img-front")) {
- if (el.hasAttr("src")) {
- if (el.attr("src").startsWith("https:")) {
- results.add(el.attr("src"));
- } else {
- results.add("https:" + el.attr("src"));
- }
- } else if (el.hasAttr("data-src")) {
- //to add images that are not loaded( as all images are lasyloaded as we scroll).
- results.add(el.attr("data-src"));
- }
-
- }
+ if (el.hasAttr("data-src")) {
+ // Add images that are not yet loaded (all images are lazy-loaded as we scroll).
+ results.add(el.attr("data-src"));
+ } else if (el.hasAttr("src")) {
+ if (el.attr("src").startsWith("https:")) {
+ results.add(el.attr("src"));
+ } else {
+ results.add("https:" + el.attr("src"));
+ }
+ }
+ }
for (Element el : doc.select("source[label=HD]")) {
if (el.attr("src").startsWith("https:")) {
results.add(el.attr("src"));
@@ -152,7 +155,22 @@ public class EromeRipper extends AbstractHTMLRipper {
results.add("https:" + el.attr("src"));
}
}
+
+ if (results.size() == 0) {
+ if (cookies.isEmpty()) {
+ LOGGER.warn("You might try setting erome.laravel_session manually " +
+ "if you think this page definitely contains media.");
+ }
+ }
+
return results;
}
+ private void setAuthCookie() {
+ String sessionId = Utils.getConfigString("erome.laravel_session", null);
+ if (sessionId != null) {
+ cookies.put("laravel_session", sessionId);
+ }
+ }
+
}
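The setAuthCookie() addition above reads the erome.laravel_session key via Utils.getConfigString, so a logged-in browser session can be reused. A sketch of the matching entry, assuming RipMe's usual rip.properties config file; the value shown is a hypothetical laravel_session cookie copied from a browser:

    # rip.properties (hypothetical cookie value)
    erome.laravel_session = eyJpdiI6IjFhYmMuLi4=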
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ErotivRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ErotivRipper.java
index 10e73346..04511085 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ErotivRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ErotivRipper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -11,7 +13,6 @@ import java.util.regex.Pattern;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
@@ -57,8 +58,8 @@ public class ErotivRipper extends AbstractHTMLRipper {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- return new URL(url.toExternalForm().replaceAll("https?://www.erotiv.io", "https://erotiv.io"));
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
+ return new URI(url.toExternalForm().replaceAll("https?://www.erotiv.io", "https://erotiv.io")).toURL();
}
@Override
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java
index 1922002b..2661d055 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java
@@ -12,7 +12,6 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
public class FemjoyhunterRipper extends AbstractHTMLRipper {
@@ -41,12 +40,6 @@ public class FemjoyhunterRipper extends AbstractHTMLRipper {
"femjoyhunter.com/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FitnakedgirlsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FitnakedgirlsRipper.java
index de6fb73d..51d5f15f 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FitnakedgirlsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FitnakedgirlsRipper.java
@@ -1,72 +1,66 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
-
-public class FitnakedgirlsRipper extends AbstractHTMLRipper {
-
- public FitnakedgirlsRipper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public String getHost() {
- return "fitnakedgirls";
- }
-
- @Override
- public String getDomain() {
- return "fitnakedgirls.com";
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p;
- Matcher m;
-
- p = Pattern.compile("^.*fitnakedgirls\\.com/gallery/(.+)$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
-
- throw new MalformedURLException(
- "Expected fitnakedgirls.com gallery format: " + "fitnakedgirls.com/gallery/####" + " Got: " + url);
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
- @Override
- public List<String> getURLsFromPage(Document doc) {
- List<String> imageURLs = new ArrayList<>();
-
- Elements imgs = doc.select("div[class*=wp-tiles-tile-bg] > img");
- for (Element img : imgs) {
- String imgSrc = img.attr("src");
- imageURLs.add(imgSrc);
- }
-
- return imageURLs;
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- // Send referrer when downloading images
- addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
- }
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+
+public class FitnakedgirlsRipper extends AbstractHTMLRipper {
+
+ public FitnakedgirlsRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "fitnakedgirls";
+ }
+
+ @Override
+ public String getDomain() {
+ return "fitnakedgirls.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p;
+ Matcher m;
+
+ p = Pattern.compile("^.*fitnakedgirls\\.com/gallery/(.+)$");
+ m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+
+ throw new MalformedURLException(
+ "Expected fitnakedgirls.com gallery format: " + "fitnakedgirls.com/gallery/####" + " Got: " + url);
+ }
+
+ @Override
+ public List<String> getURLsFromPage(Document doc) {
+ List<String> imageURLs = new ArrayList<>();
+
+ Elements imgs = doc.select("div[class*=wp-tiles-tile-bg] > img");
+ for (Element img : imgs) {
+ String imgSrc = img.attr("src");
+ imageURLs.add(imgSrc);
+ }
+
+ return imageURLs;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ // Send referrer when downloading images
+ addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
+ }
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FivehundredpxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FivehundredpxRipper.java
index 6591dd01..bba284f1 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FivehundredpxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FivehundredpxRipper.java
@@ -1,10 +1,9 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
-import java.net.URL;
+import java.net.*;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@@ -163,8 +162,8 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
}
@Override
- public JSONObject getFirstPage() throws IOException {
- URL apiURL = new URL(baseURL + "&consumer_key=" + CONSUMER_KEY);
+ public JSONObject getFirstPage() throws IOException, URISyntaxException {
+ URL apiURL = new URI(baseURL + "&consumer_key=" + CONSUMER_KEY).toURL();
LOGGER.debug("apiURL: " + apiURL);
JSONObject json = Http.url(apiURL).getJSON();
@@ -231,7 +230,7 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
}
@Override
- public JSONObject getNextPage(JSONObject json) throws IOException {
+ public JSONObject getNextPage(JSONObject json) throws IOException, URISyntaxException {
if (isThisATest()) {
return null;
}
@@ -248,9 +247,9 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
sleep(500);
++page;
- URL apiURL = new URL(baseURL
+ URL apiURL = new URI(baseURL
+ "&page=" + page
- + "&consumer_key=" + CONSUMER_KEY);
+ + "&consumer_key=" + CONSUMER_KEY).toURL();
return Http.url(apiURL).getJSON();
}
@@ -295,14 +294,9 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
}
}
}
- if (imageURL == null) {
- LOGGER.error("Failed to find image for photo " + photo.toString());
- }
- else {
- imageURLs.add(imageURL);
- if (isThisATest()) {
- break;
- }
+ imageURLs.add(imageURL);
+ if (isThisATest()) {
+ break;
}
}
return imageURLs;
@@ -310,13 +304,13 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
private boolean urlExists(String url) {
try {
- HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
+ HttpURLConnection connection = (HttpURLConnection) new URI(url).toURL().openConnection();
connection.setRequestMethod("HEAD");
if (connection.getResponseCode() != 200) {
throw new IOException("Couldn't find full-size image at " + url);
}
return true;
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
return false;
}
}
@@ -330,8 +324,8 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
public void downloadURL(URL url, int index) {
String u = url.toExternalForm();
String[] fields = u.split("/");
- String prefix = getPrefix(index) + fields[fields.length - 3];
- File saveAs = new File(getWorkingDir() + File.separator + prefix + ".jpg");
+ String prefix = "/" + getPrefix(index) + fields[fields.length - 3];
+ Path saveAs = Paths.get(getWorkingDir() + prefix + ".jpg");
addURLToDownload(url, saveAs, "", null, false);
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
index e56cb4a1..c58a7e71 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.*;
import java.util.regex.Matcher;
@@ -10,6 +12,7 @@ import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage;
import org.json.JSONArray;
import org.json.JSONObject;
+import org.json.JSONException;
import org.jsoup.nodes.Document;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@@ -19,8 +22,23 @@ import org.jsoup.nodes.Element;
public class FlickrRipper extends AbstractHTMLRipper {
- private Document albumDoc = null;
private final DownloadThreadPool flickrThreadPool;
+
+ private enum UrlType {
+ USER,
+ PHOTOSET
+ }
+
+ private class Album {
+ final UrlType type;
+ final String id;
+
+ Album(UrlType type, String id) {
+ this.type = type;
+ this.id = id;
+ }
+ }
+
@Override
public DownloadThreadPool getThreadPool() {
return flickrThreadPool;
@@ -46,7 +64,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String sUrl = url.toExternalForm();
// Strip out https
sUrl = sUrl.replace("https://secure.flickr.com", "http://www.flickr.com");
@@ -57,7 +75,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
}
sUrl += "pool";
}
- return new URL(sUrl);
+ return new URI(sUrl).toURL();
}
// FLickr is one of those sites what includes a api key in sites javascript
// TODO let the user provide their own api key
@@ -81,40 +99,44 @@ public class FlickrRipper extends AbstractHTMLRipper {
}
// The flickr api is a monster of weird settings so we just request everything that the webview does
- private String apiURLBuilder(String photoset, String pageNumber, String apiKey) {
- LOGGER.info("https://api.flickr.com/services/rest?extras=can_addmeta," +
- "can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
- "date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
- "owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
- "url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
- "is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
- "get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
- ",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
- "photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" +
- "api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1");
+ private String apiURLBuilder(Album album, String pageNumber, String apiKey) {
+ String method = null;
+ String idField = null;
+ switch (album.type) {
+ case PHOTOSET:
+ method = "flickr.photosets.getPhotos";
+ idField = "photoset_id=" + album.id;
+ break;
+ case USER:
+ method = "flickr.people.getPhotos";
+ idField = "user_id=" + album.id;
+ break;
+ }
+
return "https://api.flickr.com/services/rest?extras=can_addmeta," +
- "can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
- "date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
- "owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
- "url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
- "is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
- "get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
- ",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
- "photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" +
- "api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1";
+ "can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
+ "date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
+ "owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
+ "url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
+ "is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
+ "get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
+ ",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
+ idField + "&viewerNSID=&method=" + method + "&csrf=&" +
+ "api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1";
}
private JSONObject getJSON(String page, String apiKey) {
URL pageURL = null;
String apiURL = null;
try {
- apiURL = apiURLBuilder(getPhotosetID(url.toExternalForm()), page, apiKey);
- pageURL = new URL(apiURL);
- } catch (MalformedURLException e) {
+ apiURL = apiURLBuilder(getAlbum(url.toExternalForm()), page, apiKey);
+ pageURL = new URI(apiURL).toURL();
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.error("Unable to get api link " + apiURL + " is malformed");
}
try {
- LOGGER.info(Http.url(pageURL).ignoreContentType().get().text());
+ LOGGER.info("Fetching: " + apiURL);
+ LOGGER.info("Response: " + Http.url(pageURL).ignoreContentType().get().text());
return new JSONObject(Http.url(pageURL).ignoreContentType().get().text());
} catch (IOException e) {
LOGGER.error("Unable to get api link " + apiURL + " is malformed");
@@ -122,31 +144,42 @@ public class FlickrRipper extends AbstractHTMLRipper {
}
}
- private String getPhotosetID(String url) {
+ private Album getAlbum(String url) throws MalformedURLException {
Pattern p; Matcher m;
- // Root: https://www.flickr.com/photos/115858035@N04/
+ // User photostream: https://www.flickr.com/photos/115858035@N04/
// Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/
final String domainRegex = "https?://[wm.]*flickr.com";
final String userRegex = "[a-zA-Z0-9@_-]+";
// Album
- p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/(sets|albums)/([0-9]+)/?.*$");
+ p = Pattern.compile("^" + domainRegex + "/photos/" + userRegex + "/(sets|albums)/([0-9]+)/?.*$");
m = p.matcher(url);
if (m.matches()) {
- return m.group(3);
+ return new Album(UrlType.PHOTOSET, m.group(2));
}
- return null;
+
+ // User photostream
+ p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/?$");
+ m = p.matcher(url);
+ if (m.matches()) {
+ return new Album(UrlType.USER, m.group(1));
+ }
+
+ String errorMessage = "Failed to extract photoset ID from url: " + url;
+
+ LOGGER.error(errorMessage);
+ throw new MalformedURLException(errorMessage);
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
if (!url.toExternalForm().contains("/sets/")) {
return super.getAlbumTitle(url);
}
- try {
+ try {
// Attempt to use album title as GID
- Document doc = getFirstPage();
+ Document doc = getCachedFirstPage();
String user = url.toExternalForm();
user = user.substring(user.indexOf("/photos/") + "/photos/".length());
user = user.substring(0, user.indexOf("/"));
@@ -196,13 +229,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = Http.url(url).get();
- }
- return albumDoc;
- }
@Override
public List<String> getURLsFromPage(Document doc) {
@@ -214,15 +240,29 @@ public class FlickrRipper extends AbstractHTMLRipper {
if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
break;
} else {
- int totalPages = jsonData.getJSONObject("photoset").getInt("pages");
+ // Determine root key
+ JSONObject rootData;
+
+ try {
+ rootData = jsonData.getJSONObject("photoset");
+ } catch (JSONException e) {
+ try {
+ rootData = jsonData.getJSONObject("photos");
+ } catch (JSONException innerE) {
+ LOGGER.error("Unable to find photos in response");
+ break;
+ }
+ }
+
+ int totalPages = rootData.getInt("pages");
LOGGER.info(jsonData);
- JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
+ JSONArray pictures = rootData.getJSONArray("photo");
for (int i = 0; i < pictures.length(); i++) {
LOGGER.info(i);
JSONObject data = (JSONObject) pictures.get(i);
try {
addURLToDownload(getLargestImageURL(data.getString("id"), apiKey));
- } catch (MalformedURLException e) {
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.error("Flickr MalformedURLException: " + e.getMessage());
}
@@ -245,11 +285,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
addURLToDownload(url, getPrefix(index));
}
- private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException {
+ private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException, URISyntaxException {
TreeMap<Integer, String> imageURLMap = new TreeMap<>();
try {
- URL imageAPIURL = new URL("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1");
+ URL imageAPIURL = new URI("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1").toURL();
JSONArray imageSizes = new JSONObject(Http.url(imageAPIURL).ignoreContentType().get().text()).getJSONObject("sizes").getJSONArray("size");
for (int i = 0; i < imageSizes.length(); i++) {
JSONObject imageInfo = imageSizes.getJSONObject(i);
@@ -264,6 +304,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
LOGGER.error("IOException while looking at image sizes: " + e.getMessage());
}
- return new URL(imageURLMap.lastEntry().getValue());
+ return new URI(imageURLMap.lastEntry().getValue()).toURL();
}
}
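For reference, a sketch of the two URL shapes the new getAlbum() distinguishes; the regexes are copied from the patch and the example URLs from its comments:

    import java.util.regex.Pattern;

    public class FlickrUrlSketch {
        public static void main(String[] args) {
            String domainRegex = "https?://[wm.]*flickr.com";
            String userRegex = "[a-zA-Z0-9@_-]+";
            Pattern photoset = Pattern.compile("^" + domainRegex + "/photos/" + userRegex + "/(sets|albums)/([0-9]+)/?.*$");
            Pattern user = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/?$");
            // true -> UrlType.PHOTOSET, id = 72157644042355643
            System.out.println(photoset.matcher("https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/").matches());
            // true -> UrlType.USER, id = 115858035@N04
            System.out.println(user.matcher("https://www.flickr.com/photos/115858035@N04/").matches());
        }
    }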
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MulemaxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FooktubeRipper.java
similarity index 67%
rename from src/main/java/com/rarchives/ripme/ripper/rippers/MulemaxRipper.java
rename to src/main/java/com/rarchives/ripme/ripper/rippers/FooktubeRipper.java
index 01bf4b1c..fed1abe0 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MulemaxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FooktubeRipper.java
@@ -10,17 +10,10 @@ import java.util.regex.Pattern;
import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-import com.rarchives.ripme.ripper.VideoRipper;
-import com.rarchives.ripme.utils.Http;
+public class FooktubeRipper extends AbstractSingleFileRipper {
-public class MulemaxRipper extends AbstractSingleFileRipper {
-
- private static final String HOST = "mulemax";
-
- public MulemaxRipper(URL url) throws IOException {
+ public FooktubeRipper(URL url) throws IOException {
super(url);
}
@@ -34,14 +27,10 @@ public class MulemaxRipper extends AbstractSingleFileRipper {
return "mulemax.com";
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
@Override
public boolean canRip(URL url) {
- Pattern p = Pattern.compile("^https?://.*mulemax\\.com/video/(.*)/.*$");
+ Pattern p = Pattern.compile("^https?://.*fooktube\\.com/video/(.*)/.*$");
Matcher m = p.matcher(url.toExternalForm());
return m.matches();
}
@@ -53,15 +42,15 @@ public class MulemaxRipper extends AbstractSingleFileRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://.*mulemax\\.com/video/(.*)/(.*)$");
+ Pattern p = Pattern.compile("^https?://.*fooktube\\.com/video/(.*)/(.*)$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(2);
}
throw new MalformedURLException(
- "Expected mulemax format:"
- + "mulemax.com/video/####"
+ "Expected fooktube format:"
+ + "fooktube.com/video/####"
+ " Got: " + url);
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java
new file mode 100644
index 00000000..a39d3b9b
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java
@@ -0,0 +1,74 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class FreeComicOnlineRipper extends AbstractHTMLRipper {
+
+ public FreeComicOnlineRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "freecomiconline";
+ }
+
+ @Override
+ public String getDomain() {
+ return "freecomiconline.me";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p = Pattern.compile("https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1) + "_" + m.group(2);
+ }
+ p = Pattern.compile("^https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/?$");
+ m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected freecomiconline URL format: " +
+ "freecomiconline.me/TITLE/CHAPTER - got " + url + " instead");
+ }
+
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
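+ // Assumption: the second link in the "div.select-pagination a" widget points to the next chapter.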
+ String nextPage = doc.select("div.select-pagination a").get(1).attr("href");
+ String nextUrl = "";
+ Pattern p = Pattern.compile("https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$");
+ Matcher m = p.matcher(nextPage);
+ if(m.matches()){
+ nextUrl = m.group(0);
+ }
+ if(nextUrl.equals("")) throw new IOException("No more pages");
+ sleep(500);
+ return Http.url(nextUrl).get();
+ }
+
+ @Override
+ public List<String> getURLsFromPage(Document doc) {
+ List<String> result = new ArrayList<>();
+ for (Element el : doc.select(".wp-manga-chapter-img")) {
+ result.add(el.attr("src"));
+ }
+ return result;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
index 683c791b..dbb46fe1 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
@@ -1,10 +1,12 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
-import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -15,11 +17,10 @@ import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.Connection.Response;
-import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.safety.Whitelist;
+import org.jsoup.safety.Safelist;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@@ -91,14 +92,13 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
String nextUrl = urlBase + nextPageUrl.first().attr("href");
sleep(500);
- Document nextPage = Http.url(nextUrl).cookies(cookies).get();
- return nextPage;
+ return Http.url(nextUrl).cookies(cookies).get();
}
private String getImageFromPost(String url) {
sleep(1000);
- Document d = null;
+ Document d;
try {
d = Http.url(url).cookies(cookies).get();
Elements links = d.getElementsByTag("a");
@@ -125,6 +125,9 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
urls.add(urlToAdd);
}
}
+ if (isStopped() || isThisATest()) {
+ break;
+ }
}
return urls;
}
@@ -164,7 +167,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
ele.select("br").append("\\n");
ele.select("p").prepend("\\n\\n");
LOGGER.debug("Returning description at " + page);
- String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
+ String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Safelist.none(), new Document.OutputSettings().prettyPrint(false));
return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
} catch (IOException ioe) {
LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
@@ -181,24 +184,22 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
}
String newText = "";
String saveAs = "";
- File saveFileAs;
+ Path saveFileAs;
saveAs = text.split("\n")[0];
saveAs = saveAs.replaceAll("^(\\S+)\\s+by\\s+(.*)$", "$2_$1");
for (int i = 1;i < text.split("\n").length; i++) {
newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i];
}
try {
- if (!subdirectory.equals("")) {
- subdirectory = File.separator + subdirectory;
- }
- saveFileAs = new File(
- workingDir.getCanonicalPath()
+ saveFileAs = Paths.get(
+ workingDir
+ + "/"
+ subdirectory
- + File.separator
+ + "/"
+ saveAs
+ ".txt");
// Write the file
- FileOutputStream out = (new FileOutputStream(saveFileAs));
+ OutputStream out = Files.newOutputStream(saveFileAs);
out.write(text.getBytes());
out.close();
} catch (IOException e) {
@@ -206,9 +207,13 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
return false;
}
LOGGER.debug("Downloading " + url + "'s description to " + saveFileAs);
- if (!saveFileAs.getParentFile().exists()) {
+ if (!Files.exists(saveFileAs.getParent())) {
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
- saveFileAs.getParentFile().mkdirs();
+ try {
+ Files.createDirectory(saveFileAs.getParent());
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
}
return true;
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
index d88b16e8..62a60fcc 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -40,7 +42,7 @@ public class FuskatorRipper extends AbstractHTMLRipper {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String u = url.toExternalForm();
if (u.contains("/thumbs/")) {
u = u.replace("/thumbs/", "/full/");
@@ -48,7 +50,7 @@ public class FuskatorRipper extends AbstractHTMLRipper {
if (u.contains("/expanded/")) {
u = u.replaceAll("/expanded/", "/full/");
}
- return new URL(u);
+ return new URI(u).toURL();
}
@Override
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
deleted file mode 100644
index 16205115..00000000
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java
+++ /dev/null
@@ -1,159 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import org.json.JSONArray;
-import org.json.JSONObject;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.rarchives.ripme.utils.Http;
-
-
-public class GfycatRipper extends AbstractHTMLRipper {
-
- private static final String HOST = "gfycat.com";
- String username = "";
- String cursor = "";
- String count = "30";
-
-
-
- public GfycatRipper(URL url) throws IOException {
- super(new URL(url.toExternalForm().split("-")[0].replace("thumbs.", "")));
- }
-
- @Override
- public String getDomain() {
- return "gfycat.com";
- }
-
- @Override
- public String getHost() {
- return "gfycat";
- }
-
- @Override
- public boolean canRip(URL url) {
- return url.getHost().endsWith(HOST);
- }
-
- @Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- String sUrl = url.toExternalForm();
- sUrl = sUrl.replace("/gifs/detail", "");
- sUrl = sUrl.replace("/amp", "");
- return new URL(sUrl);
- }
-
- public boolean isProfile() {
- Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9]+).*$");
- Matcher m = p.matcher(url.toExternalForm());
- return m.matches();
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- if (!isProfile()) {
- return Http.url(url).get();
- } else {
- username = getGID(url);
- return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats")).ignoreContentType().get();
- }
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index));
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://(thumbs\\.|[wm\\.]*)gfycat\\.com/@?([a-zA-Z0-9]+).*$");
- Matcher m = p.matcher(url.toExternalForm());
-
- if (m.matches())
- return m.group(2);
-
- throw new MalformedURLException(
- "Expected gfycat.com format: "
- + "gfycat.com/id or "
- + "thumbs.gfycat.com/id.gif"
- + " Got: " + url);
- }
-
- private String stripHTMLTags(String t) {
- t = t.replaceAll("<html>\n" +
- " <head></head>\n" +
- " <body>", "");
- t = t.replaceAll("</body>\n" +
- "</html>", "");
- t = t.replaceAll("\n", "");
- t = t.replaceAll("=\"\"", "");
- return t;
- }
-
- @Override
- public Document getNextPage(Document doc) throws IOException {
- if (cursor.equals("")) {
- throw new IOException("No more pages");
- }
- return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
- }
-
- @Override
- public List<String> getURLsFromPage(Document doc) {
- List<String> result = new ArrayList<>();
- if (isProfile()) {
- JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
- JSONArray content = page.getJSONArray("gfycats");
- for (int i = 0; i < content.length(); i++) {
- result.add(content.getJSONObject(i).getString("mp4Url"));
- }
- cursor = page.getString("cursor");
- } else {
- Elements videos = doc.select("script");
- for (Element el : videos) {
- String json = el.html();
- if (json.startsWith("{")) {
- JSONObject page = new JSONObject(json);
- result.add(page.getJSONObject("video").getString("contentUrl"));
- }
- }
- }
- return result;
- }
-
- /**
- * Helper method for retrieving video URLs.
- * @param url URL to gfycat page
- * @return URL to video
- * @throws IOException
- */
- public static String getVideoURL(URL url) throws IOException {
- LOGGER.info("Retrieving " + url.toExternalForm());
-
- //Sanitize the URL first
- url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
-
- Document doc = Http.url(url).get();
- Elements videos = doc.select("script");
- for (Element el : videos) {
- String json = el.html();
- if (json.startsWith("{")) {
- JSONObject page = new JSONObject(json);
- return page.getJSONObject("video").getString("contentUrl");
- }
- }
- throw new IOException();
- }
-}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatporntubeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatporntubeRipper.java
index fd8c292a..bdb58ad2 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatporntubeRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatporntubeRipper.java
@@ -11,8 +11,6 @@ import java.util.regex.Pattern;
import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
import org.jsoup.nodes.Document;
-import com.rarchives.ripme.utils.Http;
-
public class GfycatporntubeRipper extends AbstractSingleFileRipper {
public GfycatporntubeRipper(URL url) throws IOException {
@@ -40,12 +38,6 @@ public class GfycatporntubeRipper extends AbstractSingleFileRipper {
"gfycatporntube.com/NAME - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java
index 2afc79d1..49cbfc60 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java
@@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -13,11 +14,8 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
public class GirlsOfDesireRipper extends AbstractHTMLRipper {
- // Current HTML document
- private Document albumDoc = null;
public GirlsOfDesireRipper(URL url) throws IOException {
super(url);
@@ -32,10 +30,10 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
return "girlsofdesire.org";
}
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
- Document doc = getFirstPage();
+ Document doc = getCachedFirstPage();
Elements elems = doc.select(".albumName");
return getHost() + "_" + elems.first().text();
} catch (Exception e) {
@@ -62,14 +60,6 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = Http.url(url).get();
- }
- return albumDoc;
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
index fd3b23c2..040ca978 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
@@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -49,9 +50,9 @@ public class HbrowseRipper extends AbstractHTMLRipper {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
- Document doc = getFirstPage();
+ Document doc = getCachedFirstPage();
String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
return getHost() + "_" + title + "_" + getGID(url);
} catch (Exception e) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java
index cb521523..2b8ac967 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java
@@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -90,7 +91,7 @@ public class Hentai2readRipper extends AbstractHTMLRipper {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
return getHost() + "_" + getGID(url);
} catch (Exception e) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java
new file mode 100644
index 00000000..4d28f7a2
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java
@@ -0,0 +1,184 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.rarchives.ripme.utils.Http;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+import com.rarchives.ripme.ripper.AbstractJSONRipper;
+import org.jsoup.nodes.DataNode;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+public class HentaiNexusRipper extends AbstractJSONRipper {
+
+ public HentaiNexusRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "hentainexus";
+ }
+ @Override
+ public String getDomain() {
+ return "hentainexus.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ /*
+ Valid URLs are /view/id, /read/id and those 2 with #pagenumber
+ https://hentainexus.com/view/9202
+ https://hentainexus.com/read/9202
+ https://hentainexus.com/view/9202#001
+ https://hentainexus.com/read/9202#001
+ */
+
+ Pattern p = Pattern.compile("^https?://hentainexus\\.com/(?:view|read)/([0-9]+)(?:\\#[0-9]+)*$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected hentainexus.com URL format: " +
+ "hentainexus.com/view/id OR hentainexus.com/read/id - got " + url + " instead");
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+
+
+ @Override
+ protected List<String> getURLsFromJSON(JSONObject json) throws JSONException {
+
+ List<String> urlList = new ArrayList<>();
+
+ JSONArray imagesList = json.getJSONArray("f");
+ String host = json.getString("b");
+ String folder = json.getString("r");
+ String id = json.getString("i");
+
+ for (Object singleImage : imagesList) {
+ String hashTMP = ((JSONObject) singleImage).getString("h");
+ String fileNameTMP = ((JSONObject) singleImage).getString("p");
+ String imageUrlTMP = String.format("%s%s%s/%s/%s",host,folder,hashTMP,id,fileNameTMP);
+ urlList.add(imageUrlTMP);
+ }
+
+ return urlList;
+ }
+
+ @Override
+ protected JSONObject getFirstPage() throws IOException, URISyntaxException {
+ String jsonEncodedString = getJsonEncodedStringFromPage();
+ String jsonDecodedString = decodeJsonString(jsonEncodedString);
+ return new JSONObject(jsonDecodedString);
+ }
+
+ public String getJsonEncodedStringFromPage() throws MalformedURLException, IOException, URISyntaxException {
+ // Image data only appears on the /read/ page and not on the /view/ one.
+ URL readUrl = new URI(String.format("http://hentainexus.com/read/%s",getGID(url))).toURL();
+ Document document = Http.url(readUrl).response().parse();
+
+ for (Element scripts : document.getElementsByTag("script")) {
+ for (DataNode dataNode : scripts.dataNodes()) {
+ if (dataNode.getWholeData().contains("initReader")) {
+ // Extract JSON encoded string from the JavaScript initReader() call.
+ String data = dataNode.getWholeData().trim().replaceAll("\\r|\\n|\\t","");
+
+ Pattern p = Pattern.compile(".*?initReader\\(\"(.*?)\",.*?\\).*?");
+ Matcher m = p.matcher(data);
+ if (m.matches()) {
+ return m.group(1);
+ }
+ }
+ }
+ }
+ return "";
+ }
+
+ public String decodeJsonString(String jsonEncodedString)
+ {
+ /*
+ The initReader() JavaScript function accepts 2 parameters: an obfuscated string and the window title (which we can ignore).
+ The obfuscated string is a JSON string with some bytes shifted and swapped around, then encoded in base64.
+ The following code is a Java adaptation of the initReader() JavaScript function after manual deobfuscation.
+ */
+
+ byte[] jsonBytes = Base64.getDecoder().decode(jsonEncodedString);
+
+ ArrayList<Integer> unknownArray = new ArrayList<>();
+ ArrayList<Integer> indexesToUse = new ArrayList<>();
+
+ for (int i = 0x2; unknownArray.size() < 0x10; ++i) {
+ if (!indexesToUse.contains(i)) {
+ unknownArray.add(i);
+ for (int j = i << 0x1; j <= 0x100; j += i) {
+ if (!indexesToUse.contains(j)) {
+ indexesToUse.add(j);
+ }
+ }
+ }
+ }
+
+ byte magicByte = 0x0;
+ for (int i = 0x0; i < 0x40; i++) {
+ magicByte = (byte) (signedToUnsigned(magicByte) ^ signedToUnsigned(jsonBytes[i]));
+ for (int j = 0x0; j < 0x8; j++) {
+ long unsignedMagicByteTMP = signedToUnsigned(magicByte);
+ magicByte = (byte) ((unsignedMagicByteTMP & 0x1) == 1 ? unsignedMagicByteTMP >>> 0x1 ^ 0xc : unsignedMagicByteTMP >>> 0x1);
+ }
+ }
+
+ magicByte = (byte) (magicByte & 0x7);
+ ArrayList<Integer> newArray = new ArrayList<>();
+
+ for (int i = 0x0; i < 0x100; i++) {
+ newArray.add(i);
+ }
+
+ int newIndex = 0, backup = 0;
+ for (int i = 0x0; i < 0x100; i++) {
+ newIndex = (newIndex + newArray.get(i) + (int) signedToUnsigned(jsonBytes[i % 0x40])) % 0x100;
+ backup = newArray.get(i);
+ newArray.set(i, newArray.get(newIndex));
+ newArray.set(newIndex, backup);
+ }
+
+ int magicByteTranslated = (int) unknownArray.get(magicByte);
+ int index1 = 0x0, index2 = 0x0, index3 = 0x0, swap1 = 0x0, xorNumber = 0x0;
+ String decodedJsonString = "";
+
+ for (int i = 0x0; i + 0x40 < jsonBytes.length; i++) {
+ index1 = (index1 + magicByteTranslated) % 0x100;
+ index2 = (index3 + newArray.get((index2 + newArray.get(index1)) % 0x100)) % 0x100;
+ index3 = (index3 + index1 + newArray.get(index1)) % 0x100;
+ swap1 = newArray.get(index1);
+ newArray.set(index1, newArray.get(index2));
+ newArray.set(index2,swap1);
+ xorNumber = newArray.get((index2 + newArray.get((index1 + newArray.get((xorNumber + index3) % 0x100)) % 0x100)) % 0x100);
+ decodedJsonString += Character.toString((char) signedToUnsigned((jsonBytes[i + 0x40] ^ xorNumber)));
+ }
+
+ return decodedJsonString;
+ }
+
+
+ private static long signedToUnsigned(int signed) {
+ return (byte) signed & 0xFF;
+ }
+
+}
\ No newline at end of file
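
A note on the decoded payload that getURLsFromJSON() consumes: the image host arrives in "b", a folder prefix in "r", the gallery id in "i", and the per-image entries in "f" (hash "h", filename "p"). A minimal sketch of the URL assembly with a hypothetical payload (the field names come from the ripper above; the sample values are invented):

    import org.json.JSONArray;
    import org.json.JSONObject;

    public class HentaiNexusUrlSketch {
        public static void main(String[] args) {
            // Hypothetical decoded reader payload mirroring the fields read above.
            JSONObject json = new JSONObject()
                    .put("b", "https://cdn.example/")  // host (invented value)
                    .put("r", "g/")                    // folder prefix (invented value)
                    .put("i", "9202")                  // gallery id
                    .put("f", new JSONArray().put(
                            new JSONObject().put("h", "abc123").put("p", "001.jpg")));

            for (Object singleImage : json.getJSONArray("f")) {
                JSONObject img = (JSONObject) singleImage;
                // Same format as the ripper: host + folder + hash + "/" + id + "/" + filename
                String imageUrl = String.format("%s%s%s/%s/%s",
                        json.getString("b"), json.getString("r"),
                        img.getString("h"), json.getString("i"), img.getString("p"));
                System.out.println(imageUrl); // https://cdn.example/g/abc123/9202/001.jpg
            }
        }
    }
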
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaidudeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaidudeRipper.java
index 7950f0cf..24625859 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaidudeRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaidudeRipper.java
@@ -10,6 +10,7 @@ import org.jsoup.nodes.Document;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
@@ -50,12 +51,6 @@ public class HentaidudeRipper extends AbstractSingleFileRipper {
"Expected hqporner URL format: " + "hentaidude.com/VIDEO - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
@@ -84,7 +79,7 @@ public class HentaidudeRipper extends AbstractSingleFileRipper {
return hentaidudeThreadPool;
}
- private class HentaidudeDownloadThread extends Thread {
+ private class HentaidudeDownloadThread implements Runnable {
private URL url;
@@ -97,7 +92,7 @@ public class HentaidudeRipper extends AbstractSingleFileRipper {
public void run() {
try {
Document doc = Http.url(url).get();
- URL videoSourceUrl = new URL(getVideoUrl(doc));
+ URL videoSourceUrl = new URI(getVideoUrl(doc)).toURL();
addURLToDownload(videoSourceUrl, "", "", "", null, getVideoName(), "mp4");
} catch (Exception e) {
LOGGER.error("Could not get video url for " + getVideoName(), e);
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java
index a4e5895d..d6dba419 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoxRipper.java
@@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -12,7 +13,6 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
public class HentaifoxRipper extends AbstractHTMLRipper {
@@ -41,12 +41,6 @@ public class HentaifoxRipper extends AbstractHTMLRipper {
"https://hentaifox.com/gallery/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
LOGGER.info(doc);
@@ -59,9 +53,9 @@ public class HentaifoxRipper extends AbstractHTMLRipper {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
- Document doc = getFirstPage();
+ Document doc = getCachedFirstPage();
String title = doc.select("div.info > h1").first().text();
return getHost() + "_" + title + "_" + getGID(url);
} catch (Exception e) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiimageRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiimageRipper.java
index df7bfb96..45628e82 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiimageRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiimageRipper.java
@@ -52,13 +52,6 @@ public class HentaiimageRipper extends AbstractHTMLRipper {
"https://hentai-image.com/image/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
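
The getFirstPage() overrides deleted from the last three rippers (and from HypnohubRipper, ImagevenueRipper, and ImgboxRipper below) were identical boilerplate, which suggests AbstractHTMLRipper now provides that behavior itself, along with the getCachedFirstPage() variant the getAlbumTitle() methods switch to. A sketch of what such defaults plausibly look like; the bodies are an assumption, not the project's actual code:

    import java.io.IOException;

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;

    // Hypothetical excerpt of the base class; only the two methods relevant to this diff.
    public abstract class AbstractHTMLRipperSketch {
        protected java.net.URL url;          // set by the real constructor
        private Document cachedFirstPage;    // memoized first page

        // Default fetch: exactly what every deleted override used to do by hand.
        protected Document getFirstPage() throws IOException {
            return Jsoup.connect(url.toExternalForm()).get();
        }

        // Cached variant so getAlbumTitle() and the rip loop share one request.
        protected Document getCachedFirstPage() throws IOException {
            if (cachedFirstPage == null) {
                cachedFirstPage = getFirstPage();
            }
            return cachedFirstPage;
        }
    }
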
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HitomiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HitomiRipper.java
index 3196c139..d312b75b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HitomiRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HitomiRipper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -13,7 +15,6 @@ import org.jsoup.nodes.Document;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
-import org.jsoup.nodes.Element;
public class HitomiRipper extends AbstractHTMLRipper {
@@ -35,20 +36,20 @@ public class HitomiRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("https://hitomi.la/galleries/([\\d]+).html");
+ Pattern p = Pattern.compile("https://hitomi.la/(cg|doujinshi|gamecg|manga)/(.+).html");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
galleryId = m.group(1);
return m.group(1);
}
throw new MalformedURLException("Expected hitomi URL format: " +
- "https://hitomi.la/galleries/ID.html - got " + url + " instead");
+ "https://hitomi.la/(cg|doujinshi|gamecg|manga)/ID.html - got " + url + " instead");
}
@Override
- public Document getFirstPage() throws IOException {
+ public Document getFirstPage() throws IOException, URISyntaxException {
// if we go to /GALLERYID.js we get a nice json array of all images in the gallery
- return Http.url(new URL(url.toExternalForm().replaceAll("hitomi", "ltn.hitomi").replaceAll(".html", ".js"))).ignoreContentType().get();
+ return Http.url(new URI(url.toExternalForm().replaceAll("hitomi", "ltn.hitomi").replaceAll(".html", ".js")).toURL()).ignoreContentType().get();
}
@@ -65,7 +66,7 @@ public class HitomiRipper extends AbstractHTMLRipper {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title and username as GID
Document doc = Http.url(url).get();
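
Another patch-wide pattern: every new URL(string) becomes new URI(string).toURL(), which is why URISyntaxException joins so many signatures; the URL string constructors are deprecated in recent JDKs, and routing through URI validates the string before any URL is built. A before/after sketch using the Hitomi endpoint rewrite above (unlike the ripper's replaceAll(".html", ".js"), the sketch escapes the dot, which replaceAll otherwise treats as a regex wildcard):

    import java.net.MalformedURLException;
    import java.net.URI;
    import java.net.URISyntaxException;
    import java.net.URL;

    public class UrlMigrationSketch {
        public static void main(String[] args) throws MalformedURLException, URISyntaxException {
            String page = "https://hitomi.la/manga/12345.html";
            // Derive the JSON endpoint; "\\.html" escapes the dot in the regex.
            String jsEndpoint = page.replaceAll("hitomi", "ltn.hitomi").replaceAll("\\.html", ".js");

            @SuppressWarnings("deprecation")
            URL legacy = new URL(jsEndpoint);          // old style: no up-front validation

            URL modern = new URI(jsEndpoint).toURL();  // new style: URI validates first

            System.out.println(legacy);
            System.out.println(modern);                // both print https://ltn.hitomi.la/manga/12345.js
        }
    }
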
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java
index 8d13f113..0f69c75b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java
@@ -11,6 +11,8 @@ import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -63,9 +65,8 @@ public class HqpornerRipper extends AbstractHTMLRipper {
}
@Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
+ public Document getFirstPage() throws IOException, URISyntaxException {
+ return super.getFirstPage();
}
@Override
@@ -130,7 +131,7 @@ public class HqpornerRipper extends AbstractHTMLRipper {
return true;
}
- private class HqpornerDownloadThread extends Thread {
+ private class HqpornerDownloadThread implements Runnable {
private URL hqpornerVideoPageUrl;
//private int index;
@@ -164,10 +165,10 @@ public class HqpornerRipper extends AbstractHTMLRipper {
}
if (downloadUrl != null) {
- addURLToDownload(new URL(downloadUrl), "", subdirectory, "", null, getVideoName(), "mp4");
+ addURLToDownload(new URI(downloadUrl).toURL(), "", subdirectory, "", null, getVideoName(), "mp4");
}
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while downloading video.", e);
}
}
@@ -215,7 +216,7 @@ public class HqpornerRipper extends AbstractHTMLRipper {
try {
logger.info("Trying to download from unknown video host " + videoPageurl);
- URL url = new URL(videoPageurl);
+ URL url = new URI(videoPageurl).toURL();
Response response = Http.url(url).referrer(hqpornerVideoPageUrl).response();
Document doc = response.parse();
@@ -245,7 +246,7 @@ public class HqpornerRipper extends AbstractHTMLRipper {
}
}
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
logger.error("Unable to get video url using generic methods.");
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java
index 5b481258..15420655 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HypnohubRipper.java
@@ -46,12 +46,6 @@ public class HypnohubRipper extends AbstractHTMLRipper {
"hypnohub.net/pool/show/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
private String ripPost(String url) throws IOException {
LOGGER.info(url);
Document doc = Http.url(url).get();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java
deleted file mode 100644
index 062217b2..00000000
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java
+++ /dev/null
@@ -1,112 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
-
-public class ImagearnRipper extends AbstractHTMLRipper {
-
- public ImagearnRipper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public String getHost() {
- return "imagearn";
- }
- @Override
- public String getDomain() {
- return "imagearn.com";
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^.*imagearn.com/+gallery.php\\?id=([0-9]+).*$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
- throw new MalformedURLException(
- "Expected imagearn.com gallery formats: "
- + "imagearn.com/gallery.php?id=####..."
- + " Got: " + url);
- }
-
- public URL sanitizeURL(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^.*imagearn.com/+image.php\\?id=[0-9]+.*$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- // URL points to imagearn *image*, not gallery
- try {
- url = getGalleryFromImage(url);
- } catch (Exception e) {
- LOGGER.error("[!] " + e.getMessage(), e);
- }
- }
- return url;
- }
-
- private URL getGalleryFromImage(URL url) throws IOException {
- Document doc = Http.url(url).get();
- for (Element link : doc.select("a[href~=^gallery\\.php.*$]")) {
- LOGGER.info("LINK: " + link.toString());
- if (link.hasAttr("href")
- && link.attr("href").contains("gallery.php")) {
- url = new URL("http://imagearn.com/" + link.attr("href"));
- LOGGER.info("[!] Found gallery from given link: " + url);
- return url;
- }
- }
- throw new IOException("Failed to find gallery at URL " + url);
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
- @Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
- try {
- Document doc = getFirstPage();
- String title = doc.select("h3 > strong").first().text(); // profile name
- return getHost() + "_" + title + "_" + getGID(url);
- } catch (Exception e) {
- // Fall back to default album naming convention
- LOGGER.warn("Failed to get album title from " + url, e);
- }
- return super.getAlbumTitle(url);
- }
-
- @Override
- public List<String> getURLsFromPage(Document doc) {
- List<String> imageURLs = new ArrayList<>();
- for (Element thumb : doc.select("div#gallery > div > a")) {
- String imageURL = thumb.attr("href");
- try {
- Document imagedoc = new Http("http://imagearn.com/" + imageURL).get();
- String image = imagedoc.select("a.thickbox").first().attr("href");
- imageURLs.add(image);
- } catch (IOException e) {
- LOGGER.warn("Was unable to download page: " + imageURL);
- }
- }
- return imageURLs;
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index));
- sleep(1000);
- }
-}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java
index 3aca67cf..0699273f 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java
@@ -6,20 +6,24 @@ import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ImagebamRipper extends AbstractHTMLRipper {
- // Current HTML document
- private Document albumDoc = null;
-
// Thread pool for finding direct image links from "image" pages (html)
private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam");
@Override
@@ -45,7 +49,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
Pattern p;
Matcher m;
- p = Pattern.compile("^https?://[wm.]*imagebam.com/gallery/([a-zA-Z0-9]+).*$");
+ p = Pattern.compile("^https?://[wm.]*imagebam.com/(?:gallery|view)/([a-zA-Z0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
@@ -57,14 +61,6 @@ public class ImagebamRipper extends AbstractHTMLRipper {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = Http.url(url).get();
- }
- return albumDoc;
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
@@ -80,7 +76,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
- for (Element thumb : doc.select("div > a[target=_blank]:not(.footera)")) {
+ for (Element thumb : doc.select("div > a[class=thumbnail]:not(.footera)")) {
imageURLs.add(thumb.attr("href"));
}
return imageURLs;
@@ -94,18 +90,15 @@ public class ImagebamRipper extends AbstractHTMLRipper {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
- Elements elems = getFirstPage().select("legend");
+ Elements elems = getCachedFirstPage().select("[id=gallery-name]");
String title = elems.first().text();
LOGGER.info("Title text: '" + title + "'");
- Pattern p = Pattern.compile("^(.*)\\s\\d* image.*$");
- Matcher m = p.matcher(title);
- if (m.matches()) {
- return getHost() + "_" + getGID(url) + " (" + m.group(1).trim() + ")";
+ if (StringUtils.isNotBlank(title)) {
+ return getHost() + "_" + getGID(url) + " (" + title + ")";
}
- LOGGER.info("Doesn't match " + p.pattern());
} catch (Exception e) {
// Fall back to default album naming convention
LOGGER.warn("Failed to get album title from " + url, e);
@@ -118,9 +111,9 @@ public class ImagebamRipper extends AbstractHTMLRipper {
*
* Handles case when site has IP-banned the user.
*/
- private class ImagebamImageThread extends Thread {
- private URL url; //link to "image page"
- private int index; //index in album
+ private class ImagebamImageThread implements Runnable {
+ private final URL url; //link to "image page"
+ private final int index; //index in album
ImagebamImageThread(URL url, int index) {
super();
@@ -138,19 +131,19 @@ public class ImagebamRipper extends AbstractHTMLRipper {
*/
private void fetchImage() {
try {
- Document doc = Http.url(url).get();
+ Map<String, String> cookies = new HashMap<>();
+ cookies.put("nsfw_inter", "1");
+ Document doc = Jsoup.connect(url.toString())
+ .cookies(cookies)
+ .get();
+
// Find image
Elements metaTags = doc.getElementsByTag("meta");
String imgsrc = ""; // initialize so no NullPointerException should ever happen.
-
- for (Element metaTag: metaTags) {
- //the direct link to the image seems to always be linked in the part of the html.
- if (metaTag.attr("property").equals("og:image")) {
- imgsrc = metaTag.attr("content");
- LOGGER.info("Found URL " + imgsrc);
- break;//only one (useful) image possible for an "image page".
- }
+ Elements elem = doc.select("img[class*=main-image]");
+ if ((elem != null) && (elem.size() > 0)) {
+ imgsrc = elem.first().attr("src");
}
// for debugging, or in case something goes wrong.
@@ -165,8 +158,8 @@ public class ImagebamRipper extends AbstractHTMLRipper {
prefix = String.format("%03d_", index);
}
- addURLToDownload(new URL(imgsrc), prefix);
- } catch (IOException e) {
+ addURLToDownload(new URI(imgsrc).toURL(), prefix);
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
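
The Imagebam worker now sends an nsfw_inter=1 cookie and reads the main-image element instead of the og:image meta tag, presumably to get past a content-warning interstitial served to cookie-less visitors. A standalone sketch of that fetch (cookie name and selector come from the diff above; the view URL is a placeholder):

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    public class ImagebamFetchSketch {
        public static void main(String[] args) throws IOException {
            Map<String, String> cookies = new HashMap<>();
            cookies.put("nsfw_inter", "1"); // skip the content-warning interstitial

            Document doc = Jsoup.connect("https://www.imagebam.com/view/XXXXXXX") // placeholder id
                    .cookies(cookies)
                    .get();

            // Same selector as the ripper; null-check instead of assuming a match.
            Element img = doc.selectFirst("img[class*=main-image]");
            System.out.println(img != null ? img.attr("src") : "no image found");
        }
    }
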
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
index f097e667..4fcf2201 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
@@ -1,8 +1,13 @@
package com.rarchives.ripme.ripper.rippers;
+import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
+import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@@ -10,14 +15,26 @@ import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
public class ImagefapRipper extends AbstractHTMLRipper {
- private Document albumDoc = null;
- private boolean isNewAlbumType = false;
+ private int callsMade = 0;
+ private long startTime = System.nanoTime();
+
+ private static final int RETRY_LIMIT = 10;
+ private static final int HTTP_RETRY_LIMIT = 3;
+ private static final int RATE_LIMIT_HOUR = 1000;
+
+ // All sleep times are in milliseconds
+ private static final int PAGE_SLEEP_TIME = 60 * 60 * 1000 / RATE_LIMIT_HOUR;
+ private static final int IMAGE_SLEEP_TIME = 60 * 60 * 1000 / RATE_LIMIT_HOUR;
+ // Timeout when blocked = 1 hour. Retries are spread within the hour mark, plus 1 final retry after the hour mark.
+ private static final int IP_BLOCK_SLEEP_TIME = (int) Math.round((double) 60 / (RETRY_LIMIT - 1) * 60 * 1000);
public ImagefapRipper(URL url) throws IOException {
super(url);
@@ -36,54 +53,40 @@ public class ImagefapRipper extends AbstractHTMLRipper {
* Reformat given URL into the desired format (all images on single page)
*/
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String gid = getGID(url);
- String newURL = "http://www.imagefap.com/gallery.php?";
- if (isNewAlbumType) {
- newURL += "p";
- }
- newURL += "gid=" + gid + "&view=2";
+ String newURL = "https://www.imagefap.com/pictures/" + gid + "/random-string";
LOGGER.debug("Changed URL from " + url + " to " + newURL);
- return new URL(newURL);
+ return new URI(newURL).toURL();
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p; Matcher m;
+ // Old format (I suspect no longer supported)
p = Pattern.compile("^.*imagefap.com/gallery.php\\?pgid=([a-f0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
- isNewAlbumType = true;
return m.group(1);
}
+
p = Pattern.compile("^.*imagefap.com/gallery.php\\?gid=([0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
- p = Pattern.compile("^.*imagefap.com/pictures/([0-9]+).*$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
- p = Pattern.compile("^.*imagefap.com/pictures/([a-f0-9]+).*$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- isNewAlbumType = true;
- return m.group(1);
- }
-
- p = Pattern.compile("^.*imagefap.com/gallery/([0-9]+).*$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(1);
- }
p = Pattern.compile("^.*imagefap.com/gallery/([a-f0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
- isNewAlbumType = true;
+ return m.group(1);
+ }
+
+ // most recent format
+ p = Pattern.compile("^.*imagefap.com/pictures/([a-f0-9]+).*$");
+ m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
return m.group(1);
}
@@ -96,41 +99,72 @@ public class ImagefapRipper extends AbstractHTMLRipper {
@Override
public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = Http.url(url).get();
- }
- return albumDoc;
+
+ Document firstPage = getPageWithRetries(url);
+
+ sendUpdate(STATUS.LOADING_RESOURCE, "Loading first page...");
+
+ return firstPage;
}
@Override
- public Document getNextPage(Document doc) throws IOException {
+ public Document getNextPage(Document doc) throws IOException, URISyntaxException {
String nextURL = null;
for (Element a : doc.select("a.link3")) {
if (a.text().contains("next")) {
- nextURL = "http://imagefap.com/gallery.php" + a.attr("href");
+ nextURL = this.sanitizeURL(this.url) + a.attr("href");
break;
}
}
if (nextURL == null) {
throw new IOException("No next page found");
}
- sleep(1000);
- return Http.url(nextURL).get();
+ // Sleep before fetching next page.
+ sleep(PAGE_SLEEP_TIME);
+
+ sendUpdate(STATUS.LOADING_RESOURCE, "Loading next page URL: " + nextURL);
+ LOGGER.info("Attempting to load next page URL: " + nextURL);
+
+ // Load next page
+ Document nextPage = getPageWithRetries(new URI(nextURL).toURL());
+
+ return nextPage;
}
@Override
public List<String> getURLsFromPage(Document doc) {
+
List<String> imageURLs = new ArrayList<>();
+
+ LOGGER.debug("Trying to get URLs from document... ");
+
for (Element thumb : doc.select("#gallery img")) {
if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) {
continue;
}
String image = getFullSizedImage("https://www.imagefap.com" + thumb.parent().attr("href"));
+
+ if (image == null) {
+ for (int i = 0; i < HTTP_RETRY_LIMIT; i++) {
+ image = getFullSizedImage("https://www.imagefap.com" + thumb.parent().attr("href"));
+ if (image != null) {
+ break;
+ }
+ sleep(PAGE_SLEEP_TIME);
+ }
+ if (image == null)
+ throw new RuntimeException("Unable to extract image URL from single image page! Unable to continue");
+ }
+
+ LOGGER.debug("Adding imageURL: '" + image + "'");
+
imageURLs.add(image);
if (isThisATest()) {
break;
}
}
+ LOGGER.debug("Adding " + imageURLs.size() + " URLs to download");
+
return imageURLs;
}
@@ -141,10 +175,10 @@ public class ImagefapRipper extends AbstractHTMLRipper {
}
@Override
- public String getAlbumTitle(URL url) throws MalformedURLException {
+ public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
- String title = getFirstPage().title();
+ String title = getCachedFirstPage().title();
title = title.replace("Porn Pics & Porn GIFs", "");
title = title.replace(" ", "_");
String toReturn = getHost() + "_" + title + "_" + getGID(url);
@@ -156,11 +190,128 @@ public class ImagefapRipper extends AbstractHTMLRipper {
private String getFullSizedImage(String pageURL) {
try {
- Document doc = Http.url(pageURL).get();
- return doc.select("img#mainPhoto").attr("src");
- } catch (IOException e) {
+ // Sleep before fetching image.
+ sleep(IMAGE_SLEEP_TIME);
+
+ Document doc = getPageWithRetries(new URI(pageURL).toURL());
+
+ String framedPhotoUrl = doc.select("img#mainPhoto").attr("data-src");
+
+ // Use a version of the URL without query params to reduce the failure rate, since some query params differ between the li elements and the mainPhoto URL
+ String noQueryPhotoUrl = framedPhotoUrl.split("\\?")[0];
+
+ LOGGER.debug("noQueryPhotoUrl: " + noQueryPhotoUrl);
+
+ // Look for a li > a element whose framed attribute starts with the noQueryPhotoUrl (the only reference in the page to the full-size URL)
+ Elements selectedItem = doc.select("ul.thumbs > li > a[framed^='"+noQueryPhotoUrl+"']");
+
+ // the full-size URL is in the href attribute
+ String fullSizedUrl = selectedItem.attr("href");
+
+ if("".equals(fullSizedUrl))
+ throw new IOException("JSoup full URL extraction failed from '" + selectedItem.html() + "'");
+
+ LOGGER.debug("fullSizedUrl: " + fullSizedUrl);
+
+ return fullSizedUrl;
+
+ } catch (IOException | URISyntaxException e) {
+ LOGGER.debug("Unable to get full size image URL from page: " + pageURL + " because: " + e.getMessage());
return null;
}
}
+ /**
+ * Attempts to get page, checks for IP ban, waits.
+ * @param url Page URL to retrieve
+ * @return Page document
+ * @throws IOException If page loading errors, or if retries are exhausted
+ */
+ private Document getPageWithRetries(URL url) throws IOException {
+ Document doc = null;
+ int retries = RETRY_LIMIT;
+ while (true) {
+
+ sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
+
+ // For debugging the rate limit checker. Useful for tracking whether the timeout should be altered or not.
+ callsMade++;
+ checkRateLimit();
+
+ LOGGER.info("Retrieving " + url);
+
+ boolean httpCallThrottled = false;
+ int httpAttempts = 0;
+
+ // Attempt the HTTP call, knowing it can fail for transient network reasons
+ while(true) {
+ httpAttempts++;
+ try {
+ doc = Http.url(url).get();
+ } catch(IOException e) {
+
+ LOGGER.info("Retrieving " + url + " error: " + e.getMessage());
+
+ if(e.getMessage().contains("404"))
+ throw new IOException("Gallery/Page not found!");
+
+ if(httpAttempts < HTTP_RETRY_LIMIT) {
+ sendUpdate(STATUS.DOWNLOAD_WARN, "HTTP call failed: " + e.getMessage() + " retrying " + httpAttempts + " / " + HTTP_RETRY_LIMIT);
+
+ // we sleep for a few seconds
+ sleep(PAGE_SLEEP_TIME);
+ continue;
+ } else {
+ sendUpdate(STATUS.DOWNLOAD_WARN, "HTTP call failed too many times: " + e.getMessage() + " treating this as a throttle");
+ httpCallThrottled = true;
+ }
+ }
+ // no errors, we exit
+ break;
+ }
+
+ if (httpCallThrottled || (doc != null && doc.toString().contains("Your IP made too many requests to our servers and we need to check that you are a real human being"))) {
+ if (retries == 0) {
+ throw new IOException("Hit rate limit and maximum number of retries, giving up");
+ }
+ String message = "Probably hit rate limit while loading " + url + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining";
+ LOGGER.warn(message);
+ sendUpdate(STATUS.DOWNLOAD_WARN, message);
+ retries--;
+ try {
+ Thread.sleep(IP_BLOCK_SLEEP_TIME);
+ } catch (InterruptedException e) {
+ throw new IOException("Interrupted while waiting for rate limit to subside");
+ }
+ } else {
+ return doc;
+ }
+ }
+ }
+
+ /**
+ * Used for debugging the rate limit issue.
+ * This is in order to prevent hitting the rate limit altogether by remaining under the limit threshold.
+ * @return Long duration
+ */
+ private long checkRateLimit() {
+ long endTime = System.nanoTime();
+ long duration = (endTime - startTime) / 1000000;
+
+ int rateLimitMinute = 100;
+ int rateLimitFiveMinutes = 200;
+ int rateLimitHour = RATE_LIMIT_HOUR; // One request allowed every 3.6 seconds.
+
+ if(duration / 1000 < 60){
+ LOGGER.debug("Rate limit: " + (rateLimitMinute - callsMade) + " calls remaining for first minute mark.");
+ } else if(duration / 1000 < 300){
+ LOGGER.debug("Rate limit: " + (rateLimitFiveMinutes - callsMade) + " calls remaining for first 5 minute mark.");
+ } else if(duration / 1000 < 3600){
+ LOGGER.debug("Rate limit: " + (RATE_LIMIT_HOUR - callsMade) + " calls remaining for first hour mark.");
+ }
+
+ return duration;
+ }
+
+
}
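
As a sanity check on the ImagefapRipper constants above: with RATE_LIMIT_HOUR = 1000, both sleep times evaluate to 3,600 ms, i.e. one request every 3.6 seconds (at most 1,000 calls per hour), and with RETRY_LIMIT = 10 the block sleep is round(60 / 9 * 60 * 1000) = 400,000 ms, so the nine in-hour waits exactly cover the hour-long block window before the final retry:

    public class ImagefapRateLimitMath {
        public static void main(String[] args) {
            final int RETRY_LIMIT = 10;
            final int RATE_LIMIT_HOUR = 1000;

            // Same expressions as the constants in the diff above.
            int pageSleepMs = 60 * 60 * 1000 / RATE_LIMIT_HOUR;
            int ipBlockSleepMs = (int) Math.round((double) 60 / (RETRY_LIMIT - 1) * 60 * 1000);

            System.out.println(pageSleepMs);                        // 3600 -> one request every 3.6 s
            System.out.println(ipBlockSleepMs);                     // 400000 -> about 6.7 minutes per retry
            System.out.println(ipBlockSleepMs * (RETRY_LIMIT - 1)); // 3600000 -> nine waits cover the hour
        }
    }
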
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java
index f50a84a0..4691c7c6 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -56,11 +58,6 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("a[target=_blank]")) {
@@ -79,9 +76,9 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
*
* Handles case when site has IP-banned the user.
*/
- private class ImagevenueImageThread extends Thread {
- private URL url;
- private int index;
+ private class ImagevenueImageThread implements Runnable {
+ private final URL url;
+ private final int index;
ImagevenueImageThread(URL url, int index) {
super();
@@ -113,8 +110,8 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
- addURLToDownload(new URL(imgsrc), prefix);
- } catch (IOException e) {
+ addURLToDownload(new URI(imgsrc).toURL(), prefix);
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java
index f3050a13..b32fcad4 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java
@@ -40,10 +40,6 @@ public class ImgboxRipper extends AbstractHTMLRipper {
"imgbox.com/g/albumid - got " + url + "instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
index 93cb809e..4904ac60 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
@@ -1,10 +1,14 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -15,15 +19,15 @@ import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.safety.Whitelist;
+import org.jsoup.safety.Safelist;
import org.jsoup.select.Elements;
-import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
-public class ImgurRipper extends AlbumRipper {
+public class ImgurRipper extends AbstractHTMLRipper {
private static final String DOMAIN = "imgur.com",
HOST = "imgur";
@@ -38,7 +42,6 @@ public class ImgurRipper extends AlbumRipper {
USER_ALBUM,
USER_IMAGES,
SINGLE_IMAGE,
- SERIES_OF_IMAGES,
SUBREDDIT
}
@@ -58,6 +61,7 @@ public class ImgurRipper extends AlbumRipper {
return albumType == ALBUM_TYPE.USER;
}
+ @Override
public boolean canRip(URL url) {
if (!url.getHost().endsWith(DOMAIN)) {
return false;
@@ -71,7 +75,24 @@ public class ImgurRipper extends AlbumRipper {
return true;
}
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ @Override
+ protected String getDomain() {
+ return DOMAIN;
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
+ // No-op as we override rip() method
+ }
+
+ @Override
+ protected List<String> getURLsFromPage(Document page) {
+ // No-op as we override rip() method
+ return Arrays.asList();
+ }
+
+ @Override
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String u = url.toExternalForm();
if (u.indexOf('#') >= 0) {
u = u.substring(0, u.indexOf('#'));
@@ -79,11 +100,17 @@ public class ImgurRipper extends AlbumRipper {
u = u.replace("imgur.com/gallery/", "imgur.com/a/");
u = u.replace("https?://m\\.imgur\\.com", "http://imgur.com");
u = u.replace("https?://i\\.imgur\\.com", "http://imgur.com");
- return new URL(u);
+ return new URI(u).toURL();
}
+ @Override
public String getAlbumTitle(URL url) throws MalformedURLException {
- String gid = getGID(url);
+ String gid = null;
+ try {
+ gid = getGID(url);
+ } catch (URISyntaxException e) {
+ throw new MalformedURLException(e.getMessage());
+ }
if (this.albumType == ALBUM_TYPE.ALBUM) {
try {
// Attempt to use album title as GID
@@ -91,7 +118,7 @@ public class ImgurRipper extends AlbumRipper {
albumDoc = Http.url(url).get();
}
- Elements elems = null;
+ Elements elems;
/*
// TODO: Add config option for including username in album title.
@@ -106,15 +133,13 @@ public class ImgurRipper extends AlbumRipper {
}
*/
- String title = null;
+ String title;
final String defaultTitle1 = "Imgur: The most awesome images on the Internet";
final String defaultTitle2 = "Imgur: The magic of the Internet";
LOGGER.info("Trying to get album title");
elems = albumDoc.select("meta[property=og:title]");
- if (elems != null) {
- title = elems.attr("content");
- LOGGER.debug("Title is " + title);
- }
+ title = elems.attr("content");
+ LOGGER.debug("Title is " + title);
// This is here in case the album is unnamed, to prevent
// Imgur: The most awesome images on the Internet from being added onto the album name
if (title.contains(defaultTitle1) || title.contains(defaultTitle2)) {
@@ -124,27 +149,17 @@ public class ImgurRipper extends AlbumRipper {
title = "";
LOGGER.debug("Trying to use title tag to get title");
elems = albumDoc.select("title");
- if (elems != null) {
- if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
- LOGGER.debug("Was unable to get album title or album was untitled");
- }
- else {
- title = elems.text();
- }
+ if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
+ LOGGER.debug("Was unable to get album title or album was untitled");
+ }
+ else {
+ title = elems.text();
}
}
String albumTitle = "imgur_";
- /*
- // TODO: Add config option (see above)
- if (user != null) {
- albumTitle += "user_" + user;
- }
- */
albumTitle += gid;
- if (title != null) {
- albumTitle += "_" + title;
- }
+ albumTitle += "_" + title;
return albumTitle;
} catch (IOException e) {
@@ -156,118 +171,83 @@ public class ImgurRipper extends AlbumRipper {
@Override
public void rip() throws IOException {
- switch (albumType) {
- case ALBUM:
- // Fall-through
- case USER_ALBUM:
- LOGGER.info("Album type is USER_ALBUM");
- // Don't call getAlbumTitle(this.url) with this
- // as it seems to cause the album to be downloaded to a subdir.
- ripAlbum(this.url);
- break;
- case SERIES_OF_IMAGES:
- LOGGER.info("Album type is SERIES_OF_IMAGES");
- ripAlbum(this.url);
- break;
- case SINGLE_IMAGE:
- LOGGER.info("Album type is SINGLE_IMAGE");
- ripSingleImage(this.url);
- break;
- case USER:
- LOGGER.info("Album type is USER");
- ripUserAccount(url);
- break;
- case SUBREDDIT:
- LOGGER.info("Album type is SUBREDDIT");
- ripSubreddit(url);
- break;
- case USER_IMAGES:
- LOGGER.info("Album type is USER_IMAGES");
- ripUserImages(url);
- break;
+ try {
+ switch (albumType) {
+ case ALBUM:
+ // Fall-through
+ case USER_ALBUM:
+ LOGGER.info("Album type is USER_ALBUM");
+ // Don't call getAlbumTitle(this.url) with this
+ // as it seems to cause the album to be downloaded to a subdir.
+ ripAlbum(this.url);
+ break;
+ case SINGLE_IMAGE:
+ LOGGER.info("Album type is SINGLE_IMAGE");
+ ripSingleImage(this.url);
+ break;
+ case USER:
+ LOGGER.info("Album type is USER");
+ ripUserAccount(url);
+ break;
+ case SUBREDDIT:
+ LOGGER.info("Album type is SUBREDDIT");
+ ripSubreddit(url);
+ break;
+ case USER_IMAGES:
+ LOGGER.info("Album type is USER_IMAGES");
+ ripUserImages(url);
+ break;
+ }
+ } catch (URISyntaxException e) {
+ throw new IOException("Failed ripping " + this.url, e);
}
waitForThreads();
}
- private void ripSingleImage(URL url) throws IOException {
+ private void ripSingleImage(URL url) throws IOException, URISyntaxException {
String strUrl = url.toExternalForm();
- Document document = getDocument(strUrl);
- Matcher m = getEmbeddedJsonMatcher(document);
- if (m.matches()) {
- JSONObject json = new JSONObject(m.group(1)).getJSONObject("image");
- addURLToDownload(extractImageUrlFromJson(json), "");
+ var gid = getGID(url);
+ var json = getSingleImageData(String.format("https://api.imgur.com/post/v1/media/%s?include=media,adconfig,account", gid));
+ var media = json.getJSONArray("media");
+ if (media.length()==0) {
+ throw new IOException(String.format("Failed to fetch image for url %s", strUrl));
+ }
+ if (media.length()>1) {
+ LOGGER.warn(String.format("Got multiple images for url %s", strUrl));
}
+ addURLToDownload(extractImageUrlFromJson((JSONObject)media.get(0)), "");
}
- private void ripAlbum(URL url) throws IOException {
+ private void ripAlbum(URL url) throws IOException, URISyntaxException {
ripAlbum(url, "");
}
- private void ripAlbum(URL url, String subdirectory) throws IOException {
- int index = 0;
+ private void ripAlbum(URL url, String subdirectory) throws IOException, URISyntaxException {
+ int index;
this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
index = 0;
ImgurAlbum album = getImgurAlbum(url);
for (ImgurImage imgurImage : album.images) {
stopCheck();
- String saveAs = workingDir.getCanonicalPath();
- if (!saveAs.endsWith(File.separator)) {
- saveAs += File.separator;
- }
+ Path saveAs = workingDir.toPath();
if (subdirectory != null && !subdirectory.equals("")) {
- saveAs += subdirectory;
+ saveAs = saveAs.resolve(subdirectory);
}
- if (!saveAs.endsWith(File.separator)) {
- saveAs += File.separator;
- }
- File subdirFile = new File(saveAs);
- if (!subdirFile.exists()) {
- subdirFile.mkdirs();
+ if (!Files.exists(saveAs)) {
+ Files.createDirectory(saveAs);
}
index += 1;
+ var imgPath = imgurImage.getSaveAs().replaceAll("\\?\\d", "");
if (Utils.getConfigBoolean("download.save_order", true)) {
- saveAs += String.format("%03d_", index);
+ saveAs = saveAs.resolve(String.format("%03d_%s", index, imgPath));
+ } else {
+ saveAs = saveAs.resolve(imgPath);
}
- saveAs += imgurImage.getSaveAs();
- saveAs = saveAs.replaceAll("\\?\\d", "");
- addURLToDownload(imgurImage.url, new File(saveAs));
+ addURLToDownload(imgurImage.url, saveAs);
}
}
- public static ImgurAlbum getImgurSeries(URL url) throws IOException {
- Pattern p = Pattern.compile("^.*imgur\\.com/([a-zA-Z0-9,]*).*$");
- Matcher m = p.matcher(url.toExternalForm());
- ImgurAlbum album = new ImgurAlbum(url);
- if (m.matches()) {
- String[] imageIds = m.group(1).split(",");
- for (String imageId : imageIds) {
- // TODO: Fetch image with ID imageId
- LOGGER.debug("Fetching image info for ID " + imageId);
- try {
- JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
- if (!json.has("image")) {
- continue;
- }
- JSONObject image = json.getJSONObject("image");
- if (!image.has("links")) {
- continue;
- }
- JSONObject links = image.getJSONObject("links");
- if (!links.has("original")) {
- continue;
- }
- String original = links.getString("original");
- ImgurImage theImage = new ImgurImage(new URL(original));
- album.addImage(theImage);
- } catch (Exception e) {
- LOGGER.error("Got exception while fetching imgur ID " + imageId, e);
- }
- }
- }
- return album;
- }
-
- public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
+ public static ImgurAlbum getImgurAlbum(URL url) throws IOException, URISyntaxException {
String strUrl = url.toExternalForm();
if (!strUrl.contains(",")) {
strUrl += "/all";
@@ -275,13 +255,11 @@ public class ImgurRipper extends AlbumRipper {
LOGGER.info(" Retrieving " + strUrl);
Document doc = getAlbumData("https://api.imgur.com/3/album/" + strUrl.split("/a/")[1]);
// Try to use embedded JSON to retrieve images
- LOGGER.info(Jsoup.clean(doc.body().toString(), Whitelist.none()));
-
try {
- JSONObject json = new JSONObject(Jsoup.clean(doc.body().toString(), Whitelist.none()));
+ JSONObject json = new JSONObject(Jsoup.clean(doc.body().toString(), Safelist.none()));
JSONArray jsonImages = json.getJSONObject("data").getJSONArray("images");
return createImgurAlbumFromJsonArray(url, jsonImages);
- } catch (JSONException e) {
+ } catch (JSONException | URISyntaxException e) {
LOGGER.debug("Error while parsing JSON at " + url + ", continuing", e);
}
@@ -309,54 +287,48 @@ public class ImgurRipper extends AlbumRipper {
image = "http:" + thumb.select("img").attr("src");
} else {
// Unable to find image in this div
- LOGGER.error("[!] Unable to find image in div: " + thumb.toString());
+ LOGGER.error("[!] Unable to find image in div: " + thumb);
continue;
}
if (image.endsWith(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) {
image = image.replace(".gif", ".mp4");
}
- ImgurImage imgurImage = new ImgurImage(new URL(image));
+ ImgurImage imgurImage = new ImgurImage(new URI(image).toURL());
imgurAlbum.addImage(imgurImage);
}
return imgurAlbum;
}
- private static Matcher getEmbeddedJsonMatcher(Document doc) {
- Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL);
- return p.matcher(doc.body().html());
- }
-
- private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException {
+ private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException, URISyntaxException {
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
int imagesLength = jsonImages.length();
for (int i = 0; i < imagesLength; i++) {
JSONObject ob = jsonImages.getJSONObject(i);
- imgurAlbum.addImage(new ImgurImage( new URL(ob.getString("link"))));
+ imgurAlbum.addImage(new ImgurImage( new URI(ob.getString("link")).toURL()));
}
return imgurAlbum;
}
- private static ImgurImage createImgurImageFromJson(JSONObject json) throws MalformedURLException {
- return new ImgurImage(extractImageUrlFromJson(json));
- }
-
- private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException {
+ private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException, URISyntaxException {
String ext = json.getString("ext");
+ if (!ext.startsWith(".")) {
+ ext = "." + ext;
+ }
if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) {
ext = ".mp4";
}
- return new URL(
- "http://i.imgur.com/"
- + json.getString("hash")
- + ext);
+ return new URI(
+ "https://i.imgur.com/"
+ + json.getString("id")
+ + ext).toURL();
}
- private static Document getDocument(String strUrl) throws IOException {
- return Jsoup.connect(strUrl)
+ private static JSONObject getSingleImageData(String strUrl) throws IOException {
+ return Http.url(strUrl)
.userAgent(USER_AGENT)
.timeout(10 * 1000)
- .maxBodySize(0)
- .get();
+ .header("Authorization", "Client-ID " + Utils.getConfigString("imgur.client_id", "546c25a59c58ad7"))
+ .getJSON();
}
private static Document getAlbumData(String strUrl) throws IOException {
@@ -369,35 +341,71 @@ public class ImgurRipper extends AlbumRipper {
.get();
}
+ private static JSONObject getUserData(String userUrl) throws IOException {
+ return Http.url(userUrl)
+ .userAgent(USER_AGENT)
+ .timeout(10 * 1000)
+ .header("Authorization", "Client-ID " + Utils.getConfigString("imgur.client_id", "546c25a59c58ad7"))
+ .getJSON();
+ }
+
/**
* Rips all albums in an imgur user's account.
* @param url
- * URL to imgur user account (http://username.imgur.com)
- * @throws IOException
+ * URL to imgur user account (http://username.imgur.com | https://imgur.com/user/username)
*/
- private void ripUserAccount(URL url) throws IOException {
+ private void ripUserAccount(URL url) throws IOException, URISyntaxException {
+ int cPage = -1, cImage = 0;
+ String apiUrl = "https://api.imgur.com/3/account/%s/submissions/%d/newest?album_previews=1";
+ // Strip the 'user_' prefix from the GID to get the username
+ var username = getGID(url).replace("user_", "");
LOGGER.info("Retrieving " + url);
sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
- Document doc = Http.url(url).get();
- for (Element album : doc.select("div.cover a")) {
- stopCheck();
- if (!album.hasAttr("href")
- || !album.attr("href").contains("imgur.com/a/")) {
- continue;
+
+ while (true) {
+ cPage += 1;
+ var pageUrl = String.format(apiUrl, username, cPage);
+ var json = getUserData(pageUrl);
+ var success = json.getBoolean("success");
+ var status = json.getInt("status");
+ if (!success || status!=200) {
+ throw new IOException(String.format("Unexpected status code %d for url %s and page %d", status, url, cPage));
}
- String albumID = album.attr("href").substring(album.attr("href").lastIndexOf('/') + 1);
- URL albumURL = new URL("http:" + album.attr("href") + "/noscript");
- try {
- ripAlbum(albumURL, albumID);
- Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000);
- } catch (Exception e) {
- LOGGER.error("Error while ripping album: " + e.getMessage(), e);
+ var data = json.getJSONArray("data");
+ if (data.isEmpty()) {
+ // Data array is empty for pages beyond the last page
+ break;
+ }
+ for (int i = 0; i < data.length(); i++) {
+ cImage += 1;
+ String prefixOrSubdir = "";
+ if (Utils.getConfigBoolean("download.save_order", true)) {
+ prefixOrSubdir = String.format("%03d_", cImage);
+ }
+ var d = (JSONObject)data.get(i);
+ var l = d.getString("link");
+ if (d.getBoolean("is_album")) {
+ // For album links with multiple images create a prefixed folder with album id
+ prefixOrSubdir += d.getString("id");
+ ripAlbum(new URI(l).toURL(), prefixOrSubdir);
+ try {
+ Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000L);
+ } catch (InterruptedException e) {
+ LOGGER.error(String.format("Error! Interrupted ripping album %s for user account %s", l, username), e);
+ }
+ } else {
+ // For direct links
+ if (d.has("mp4") && Utils.getConfigBoolean("prefer.mp4", false)) {
+ l = d.getString("mp4");
+ }
+ addURLToDownload(new URI(l).toURL(), prefixOrSubdir);
+ }
}
}
}
- private void ripUserImages(URL url) throws IOException {
+ private void ripUserImages(URL url) {
int page = 0; int imagesFound = 0; int imagesTotal = 0;
String jsonUrl = url.toExternalForm().replace("/all", "/ajax/images");
if (jsonUrl.contains("#")) {
@@ -417,12 +425,12 @@ public class ImgurRipper extends AlbumRipper {
for (int i = 0; i < images.length(); i++) {
imagesFound++;
JSONObject image = images.getJSONObject(i);
- String imageUrl = "http://i.imgur.com/" + image.getString("hash") + image.getString("ext");
+ String imageUrl = "https://i.imgur.com/" + image.getString("hash") + image.getString("ext");
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", imagesFound);
}
- addURLToDownload(new URL(imageUrl), prefix);
+ addURLToDownload(new URI(imageUrl).toURL(), prefix);
}
if (imagesFound >= imagesTotal) {
break;
@@ -435,7 +443,7 @@ public class ImgurRipper extends AlbumRipper {
}
}
- private void ripSubreddit(URL url) throws IOException {
+ private void ripSubreddit(URL url) throws IOException, URISyntaxException {
int page = 0;
while (true) {
stopCheck();
@@ -455,7 +463,7 @@ public class ImgurRipper extends AlbumRipper {
if (image.contains("b.")) {
image = image.replace("b.", ".");
}
- URL imageURL = new URL(image);
+ URL imageURL = new URI(image).toURL();
addURLToDownload(imageURL);
}
if (imgs.isEmpty()) {
@@ -477,29 +485,30 @@ public class ImgurRipper extends AlbumRipper {
}
@Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p = null;
- Matcher m = null;
+ public String getGID(URL url) throws MalformedURLException, URISyntaxException {
+ Pattern p;
+ Matcher m;
- p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery)/([a-zA-Z0-9]{5,}).*$");
+ p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/gallery/(?:(?:[a-zA-Z0-9]*/)?.*-)?([a-zA-Z0-9]+)$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album or gallery
albumType = ALBUM_TYPE.ALBUM;
String gid = m.group(m.groupCount());
- this.url = new URL("http://imgur.com/a/" + gid);
+ this.url = new URI("https://imgur.com/a/" + gid).toURL();
return gid;
}
- p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
+ // Match urls with path /a
+ p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/(?:a|t)/(?:(?:[a-zA-Z0-9]*/)?.*-)?([a-zA-Z0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album or gallery
albumType = ALBUM_TYPE.ALBUM;
String gid = m.group(m.groupCount());
- this.url = new URL("http://imgur.com/a/" + gid);
+ this.url = new URI("https://imgur.com/a/" + gid).toURL();
return gid;
}
- p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/?$");
+ p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{4,})\\.imgur\\.com/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Root imgur account
@@ -510,6 +519,14 @@ public class ImgurRipper extends AlbumRipper {
albumType = ALBUM_TYPE.USER;
return "user_" + gid;
}
+ // Pattern for new imgur user url https://imgur.com/user/username
+ p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/user/([a-zA-Z0-9]+).*$");
+ m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ String gid = m.group(1);
+ albumType = ALBUM_TYPE.USER;
+ return "user_" + gid;
+ }
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/all.*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
@@ -529,13 +546,13 @@ public class ImgurRipper extends AlbumRipper {
if (m.matches()) {
// Imgur subreddit aggregator
albumType = ALBUM_TYPE.SUBREDDIT;
- String album = m.group(2);
+ StringBuilder album = new StringBuilder(m.group(2));
for (int i = 3; i <= m.groupCount(); i++) {
if (m.group(i) != null) {
- album += "_" + m.group(i).replace("/", "");
+ album.append("_").append(m.group(i).replace("/", ""));
}
}
- return album;
+ return album.toString();
}
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/r/(\\w+)/([a-zA-Z0-9,]{5,}).*$");
m = p.matcher(url.toExternalForm());
@@ -544,7 +561,7 @@ public class ImgurRipper extends AlbumRipper {
albumType = ALBUM_TYPE.ALBUM;
String subreddit = m.group(m.groupCount() - 1);
String gid = m.group(m.groupCount());
- this.url = new URL("http://imgur.com/r/" + subreddit + "/" + gid);
+ this.url = new URI("https://imgur.com/r/" + subreddit + "/" + gid).toURL();
return "r_" + subreddit + "_" + gid;
}
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9]{5,})$");
@@ -554,29 +571,14 @@ public class ImgurRipper extends AlbumRipper {
albumType = ALBUM_TYPE.SINGLE_IMAGE;
return m.group(m.groupCount());
}
- p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
- m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- // Series of imgur images
- albumType = ALBUM_TYPE.SERIES_OF_IMAGES;
- String gid = m.group(m.groupCount());
- if (!gid.contains(",")) {
- throw new MalformedURLException("Imgur image doesn't contain commas");
- }
- return gid.replaceAll(",", "-");
- }
throw new MalformedURLException("Unsupported imgur URL format: " + url.toExternalForm());
}
- public ALBUM_TYPE getAlbumType() {
- return albumType;
- }
-
public static class ImgurImage {
String title = "";
String description = "";
- String extension = "";
- public URL url = null;
+ String extension;
+ public URL url;
ImgurImage(URL url) {
this.url = url;
@@ -586,14 +588,7 @@ public class ImgurRipper extends AlbumRipper {
this.extension = this.extension.substring(0, this.extension.indexOf("?"));
}
}
- ImgurImage(URL url, String title) {
- this(url);
- this.title = title;
- }
- public ImgurImage(URL url, String title, String description) {
- this(url, title);
- this.description = description;
- }
+
String getSaveAs() {
String saveAs = this.title;
String u = url.toExternalForm();
@@ -613,7 +608,7 @@ public class ImgurRipper extends AlbumRipper {
public static class ImgurAlbum {
String title = null;
- public URL url = null;
+ public URL url;
public List<ImgurImage> images = new ArrayList<>();
ImgurAlbum(URL url) {
this.url = url;
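A note on the new gallery pattern above: the greedy `.*-` consumes a title slug up to its last hyphen, so group 1 captures only the trailing alphanumeric ID. A minimal sanity check (the URL is hypothetical):

Pattern p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/gallery/(?:(?:[a-zA-Z0-9]*/)?.*-)?([a-zA-Z0-9]+)$");
Matcher m = p.matcher("https://imgur.com/gallery/cute-cat-dump-aBc12xY");
if (m.matches()) {
    System.out.println(m.group(1)); // prints "aBc12xY" -- only the ID after the last hyphen
}

Old-style bare-ID links such as https://imgur.com/gallery/aBc12xY still match, because the slug portion is optional.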
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index f231def4..4a4122ad 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -1,18 +1,13 @@
package com.rarchives.ripme.ripper.rippers;
+import com.oracle.js.parser.ErrorManager;
+import com.oracle.js.parser.Parser;
+import com.oracle.js.parser.ScriptEnvironment;
+import com.oracle.js.parser.Source;
+import com.oracle.js.parser.ir.*;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
-import jdk.nashorn.internal.ir.Block;
-import jdk.nashorn.internal.ir.CallNode;
-import jdk.nashorn.internal.ir.ExpressionStatement;
-import jdk.nashorn.internal.ir.FunctionNode;
-import jdk.nashorn.internal.ir.Statement;
-import jdk.nashorn.internal.parser.Parser;
-import jdk.nashorn.internal.runtime.Context;
-import jdk.nashorn.internal.runtime.ErrorManager;
-import jdk.nashorn.internal.runtime.Source;
-import jdk.nashorn.internal.runtime.options.Options;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
@@ -26,12 +21,7 @@ import java.time.Instant;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Spliterators;
+import java.util.*;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
@@ -176,13 +166,17 @@ public class InstagramRipper extends AbstractJSONRipper {
if (postRip) {
return null;
}
- Predicate<String> hrefFilter = (storiesRip || pinnedReelRip) ? href -> href.contains("Consumer.js") :
- href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
+
+ Predicate<String> hrefFilter = href -> href.contains("Consumer.js");
+ if (taggedRip) {
+ hrefFilter = href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
+ }
String href = doc.select("link[rel=preload]").stream()
- .map(link -> link.attr("href"))
- .filter(hrefFilter)
- .findFirst().orElse("");
+ .map(link -> link.attr("href"))
+ .filter(hrefFilter)
+ .findFirst().orElse("");
+
String body = Http.url("https://www.instagram.com" + href).cookies(cookies).response().body();
Function<String, String> hashExtractor =
@@ -198,7 +192,8 @@ public class InstagramRipper extends AbstractJSONRipper {
}
private String getProfileHash(String jsData) {
- return getHashValue(jsData, "loadProfilePageExtras", -1);
+ return getHashValue(jsData, "loadProfilePageExtras", -1,
+ s -> s.replaceAll(".*queryId\\s?:\\s?\"([0-9a-f]*)\".*", "$1"));
}
private String getPinnedHash(String jsData) {
@@ -386,7 +381,7 @@ public class InstagramRipper extends AbstractJSONRipper {
case "GraphSidecar":
JSONArray sideCar = getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges");
return getStreamOfJsonArray(sideCar).map(object -> object.getJSONObject("node"))
- .flatMap(this::parseRootForUrls);
+ .flatMap(this::parseRootForUrls);
default:
return Stream.empty();
}
@@ -413,26 +408,35 @@ public class InstagramRipper extends AbstractJSONRipper {
// Javascript parsing
/* ------------------------------------------------------------------------------------------------------- */
- private String getHashValue(String javaScriptData, String keyword, int offset) {
+ private String getHashValue(String javaScriptData, String keyword, int offset,
+ Function<String, String> extractHash) {
List<Statement> statements = getJsBodyBlock(javaScriptData).getStatements();
+
return statements.stream()
- .flatMap(statement -> filterItems(statement, ExpressionStatement.class))
- .map(ExpressionStatement::getExpression)
- .flatMap(expression -> filterItems(expression, CallNode.class))
- .map(CallNode::getArgs)
- .map(expressions -> expressions.get(0))
- .flatMap(expression -> filterItems(expression, FunctionNode.class))
- .map(FunctionNode::getBody)
- .map(Block::getStatements)
- .map(statementList -> lookForHash(statementList, keyword, offset))
- .filter(Objects::nonNull)
- .findFirst().orElse(null);
+ .flatMap(statement -> filterItems(statement, ExpressionStatement.class))
+ .map(ExpressionStatement::getExpression)
+ .flatMap(expression -> filterItems(expression, CallNode.class))
+ .map(CallNode::getArgs)
+ .map(expressions -> expressions.get(0))
+ .flatMap(expression -> filterItems(expression, FunctionNode.class))
+ .map(FunctionNode::getBody)
+ .map(Block::getStatements)
+ .map(statementList -> lookForHash(statementList, keyword, offset, extractHash))
+ .filter(Objects::nonNull)
+ .findFirst().orElse(null);
}
- private String lookForHash(List<Statement> list, String keyword, int offset) {
+ private String getHashValue(String javaScriptData, String keyword, int offset) {
+ return getHashValue(javaScriptData, keyword, offset, null);
+ }
+
+ private String lookForHash(List<Statement> list, String keyword, int offset, Function<String, String> extractHash) {
for (int i = 0; i < list.size(); i++) {
Statement st = list.get(i);
if (st.toString().contains(keyword)) {
+ if (extractHash != null) {
+ return extractHash.apply(list.get(i + offset).toString());
+ }
return list.get(i + offset).toString().replaceAll(".*\"([0-9a-f]*)\".*", "$1");
}
}
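The optional extractHash hook exists because Consumer.js stores the profile hash as a queryId property rather than a bare string literal. A minimal sketch of what that function does (the JS statement below is hypothetical, but shaped like the real ones):

Function<String, String> extractHash =
        s -> s.replaceAll(".*queryId\\s?:\\s?\"([0-9a-f]*)\".*", "$1");
String statement = "t.pagination = {queryId: \"472f257a40c653c64c666ce877d59d2b\"}";
System.out.println(extractHash.apply(statement)); // 472f257a40c653c64c666ce877d59d2b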
@@ -444,9 +448,10 @@ public class InstagramRipper extends AbstractJSONRipper {
}
private Block getJsBodyBlock(String javaScriptData) {
- ErrorManager errors = new ErrorManager();
- Context context = new Context(new Options("nashorn"), errors, Thread.currentThread().getContextClassLoader());
- return new Parser(context.getEnv(), Source.sourceFor("name", javaScriptData), errors).parse().getBody();
+ ScriptEnvironment env = ScriptEnvironment.builder().ecmaScriptVersion(10).constAsVar(true).build();
+ ErrorManager errorManager = new ErrorManager.ThrowErrorManager();
+ Source src = Source.sourceFor("name", javaScriptData);
+ return new Parser(env, src, errorManager).parse().getBody();
}
// Some JSON helper methods below
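The jdk.nashorn.internal classes used before are gone from modern JDKs; the GraalVM parser mirrors them nearly one-to-one. A minimal sketch of the replacement flow, using only the calls that appear in this hunk:

ScriptEnvironment env = ScriptEnvironment.builder().ecmaScriptVersion(10).constAsVar(true).build();
Source src = Source.sourceFor("probe", "var queryId = \"abc123\";");
Block body = new Parser(env, src, new ErrorManager.ThrowErrorManager()).parse().getBody();
body.getStatements().forEach(st -> System.out.println(st)); // top-level statements to scan for the hash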
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
index e7af19bc..84fad505 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
@@ -55,12 +55,6 @@ public class JabArchivesRipper extends AbstractHTMLRipper {
"jabarchives.com/main/view/albumname - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
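This getFirstPage() deletion recurs across many rippers in this patch; it is safe only because AbstractHTMLRipper presumably now supplies the same one-liner as a default (see MyhentaicomicsRipper further down, which calls super.getFirstPage()). A sketch of the assumed superclass default:

// assumed default in AbstractHTMLRipper -- not verified here
protected Document getFirstPage() throws IOException {
    return Http.url(url).get(); // "url" is the ripper's instance field
}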
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/JagodibujaRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/JagodibujaRipper.java
index d5df1fe5..2f2d5c33 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/JagodibujaRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/JagodibujaRipper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -40,12 +42,6 @@ public class JagodibujaRipper extends AbstractHTMLRipper {
throw new MalformedURLException("Expected jagodibuja.com gallery formats hwww.jagodibuja.com/Comic name/ got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
@@ -62,8 +58,8 @@ public class JagodibujaRipper extends AbstractHTMLRipper {
Element elem = comicPage.select("span.full-size-link > a").first();
LOGGER.info("Got link " + elem.attr("href"));
try {
- addURLToDownload(new URL(elem.attr("href")), "");
- } catch (MalformedURLException e) {
+ addURLToDownload(new URI(elem.attr("href")).toURL(), "");
+ } catch (MalformedURLException | URISyntaxException e) {
LOGGER.warn("Malformed URL");
e.printStackTrace();
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/Jpg3Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/Jpg3Ripper.java
new file mode 100644
index 00000000..c79e02bc
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/Jpg3Ripper.java
@@ -0,0 +1,70 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+public class Jpg3Ripper extends AbstractHTMLRipper {
+
+ public Jpg3Ripper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getDomain() {
+ return "jpg3.su";
+ }
+
+ @Override
+ public String getHost() {
+ return "jpg3";
+ }
+
+ @Override
+ public List<String> getURLsFromPage(Document page) {
+ List<String> urls = new ArrayList<>();
+
+ for (Element el : page.select(".image-container > img")) {
+ urls.add(el.attr("src").replaceAll("\\.md", ""));
+ }
+
+ return urls;
+ }
+
+ @Override
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
+ String u = url.toExternalForm();
+ u = u.replaceAll("https?://jpg3.su/a/([^/]+)/?.*", "https://jpg3.su/a/$1");
+ LOGGER.debug("Changed URL from " + url + " to " + u);
+ return new URI(u).toURL();
+ }
+
+ @Override
+ public Document getNextPage(Document page) throws IOException, URISyntaxException {
+ String href = page.select("[data-pagination='next']").attr("href");
+ if (!href.isEmpty()) {
+ return Http.url(href).get();
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ return url.toString().split("/")[url.toString().split("/").length - 1];
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
+ }
+}
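Two details of this new ripper worth noting: sanitizeURL trims everything after the album slug, and getURLsFromPage strips the ".md" infix so thumbnail src values point at the full-size files. A sketch with hypothetical URLs:

String album = "https://jpg3.su/a/my-album.123/?sort=date_desc&page=2";
System.out.println(album.replaceAll("https?://jpg3.su/a/([^/]+)/?.*", "https://jpg3.su/a/$1"));
// -> https://jpg3.su/a/my-album.123
String thumb = "https://simp.jpg3.su/images/photo.md.jpg"; // hypothetical CDN path
System.out.println(thumb.replaceAll("\\.md", "")); // -> https://simp.jpg3.su/images/photo.jpg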
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixDotOneRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/KingcomixRipper.java
similarity index 53%
rename from src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixDotOneRipper.java
rename to src/main/java/com/rarchives/ripme/ripper/rippers/KingcomixRipper.java
index c1e7fac7..bb8194bc 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixDotOneRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/KingcomixRipper.java
@@ -14,49 +14,38 @@ import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
-public class PorncomixDotOneRipper extends AbstractHTMLRipper {
+public class KingcomixRipper extends AbstractHTMLRipper {
- public PorncomixDotOneRipper(URL url) throws IOException {
+ public KingcomixRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
- return "porncomix";
+ return "kingcomix";
}
@Override
public String getDomain() {
- return "porncomix.one";
+ return "kingcomix.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("https?://www.porncomix.one/gallery/([a-zA-Z0-9_\\-]*)/?$");
+ Pattern p = Pattern.compile("https://kingcomix.com/([a-zA-Z1-9_-]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
- throw new MalformedURLException("Expected proncomix URL format: " +
- "porncomix.one/gallery/comic - got " + url + " instead");
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
+ throw new MalformedURLException("Expected kingcomix URL format: " +
+ "kingcomix.com/COMIX - got " + url + " instead");
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
- // We have 2 loops here to cover all the different album types
- for (Element el : doc.select(".dgwt-jg-item > a")) {
- result.add(el.attr("href"));
- }
- for (Element el : doc.select(".unite-gallery > img")) {
- result.add(el.attr("data-image"));
-
+ for (Element el : doc.select("div.entry-content > p > img")) {
+ result.add(el.attr("src"));
}
return result;
}
@@ -65,4 +54,4 @@ public class PorncomixDotOneRipper extends AbstractHTMLRipper {
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
-}
\ No newline at end of file
+}
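The rewritten selector assumes kingcomix embeds each comic page as a plain <img> inside the post body. A self-contained jsoup sketch of that selector (synthetic HTML):

Document doc = Jsoup.parse("<div class=\"entry-content\"><p>"
        + "<img src=\"https://kingcomix.com/c/01.jpg\">"
        + "<img src=\"https://kingcomix.com/c/02.jpg\"></p></div>");
for (Element el : doc.select("div.entry-content > p > img")) {
    System.out.println(el.attr("src")); // 01.jpg, then 02.jpg
}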
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java
index 8986fd91..408310a7 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java
@@ -1,234 +1,236 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ripper.DownloadThreadPool;
-import com.rarchives.ripme.utils.Http;
-
-
-
-/**
- * @author Tushar
- *
- */
-public class ListalRipper extends AbstractHTMLRipper {
-
- private Pattern p1 = Pattern.compile("https:\\/\\/www.listal.com\\/list\\/([a-zA-Z0-9-]+)");
- private Pattern p2 =
- Pattern.compile("https:\\/\\/www.listal.com\\/((?:(?:[a-zA-Z0-9-]+)\\/?)+)");
- private String listId = null; // listId to get more images via POST.
- private String postUrl = "https://www.listal.com/item-list/"; //to load more images.
- private UrlType urlType = UrlType.UNKNOWN;
-
- private DownloadThreadPool listalThreadPool = new DownloadThreadPool("listalThreadPool");
-
- public ListalRipper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public String getDomain() {
- return "listal.com";
- }
-
- @Override
- public String getHost() {
- return "listal";
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- Document doc = Http.url(url).get();
- if (urlType == UrlType.LIST) {
- listId = doc.select("#customlistitems").first().attr("data-listid"); // Used for list types.
- }
- return doc;
- }
-
- @Override
- public List<String> getURLsFromPage(Document page) {
- if (urlType == UrlType.LIST) {
- // for url of type LIST, https://www.listal.com/list/my-list
- return getURLsForListType(page);
- } else if (urlType == UrlType.FOLDER) {
- // for url of type FOLDER, https://www.listal.com/jim-carrey/pictures
- return getURLsForFolderType(page);
- }
- return null;
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- listalThreadPool.addThread(new ListalImageDownloadThread(url, index));
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Matcher m1 = p1.matcher(url.toExternalForm());
- if (m1.matches()) {
- // Return the text contained between () in the regex
- urlType = UrlType.LIST;
- return m1.group(1);
- }
-
- Matcher m2 = p2.matcher(url.toExternalForm());
- if (m2.matches()) {
- // Return only gid from capturing group of type listal.com/tvOrSomething/dexter/pictures
- urlType = UrlType.FOLDER;
- return getFolderTypeGid(m2.group(1));
- }
-
- throw new MalformedURLException("Expected listal.com URL format: "
- + "listal.com/list/my-list-name - got " + url + " instead.");
- }
-
- @Override
- public Document getNextPage(Document page) throws IOException {
- Document nextPage = super.getNextPage(page);
- switch (urlType) {
- case LIST:
- if (!page.select(".loadmoreitems").isEmpty()) {
- // All items are not loaded.
- // Load remaining items using postUrl.
-
- String offSet = page.select(".loadmoreitems").last().attr("data-offset");
- Map<String, String> postParams = new HashMap<>();
- postParams.put("listid", listId);
- postParams.put("offset", offSet);
- try {
- nextPage = Http.url(postUrl).data(postParams).retries(3).post();
- } catch (IOException e1) {
- LOGGER.error("Failed to load more images after " + offSet, e1);
- throw e1;
- }
- }
- break;
-
- case FOLDER:
- Elements pageLinks = page.select(".pages a");
- if (!pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next")) {
- String nextUrl = pageLinks.last().attr("abs:href");
- nextPage = Http.url(nextUrl).retries(3).get();
- }
- break;
-
- case UNKNOWN:
- default:
- }
- return nextPage;
- }
-
-
- @Override
- public DownloadThreadPool getThreadPool() {
- return listalThreadPool;
- }
-
- /**
- * Returns the image urls for UrlType LIST.
- */
- private List<String> getURLsForListType(Document page) {
- List<String> list = new ArrayList<>();
- for (Element e : page.select(".pure-g a[href*=viewimage]")) {
- //list.add("https://www.listal.com" + e.attr("href") + "h");
- list.add(e.attr("abs:href") + "h");
- }
-
- return list;
- }
-
- /**
- * Returns the image urls for UrlType FOLDER.
- */
- private List<String> getURLsForFolderType(Document page) {
- List<String> list = new ArrayList<>();
- for (Element e : page.select("#browseimagescontainer .imagewrap-outer a")) {
- list.add(e.attr("abs:href") + "h");
- }
- return list;
- }
-
- /**
- * Returns the gid for url type listal.com/tvOrSomething/dexter/pictures
- */
- public String getFolderTypeGid(String group) throws MalformedURLException {
- String[] folders = group.split("/");
- try {
- if (folders.length == 2 && folders[1].equals("pictures")) {
- // Url is probably for an actor.
- return folders[0];
- }
-
- if (folders.length == 3 && folders[2].equals("pictures")) {
- // Url if for a folder(like movies, tv etc).
- Document doc = Http.url(url).get();
- return doc.select(".itemheadingmedium").first().text();
- }
-
- } catch (Exception e) {
- LOGGER.error(e);
- }
- throw new MalformedURLException("Unable to fetch the gid for given url.");
- }
-
- private class ListalImageDownloadThread extends Thread {
-
- private URL url;
- private int index;
-
- public ListalImageDownloadThread(URL url, int index) {
- super();
- this.url = url;
- this.index = index;
- }
-
- @Override
- public void run() {
- getImage();
- }
-
- public void getImage() {
- try {
- Document doc = Http.url(url).get();
-
- String imageUrl = doc.getElementsByClass("pure-img").attr("src");
- if (imageUrl != "") {
- addURLToDownload(new URL(imageUrl), getPrefix(index), "", null, null,
- getImageName());
- } else {
- LOGGER.error("Couldnt find image from url: " + url);
- }
- } catch (IOException e) {
- LOGGER.error("[!] Exception while downloading image: " + url, e);
- }
- }
-
- public String getImageName() {
- // Returns the image number of the link if possible.
- String name = this.url.toExternalForm();
- try {
- name = name.substring(name.lastIndexOf("/") + 1);
- } catch (Exception e) {
- LOGGER.info("Failed to get name for the image.");
- name = null;
- }
- // Listal stores images as .jpg
- return name + ".jpg";
- }
- }
-
- private static enum UrlType {
- LIST, FOLDER, UNKNOWN
- }
-}
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ripper.DownloadThreadPool;
+import com.rarchives.ripme.utils.Http;
+
+
+
+/**
+ * @author Tushar
+ *
+ */
+public class ListalRipper extends AbstractHTMLRipper {
+
+ private Pattern p1 = Pattern.compile("https:\\/\\/www.listal.com\\/list\\/([a-zA-Z0-9-]+)");
+ private Pattern p2 =
+ Pattern.compile("https:\\/\\/www.listal.com\\/((?:(?:[a-zA-Z0-9-_%]+)\\/?)+)");
+ private String listId = null; // listId to get more images via POST.
+ private String postUrl = "https://www.listal.com/item-list/"; //to load more images.
+ private UrlType urlType = UrlType.UNKNOWN;
+
+ private DownloadThreadPool listalThreadPool = new DownloadThreadPool("listalThreadPool");
+
+ public ListalRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getDomain() {
+ return "listal.com";
+ }
+
+ @Override
+ public String getHost() {
+ return "listal";
+ }
+
+ @Override
+ public Document getFirstPage() throws IOException {
+ Document doc = Http.url(url).get();
+ if (urlType == UrlType.LIST) {
+ listId = doc.select("#customlistitems").first().attr("data-listid"); // Used for list types.
+ }
+ return doc;
+ }
+
+ @Override
+ public List<String> getURLsFromPage(Document page) {
+ if (urlType == UrlType.LIST) {
+ // for url of type LIST, https://www.listal.com/list/my-list
+ return getURLsForListType(page);
+ } else if (urlType == UrlType.FOLDER) {
+ // for url of type FOLDER, https://www.listal.com/jim-carrey/pictures
+ return getURLsForFolderType(page);
+ }
+ return null;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ listalThreadPool.addThread(new ListalImageDownloadThread(url, index));
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Matcher m1 = p1.matcher(url.toExternalForm());
+ if (m1.matches()) {
+ // Return the text contained between () in the regex
+ urlType = UrlType.LIST;
+ return m1.group(1);
+ }
+
+ Matcher m2 = p2.matcher(url.toExternalForm());
+ if (m2.matches()) {
+ // Return only gid from capturing group of type listal.com/tvOrSomething/dexter/pictures
+ urlType = UrlType.FOLDER;
+ return getFolderTypeGid(m2.group(1));
+ }
+
+ throw new MalformedURLException("Expected listal.com URL format: "
+ + "listal.com/list/my-list-name - got " + url + " instead.");
+ }
+
+ @Override
+ public Document getNextPage(Document page) throws IOException, URISyntaxException {
+ Document nextPage = super.getNextPage(page);
+ switch (urlType) {
+ case LIST:
+ if (!page.select(".loadmoreitems").isEmpty()) {
+ // All items are not loaded.
+ // Load remaining items using postUrl.
+
+ String offSet = page.select(".loadmoreitems").last().attr("data-offset");
+ Map<String, String> postParams = new HashMap<>();
+ postParams.put("listid", listId);
+ postParams.put("offset", offSet);
+ try {
+ nextPage = Http.url(postUrl).data(postParams).retries(3).post();
+ } catch (IOException e1) {
+ LOGGER.error("Failed to load more images after " + offSet, e1);
+ throw e1;
+ }
+ }
+ break;
+
+ case FOLDER:
+ Elements pageLinks = page.select(".pages a");
+ if (!pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next")) {
+ String nextUrl = pageLinks.last().attr("abs:href");
+ nextPage = Http.url(nextUrl).retries(3).get();
+ }
+ break;
+
+ case UNKNOWN:
+ default:
+ }
+ return nextPage;
+ }
+
+
+ @Override
+ public DownloadThreadPool getThreadPool() {
+ return listalThreadPool;
+ }
+
+ /**
+ * Returns the image urls for UrlType LIST.
+ */
+ private List<String> getURLsForListType(Document page) {
+ List<String> list = new ArrayList<>();
+ for (Element e : page.select(".pure-g a[href*=viewimage]")) {
+ //list.add("https://www.listal.com" + e.attr("href") + "h");
+ list.add(e.attr("abs:href") + "h");
+ }
+
+ return list;
+ }
+
+ /**
+ * Returns the image urls for UrlType FOLDER.
+ */
+ private List<String> getURLsForFolderType(Document page) {
+ List<String> list = new ArrayList<>();
+ for (Element e : page.select("#browseimagescontainer .imagewrap-outer a")) {
+ list.add(e.attr("abs:href") + "h");
+ }
+ return list;
+ }
+
+ /**
+ * Returns the gid for url type listal.com/tvOrSomething/dexter/pictures
+ */
+ public String getFolderTypeGid(String group) throws MalformedURLException {
+ String[] folders = group.split("/");
+ try {
+ if (folders.length == 2 && folders[1].equals("pictures")) {
+ // Url is probably for an actor.
+ return folders[0];
+ }
+
+ if (folders.length == 3 && folders[2].equals("pictures")) {
+ // Url is for a folder (like movies, tv, etc.).
+ Document doc = Http.url(url).get();
+ return doc.select(".itemheadingmedium").first().text();
+ }
+
+ } catch (Exception e) {
+ LOGGER.error(e);
+ }
+ throw new MalformedURLException("Unable to fetch the gid for given url.");
+ }
+
+ private class ListalImageDownloadThread implements Runnable {
+
+ private final URL url;
+ private final int index;
+
+ public ListalImageDownloadThread(URL url, int index) {
+ super();
+ this.url = url;
+ this.index = index;
+ }
+
+ @Override
+ public void run() {
+ getImage();
+ }
+
+ public void getImage() {
+ try {
+ Document doc = Http.url(url).get();
+
+ String imageUrl = doc.getElementsByClass("pure-img").attr("src");
+ if (!imageUrl.isEmpty()) {
+ addURLToDownload(new URI(imageUrl).toURL(), getPrefix(index), "", null, null,
+ getImageName());
+ } else {
+ LOGGER.error("Couldnt find image from url: " + url);
+ }
+ } catch (IOException | URISyntaxException e) {
+ LOGGER.error("[!] Exception while downloading image: " + url, e);
+ }
+ }
+
+ public String getImageName() {
+ // Returns the image number of the link if possible.
+ String name = this.url.toExternalForm();
+ try {
+ name = name.substring(name.lastIndexOf("/") + 1);
+ } catch (Exception e) {
+ LOGGER.info("Failed to get name for the image.");
+ name = null;
+ }
+ // Listal stores images as .jpg
+ return name + ".jpg";
+ }
+ }
+
+ private static enum UrlType {
+ LIST, FOLDER, UNKNOWN
+ }
+}
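The LIST branch of getNextPage works because listal's "load more" widget is a plain POST endpoint. A sketch of that call with hypothetical values (listid comes from #customlistitems[data-listid], offset from .loadmoreitems[data-offset]):

Map<String, String> postParams = new HashMap<>();
postParams.put("listid", "1234567"); // hypothetical
postParams.put("offset", "40");      // hypothetical
Document more = Http.url("https://www.listal.com/item-list/").data(postParams).retries(3).post();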
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
index b10a1dc2..53b0fef5 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
@@ -1,26 +1,26 @@
package com.rarchives.ripme.ripper.rippers;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Connection;
+import org.jsoup.nodes.Document;
+
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ripper.DownloadThreadPool;
-import com.rarchives.ripme.utils.Http;
-
public class LusciousRipper extends AbstractHTMLRipper {
- private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
+ private static String albumid;
- private Pattern p = Pattern.compile("^https?://(?:www\\.)?(?:members\\.||legacy\\.||old\\.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
- private DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");
+ private static final Pattern P = Pattern.compile("^https?://(?:www\\.)?(?:members\\.||legacy\\.||old\\.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
public LusciousRipper(URL url) throws IOException {
super(url);
@@ -46,40 +46,48 @@ public class LusciousRipper extends AbstractHTMLRipper {
}
@Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- Document page = Http.url(url).get();
- LOGGER.info("First page is " + url);
- return page;
- }
-
- @Override
- public List<String> getURLsFromPage(Document page) {
+ public List<String> getURLsFromPage(Document page) { // gets urls for all pages through the api
List<String> urls = new ArrayList<>();
- Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
- for (Element e : urlElements) {
- urls.add(e.attr("abs:href"));
+ int totalPages = 1;
+
+ for (int i = 1; i <= totalPages; i++) {
+ String APIStringWOVariables = "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=%2520query%2520PictureListInsideAlbum%28%2524input%253A%2520PictureListInput%21%29%2520%257B%2520picture%2520%257B%2520list%28input%253A%2520%2524input%29%2520%257B%2520info%2520%257B%2520...FacetCollectionInfo%2520%257D%2520items%2520%257B%2520__typename%2520id%2520title%2520description%2520created%2520like_status%2520number_of_comments%2520number_of_favorites%2520moderation_status%2520width%2520height%2520resolution%2520aspect_ratio%2520url_to_original%2520url_to_video%2520is_animated%2520position%2520permissions%2520url%2520tags%2520%257B%2520category%2520text%2520url%2520%257D%2520thumbnails%2520%257B%2520width%2520height%2520size%2520url%2520%257D%2520%257D%2520%257D%2520%257D%2520%257D%2520fragment%2520FacetCollectionInfo%2520on%2520FacetCollectionInfo%2520%257B%2520page%2520has_next_page%2520has_previous_page%2520total_items%2520total_pages%2520items_per_page%2520url_complete%2520%257D%2520&variables=";
+ Connection con = Http.url(APIStringWOVariables + encodeVariablesPartOfURL(i, albumid)).method(Connection.Method.GET).retries(5).connection();
+ con.ignoreHttpErrors(true);
+ con.ignoreContentType(true);
+ con.userAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0");
+ Connection.Response res;
+ try {
+ res = con.execute();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ String body = res.body();
+
+ JSONObject jsonObject = new JSONObject(body);
+
+ JSONObject data = jsonObject.getJSONObject("data");
+ JSONObject picture = data.getJSONObject("picture");
+ JSONObject list = picture.getJSONObject("list");
+ JSONArray items = list.getJSONArray("items");
+ JSONObject info = list.getJSONObject("info");
+ totalPages = info.getInt("total_pages");
+
+ for (int j = 0; j < items.length(); j++) {
+ JSONObject item = items.getJSONObject(j);
+ String urlToOriginal = item.getString("url_to_original");
+ urls.add(urlToOriginal);
+ }
}
return urls;
}
- @Override
- public Document getNextPage(Document doc) throws IOException {
- // luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
- // Simply GET the nextPageUrl also works. Therefore, we do this...
- Element nextPageElement = doc.select("div#next_page > div > a").first();
- if (nextPageElement == null) {
- throw new IOException("No next page found.");
- }
-
- return Http.url(nextPageElement.attr("abs:href")).get();
- }
-
@Override
public String getGID(URL url) throws MalformedURLException {
- Matcher m = p.matcher(url.toExternalForm());
+ Matcher m = P.matcher(url.toExternalForm());
if (m.matches()) {
+ albumid = m.group(1).split("_")[m.group(1).split("_").length - 1];
return m.group(1);
}
throw new MalformedURLException("Expected luscious.net URL format: "
@@ -87,45 +95,17 @@ public class LusciousRipper extends AbstractHTMLRipper {
}
@Override
- public void downloadURL(URL url, int index) {
- lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
+ protected void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
}
- @Override
- public DownloadThreadPool getThreadPool() {
- return lusciousThreadPool;
- }
+ public static String encodeVariablesPartOfURL(int page, String albumId) {
+ try {
+ String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}";
- public class LusciousDownloadThread extends Thread {
- private URL url;
- private int index;
-
- public LusciousDownloadThread(URL url, int index) {
- this.url = url;
- this.index = index;
+ return URLEncoder.encode(json, "UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new IllegalStateException("Could not encode variables");
}
-
- @Override
- public void run() {
- try {
- Document page = Http.url(url).retries(RETRY_COUNT).get();
-
- String downloadUrl = page.select(".icon-download").attr("abs:href");
- if (downloadUrl.equals("")) {
- // This is here for pages with mp4s instead of images.
- downloadUrl = page.select("div > video > source").attr("src");
- if (!downloadUrl.equals("")) {
- throw new IOException("Could not find download url for image or video.");
- }
- }
-
- //If a valid download url was found.
- addURLToDownload(new URL(downloadUrl), getPrefix(index));
-
- } catch (IOException e) {
- LOGGER.error("Error downloadiong url " + url, e);
- }
- }
-
}
}
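The long APIStringWOVariables constant is a percent-encoded GraphQL query; only the variables blob changes per request. encodeVariablesPartOfURL simply URL-encodes a JSON filter, e.g. (hypothetical album id):

String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"383235\"}],"
        + "\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":1}}";
System.out.println(URLEncoder.encode(json, "UTF-8"));
// %7B%22input%22%3A... -- appended after "&variables=" on the apicdn.luscious.net URL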
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java
index 6697a45b..8c6c9227 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java
@@ -1,36 +1,42 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
+import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.Utils;
import org.json.JSONArray;
import org.json.JSONObject;
-import org.jsoup.Connection;
-import org.jsoup.nodes.Document;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
+import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MangadexRipper extends AbstractJSONRipper {
- private String chapterApiEndPoint = "https://mangadex.org/api/chapter/";
-
- private String getImageUrl(String chapterHash, String imageName, String server) {
- return server + chapterHash + "/" + imageName;
- }
+ private final String chapterApiEndPoint = "https://mangadex.org/api/chapter/";
+ private final String mangaApiEndPoint = "https://mangadex.org/api/manga/";
+ private boolean isSingleChapter;
public MangadexRipper(URL url) throws IOException {
super(url);
}
+ private String getImageUrl(String chapterHash, String imageName, String server) {
+ return server + chapterHash + "/" + imageName;
+ }
+
@Override
public String getHost() {
return "mangadex";
}
+
@Override
public String getDomain() {
return "mangadex.org";
@@ -44,14 +50,19 @@ public class MangadexRipper extends AbstractJSONRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
String capID = getChapterID(url.toExternalForm());
+ String mangaID = getMangaID(url.toExternalForm());
if (capID != null) {
+ isSingleChapter = true;
return capID;
+ } else if (mangaID != null) {
+ isSingleChapter = false;
+ return mangaID;
}
throw new MalformedURLException("Unable to get chapter ID from" + url);
}
private String getChapterID(String url) {
- Pattern p = Pattern.compile("https://mangadex.org/chapter/([\\d]+)/?");
+ Pattern p = Pattern.compile("https://mangadex.org/chapter/([\\d]+)/([\\d+]?)");
Matcher m = p.matcher(url);
if (m.matches()) {
return m.group(1);
@@ -59,26 +70,79 @@ public class MangadexRipper extends AbstractJSONRipper {
return null;
}
+ private String getMangaID(String url) {
+ Pattern p = Pattern.compile("https://mangadex.org/title/([\\d]+)/(.+)");
+ Matcher m = p.matcher(url);
+ if (m.matches()) {
+ return m.group(1);
+ }
+ return null;
+ }
+
+
@Override
- public JSONObject getFirstPage() throws IOException {
+ public JSONObject getFirstPage() throws IOException, URISyntaxException {
// Get the chapter ID
String chapterID = getChapterID(url.toExternalForm());
- return Http.url(new URL(chapterApiEndPoint + chapterID)).getJSON();
+ String mangaID = getMangaID(url.toExternalForm());
+ if (mangaID != null) {
+ return Http.url(new URI(mangaApiEndPoint + mangaID).toURL()).getJSON();
+ } else
+ return Http.url(new URI(chapterApiEndPoint + chapterID).toURL()).getJSON();
}
@Override
protected List<String> getURLsFromJSON(JSONObject json) {
+ if (isSingleChapter) {
+ List<String> assetURLs = new ArrayList<>();
+ JSONArray currentObject;
+ String chapterHash;
+ // Server is the cdn hosting the images.
+ String server;
+ chapterHash = json.getString("hash");
+ server = json.getString("server");
+ for (int i = 0; i < json.getJSONArray("page_array").length(); i++) {
+ currentObject = json.getJSONArray("page_array");
+
+ assetURLs.add(getImageUrl(chapterHash, currentObject.getString(i), server));
+ }
+ return assetURLs;
+ }
+ JSONObject chaptersJSON = (JSONObject) json.get("chapter");
+ JSONObject temp;
+ Iterator<String> keys = chaptersJSON.keys();
+ HashMap<Double, String> chapterIDs = new HashMap<>();
+ while (keys.hasNext()) {
+ String keyValue = keys.next();
+ temp = (JSONObject) chaptersJSON.get(keyValue);
+ if (temp.getString("lang_name").equals("English")) {
+ chapterIDs.put(temp.getDouble("chapter"), keyValue);
+ }
+
+ }
+
List<String> assetURLs = new ArrayList<>();
JSONArray currentObject;
-
- String chapterHash = json.getString("hash");
+ String chapterHash;
// Server is the cdn hosting the images.
- String server = json.getString("server");
+ String server;
+ JSONObject chapterJSON = null;
+ TreeMap<Double, String> treeMap = new TreeMap<>(chapterIDs);
+ for (Double aDouble : treeMap.keySet()) {
+ double key = (double) aDouble;
+ try {
+ chapterJSON = Http.url(new URI(chapterApiEndPoint + treeMap.get(key)).toURL()).getJSON();
+ } catch (IOException | URISyntaxException e) {
+ e.printStackTrace();
+ }
+ sendUpdate(RipStatusMessage.STATUS.LOADING_RESOURCE, "chapter " + key);
+ chapterHash = chapterJSON.getString("hash");
+ server = chapterJSON.getString("server");
+ for (int i = 0; i < chapterJSON.getJSONArray("page_array").length(); i++) {
+ currentObject = chapterJSON.getJSONArray("page_array");
- for (int i = 0; i < json.getJSONArray("page_array").length(); i++) {
- currentObject = json.getJSONArray("page_array");
-
- assetURLs.add(getImageUrl(chapterHash, currentObject.getString(i), server));
+ assetURLs.add(getImageUrl(chapterHash, currentObject.getString(i), server));
+ }
}
return assetURLs;
@@ -91,4 +155,5 @@ public class MangadexRipper extends AbstractJSONRipper {
addURLToDownload(url, getPrefix(index));
}
-}
+
+}
\ No newline at end of file
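The English chapters are collected into a HashMap keyed by chapter number, then re-wrapped in a TreeMap so they are fetched in ascending order. A sketch with hypothetical chapter ids:

HashMap<Double, String> chapterIDs = new HashMap<>();
chapterIDs.put(10.5, "44321");
chapterIDs.put(2.0, "40001");
chapterIDs.put(10.0, "44100");
new TreeMap<>(chapterIDs).forEach((num, id) -> System.out.println("chapter " + num + " -> " + id));
// chapter 2.0 -> 40001, chapter 10.0 -> 44100, chapter 10.5 -> 44321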
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ManganeloRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ManganeloRipper.java
index f4325aa1..c5f6b142 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ManganeloRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ManganeloRipper.java
@@ -48,12 +48,6 @@ public class ManganeloRipper extends AbstractHTMLRipper {
"/manganelo.com/manga/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
Element elem = doc.select("div.btn-navigation-chap > a.back").first();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
index 8bdd2b2f..2c83ce7e 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
@@ -21,12 +21,12 @@ public class MeituriRipper extends AbstractHTMLRipper {
@Override
public String getHost() {
- return "meituri";
+ return "tujigu";
}
@Override
public String getDomain() {
- return "meituri.com";
+ return "tujigu.com";
}
// To use in getting URLs
@@ -35,23 +35,18 @@ public class MeituriRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
// without escape
- // ^https?://[w.]*meituri\.com/a/([0-9]+)/([0-9]+\.html)*$
- // https://www.meituri.com/a/14449/
- // also matches https://www.meituri.com/a/14449/3.html etc.
+ // ^https?://[w.]*tujigu\.com/a/([0-9]+)/([0-9]+\.html)*$
+ // https://www.tujigu.com/a/14449/
+ // also matches https://www.tujigu.com/a/14449/3.html etc.
// group 1 is 14449
- Pattern p = Pattern.compile("^https?://[w.]*meituri\\.com/a/([0-9]+)/([0-9]+\\.html)*$");
+ Pattern p = Pattern.compile("^https?://[w.]*tujigu\\.com/a/([0-9]+)/([0-9]+\\.html)*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
albumID = m.group(1);
return m.group(1);
}
throw new MalformedURLException(
- "Expected meituri.com URL format: " + "meituri.com/a/albumid/ - got " + url + "instead");
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
+ "Expected tujigu.com URL format: " + "tujigu.com/a/albumid/ - got " + url + "instead");
}
@Override
@@ -71,7 +66,7 @@ public class MeituriRipper extends AbstractHTMLRipper {
}
// Base URL: http://ii.hywly.com/a/1/albumid/imgnum.jpg
- String baseURL = "http://ii.hywly.com/a/1/" + albumID + "/";
+ String baseURL = "https://tjg.hywly.com/a/1/" + albumID + "/";
// Loop through and add images to the URL list
for (int i = 1; i <= numOfImages; i++) {
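Image URLs on this site are synthesized rather than scraped: the page only yields the image count, and files are numbered sequentially under the album directory. For the album 14449 used in the comment above, the loop produces:

String baseURL = "https://tjg.hywly.com/a/1/14449/";
for (int i = 1; i <= 3; i++) { // numOfImages is parsed from the page
    System.out.println(baseURL + i + ".jpg");
}
// https://tjg.hywly.com/a/1/14449/1.jpg ... /3.jpg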
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java
index 0b513b37..c2d6ed47 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java
@@ -41,11 +41,6 @@ public class ModelxRipper extends AbstractHTMLRipper {
throw new MalformedURLException("Expected URL format: http://www.modelx.org/[category (one or more)]/xxxxx got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
@Override
public List getURLsFromPage(Document page) {
List result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java
index 7bb8451a..d2af02a1 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -13,7 +15,6 @@ import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.select.Elements;
@@ -59,20 +60,21 @@ public class MotherlessRipper extends AbstractHTMLRipper {
if (!notHome) {
StringBuilder newPath = new StringBuilder(path);
newPath.insert(2, "M");
- firstURL = new URL(this.url, "https://" + DOMAIN + newPath);
+ firstURL = URI.create("https://" + DOMAIN + newPath).toURL();
LOGGER.info("Changed URL to " + firstURL);
}
return Http.url(firstURL).referrer("https://motherless.com").get();
}
@Override
- public Document getNextPage(Document doc) throws IOException {
+ public Document getNextPage(Document doc) throws IOException, URISyntaxException {
+
Elements nextPageLink = doc.head().select("link[rel=next]");
if (nextPageLink.isEmpty()) {
throw new IOException("Last page reached");
} else {
String referrerLink = doc.head().select("link[rel=canonical]").first().attr("href");
- URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
+ URL nextURL = this.url.toURI().resolve(nextPageLink.first().attr("href")).toURL();
return Http.url(nextURL).referrer(referrerLink).get();
}
}
@@ -81,7 +83,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
protected List<String> getURLsFromPage(Document page) {
List<String> pageURLs = new ArrayList<>();
- for (Element thumb : page.select("div.thumb a.img-container")) {
+ for (Element thumb : page.select("div.thumb-container a.img-container")) {
if (isStopped()) {
break;
}
@@ -109,7 +111,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
@Override
protected void downloadURL(URL url, int index) {
// Create thread for finding image at "url" page
- MotherlessImageThread mit = new MotherlessImageThread(url, index);
+ MotherlessImageRunnable mit = new MotherlessImageRunnable(url, index);
motherlessThreadPool.addThread(mit);
try {
Thread.sleep(IMAGE_SLEEP_TIME);
@@ -148,15 +150,19 @@ public class MotherlessRipper extends AbstractHTMLRipper {
throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url);
}
-
+ @Override
+ protected DownloadThreadPool getThreadPool() {
+ return motherlessThreadPool;
+ }
+
/**
* Helper class to find and download images found on "image" pages
*/
- private class MotherlessImageThread extends Thread {
- private URL url;
- private int index;
+ private class MotherlessImageRunnable implements Runnable {
+ private final URL url;
+ private final int index;
- MotherlessImageThread(URL url, int index) {
+ MotherlessImageRunnable(URL url, int index) {
super();
this.url = url;
this.index = index;
@@ -180,11 +186,11 @@ public class MotherlessRipper extends AbstractHTMLRipper {
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
- addURLToDownload(new URL(file), prefix);
+ addURLToDownload(new URI(file).toURL(), prefix);
} else {
LOGGER.warn("[!] could not find '__fileurl' at " + url);
}
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
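Swapping new URL(URL, String) for URI resolution keeps relative pagination hrefs working without the legacy constructor. A sketch (gallery ID hypothetical):

URI base = URI.create("https://motherless.com/GIABC1234");
URL next = base.resolve("/GIABC1234?page=2").toURL(); // href taken from link[rel=next]
System.out.println(next); // https://motherless.com/GIABC1234?page=2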
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java
new file mode 100644
index 00000000..642c6417
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java
@@ -0,0 +1,223 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+
+public class MrCongRipper extends AbstractHTMLRipper {
+
+ private Document currDoc;
+ private int lastPageNum;
+ private int currPageNum;
+ private boolean tagPage = false;
+
+ public MrCongRipper(URL url) throws IOException {
+ super(url);
+ currPageNum = 1;
+ }
+
+ @Override
+ public String getHost() {
+ return "mrcong";
+ }
+
+ @Override
+ public String getDomain() {
+ return "mrcong.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ System.out.println(url.toExternalForm());
+ Pattern p = Pattern.compile("^https?://mrcong\\.com/(\\S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$");
+ Pattern p2 = Pattern.compile("^https?://mrcong\\.com/tag/(\\S*)/$"); //Added 6-10-21
+ Matcher m = p.matcher(url.toExternalForm());
+ Matcher m2 = p2.matcher(url.toExternalForm()); //6-10-21
+ if (m.matches()) {
+ return m.group(1);
+ }
+ else if(m2.matches()) { //Added 6-10-21
+ tagPage = true;
+ System.out.println("tagPage = TRUE");
+ return m2.group(1);
+ }
+
+ throw new MalformedURLException("Expected mrcong.com URL format: "
+ + "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead");
+ }
+
+ @Override
+ public Document getFirstPage() throws IOException { //returns the root gallery page regardless of actual page number
+ // "url" is an instance field of the superclass
+ String rootUrlStr;
+ URL rootUrl;
+
+ if(!tagPage) {
+ rootUrlStr = url.toExternalForm().replaceAll("(|/|/[0-9]+/?)$", "/");
+ } else { //6-10-21
+ rootUrlStr = url.toExternalForm().replaceAll("(page/[0-9]+/)$", "page/1/");
+ }
+
+ rootUrl = URI.create(rootUrlStr).toURL();
+ url = rootUrl;
+ currPageNum = 1;
+ currDoc = Http.url(url).get();
+ getMaxPageNumber(currDoc);
+ return currDoc;
+ }
+
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ int pageNum = currPageNum;
+ String urlStr;
+ if(!tagPage) {
+ if (pageNum == 1 && lastPageNum > 1) {
+ urlStr = url.toExternalForm().concat((pageNum + 1) + "");
+ System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
+ } else if (pageNum < lastPageNum) {
+ urlStr = url.toExternalForm().replaceAll("(/([0-9]*)/?)$", ("/" + (pageNum + 1) + "/"));
+ System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
+ } else {
+ //System.out.printf("Error: Page number provided goes past last valid page number\n");
+ throw (new IOException("Error: Page number provided goes past last valid page number\n"));
+ }
+ } else { //6-10-21
+ //if (pageNum == 1 && lastPageNum >= 1) {
+ if (pageNum == 1 && lastPageNum > 1) { //6-10-21
+ urlStr = url.toExternalForm().concat("page/" + (pageNum + 1) + "");
+ System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
+ } else if (pageNum < lastPageNum) {
+ urlStr = url.toExternalForm().replaceAll("(page/([0-9]*)/?)$", ("page/" + (pageNum + 1) + "/"));
+ System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
+ } else {
+ //System.out.printf("Error: Page number provided goes past last valid page number\n");
+ System.out.print("Error: There is no next page!\n");
+ return null;
+ //throw (new IOException("Error: Page number provided goes past last valid page number\n"));
+ }
+ }
+
+ url = URI.create(urlStr).toURL();
+ currDoc = Http.url(url).get();
+ currPageNum++;
+ return currDoc;
+ }
+
+ private int getMaxPageNumber(Document doc) {
+ if(!tagPage) {
+ try {
+ lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); //gets the last possible page for the gallery
+ } catch(Exception e) {
+ return 1;
+ }
+ } else {
+ try {
+ lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); //gets the last possible page for the gallery
+ System.out.println("The last page found for " + url + " was " + lastPageNum);
+ } catch(Exception e) {
+ return 1;
+ }
+ }
+
+ return lastPageNum;
+ }
+
+ private int getCurrentPageNum(Document doc) {
+ int currPage; //6-10-21
+
+ if(!tagPage) {
+ currPage = Integer.parseInt(doc.select("div.page-link > span").first().text());
+ } else {
+ currPage = Integer.parseInt(doc.select("div.pagination > span").first().text());
+ }
+
+ System.out.println("The current page was found to be: " + currPage);
+
+ return currPage;
+ }
+
+ @Override
+ public List<String> getURLsFromPage(Document doc) { //gets the urls of the images
+ List<String> result = new ArrayList<>();
+
+ if(!tagPage) {
+ for (Element el : doc.select("p > img")) {
+ String imageSource = el.attr("src");
+ result.add(imageSource);
+ }
+
+ System.out.println("\n1.)Printing List: " + result + "\n");
+ } else { //6-10-21
+ //List gallery_set_list = new ArrayList<>();
+
+ for (Element el : doc.select("h2 > a")) {
+ String pageSource = el.attr("href");
+ if(!pageSource.equals("https://mrcong.com/")) {
+ result.add(pageSource);
+ System.out.println("\n" + pageSource + " has been added to the list.");
+ }
+ }
+
+ /*for (String el2 : gallery_set_list) {
+ try {
+ URL temp_urL = URI.create(el2).toURL();
+ MrCongRipper mcr = new MrCongRipper(temp_urL);
+ System.out.println("URL being ripped: " + mcr.url.toString());
+ result.addAll(mcr.getURLsFromPage(mcr.getFirstPage()));
+
+ Document nextPg = mcr.getNextPage(mcr.currDoc);
+ while(nextPg != null) {
+ result.addAll(mcr.getURLsFromPage(nextPg));
+ nextPg = mcr.getNextPage(mcr.currDoc);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ }*/
+
+ System.out.println("\n2.)Printing List: " + result + "\n");
+ }
+
+ return result;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ //addURLToDownload(url, getPrefix(index));
+
+ if(!tagPage) {
+ addURLToDownload(url, getPrefix(index));
+ } else {
+ try {
+ List<String> ls = this.getURLsFromPage(this.currDoc);
+ Document np = this.getNextPage(this.currDoc);
+
+ while(np != null) { //Creates a list of all sets to download
+ ls.addAll(this.getURLsFromPage(np));
+ np = this.getNextPage(np);
+ }
+
+ for(String urlStr : ls) {
+ MrCongRipper mcr = new MrCongRipper(URI.create(urlStr).toURL());
+ mcr.setup();
+ mcr.rip();
+ }
+
+ } catch (IOException | URISyntaxException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+}
\ No newline at end of file
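Pagination here is string surgery on the gallery URL, which getFirstPage first normalizes to end in "/". A sketch of both branches for a hypothetical gallery:

String root = "https://mrcong.com/some-set-20-anh/";
System.out.println(root.concat("2")); // page 1 -> 2: append the number
String page2 = "https://mrcong.com/some-set-20-anh/2/";
System.out.println(page2.replaceAll("(/([0-9]*)/?)$", "/3/")); // page 2 -> 3: swap the trailing segment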
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MultpornRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MultpornRipper.java
new file mode 100644
index 00000000..cdc873f2
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MultpornRipper.java
@@ -0,0 +1,71 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class MultpornRipper extends AbstractHTMLRipper {
+
+ public MultpornRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ protected String getDomain() {
+ return "multporn.net";
+ }
+
+ @Override
+ public String getHost() {
+ return "multporn";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException, URISyntaxException {
+ Pattern p = Pattern.compile("^https?://multporn\\.net/node/(\\d+)/.*$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+
+ try {
+ String nodeHref = Http.url(url).get().select(".simple-mode-switcher").attr("href");
+ p = Pattern.compile("/node/(\\d+)/.*");
+ m = p.matcher(nodeHref);
+ if (m.matches()) {
+ this.url = new URI("https://multporn.net" + nodeHref).toURL();
+ return m.group(1);
+ }
+ } catch (Exception ignored) { }
+
+ throw new MalformedURLException("Expected multporn.net URL format: " +
+ "multporn.net/comics/comicid / multporn.net/node/id/* - got " + url + " instead");
+ }
+
+ @Override
+ protected List<String> getURLsFromPage(Document page) {
+ List<String> imageURLs = new ArrayList<>();
+ Elements thumbs = page.select(".mfp-gallery-image .mfp-item");
+ for (Element el : thumbs) {
+ imageURLs.add(el.attr("href"));
+ }
+ return imageURLs;
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
+ }
+}
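When handed a /comics/... URL instead of /node/..., getGID pivots through the page's "simple mode" switcher link to recover the numeric node ID. A sketch with a hypothetical href:

Pattern p = Pattern.compile("/node/(\\d+)/.*");
Matcher m = p.matcher("/node/123456/view"); // hypothetical .simple-mode-switcher href
if (m.matches()) {
    System.out.println("https://multporn.net/node/" + m.group(1)); // node 123456
}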
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java
index 453826a3..deedfb88 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java
@@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -13,8 +14,6 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
public class MyhentaicomicsRipper extends AbstractHTMLRipper {
- private static boolean isTag;
-
public MyhentaicomicsRipper(URL url) throws IOException {
super(url);
}
@@ -69,7 +68,6 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+?=:]*)?$");
Matcher mat = pat.matcher(url.toExternalForm());
if (mat.matches()) {
- isTag = true;
return true;
}
return false;
@@ -85,9 +83,8 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
}
@Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
+ public Document getFirstPage() throws IOException, URISyntaxException {
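+ // The base class already fetches this.url; the override remains, presumably, only for the wider throws clause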
+ return super.getFirstPage();
}
@Override
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaigalleryRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaigalleryRipper.java
index d8422942..c9f4c0bd 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaigalleryRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaigalleryRipper.java
@@ -40,12 +40,6 @@ public class MyhentaigalleryRipper extends AbstractHTMLRipper {
+ "myhentaigallery.com/gallery/thumbnails/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MyreadingmangaRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MyreadingmangaRipper.java
index 20a3cf2d..30fab521 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MyreadingmangaRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MyreadingmangaRipper.java
@@ -41,12 +41,6 @@ public class MyreadingmangaRipper extends AbstractHTMLRipper {
+ "myreadingmanga.info/title - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NatalieMuRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NatalieMuRipper.java
index 952b434e..8cf24fd8 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NatalieMuRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NatalieMuRipper.java
@@ -79,11 +79,6 @@ public class NatalieMuRipper extends AbstractHTMLRipper {
return this.url.getHost();
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(this.url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
index b3ededc4..a7be157a 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
@@ -53,7 +53,7 @@ public class NewgroundsRipper extends AbstractHTMLRipper {
@Override
protected Document getFirstPage() throws IOException {
- return Http.url("https://" + this.username + ".newgrounds.com/art").get();
+ return Http.url("https://" + this.username + ".newgrounds.com/art").timeout(10*1000).get();
}
@Override
@@ -71,7 +71,7 @@ public class NewgroundsRipper extends AbstractHTMLRipper {
List<String> imageURLs = new ArrayList<>();
String documentHTMLString = page.toString().replaceAll("&quot;", "");
- String findStr = "newgrounds.com\\/art\\/view\\/" + this.username;
+ String findStr = "newgrounds.com/art/view/" + this.username;
int lastIndex = 0;
// Index where findStr is found; each occasion contains the link to an image
@@ -95,7 +95,7 @@ public class NewgroundsRipper extends AbstractHTMLRipper {
if(i == indices.size() - 1){
s = documentHTMLString.substring(indices.get(i) + 2);
} else{
- s = documentHTMLString.substring(indices.get(i) + 2, indices.get(i + 1));
+ s = documentHTMLString.substring(indices.get(i) + 1, indices.get(i + 1));
}
s = s.replaceAll("\n", "").replaceAll("\t", "")
@@ -106,13 +106,14 @@ public class NewgroundsRipper extends AbstractHTMLRipper {
if (m.lookingAt()) {
String testURL = m.group(3) + "_" + this.username + "_" + m.group(1);
+ testURL = testURL.replace("_full", "");
// Open new document to get full sized image
try {
Document imagePage = Http.url(inLink + m.group(1)).get();
for(String extensions: this.ALLOWED_EXTENSIONS){
if(imagePage.toString().contains(testURL + "." + extensions)){
- imageUrl += m.group(2) + "/" + m.group(3) + "_" + this.username + "_" + m.group(1) + "." + extensions;
+ imageUrl += m.group(2) + "/" + m.group(3).replace("_full","") + "_" + this.username + "_" + m.group(1) + "." + extensions;
imageURLs.add(imageUrl);
break;
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
deleted file mode 100644
index bafa3690..00000000
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
+++ /dev/null
@@ -1,80 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-public class NewsfilterRipper extends AbstractHTMLRipper {
-
- private static final String HOST = "newsfilter";
- private static final String DOMAIN = "newsfilter.org";
-
- public NewsfilterRipper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
- String u = url.toExternalForm();
- if (u.indexOf('#') >= 0) {
- u = u.substring(0, u.indexOf('#'));
- }
- u = u.replace("https?://m\\.newsfilter\\.org", "http://newsfilter.org");
- return new URL(u);
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/([^/]+)$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- return m.group(2);
- }
- throw new MalformedURLException(
- "Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" +
- " Got: " + url);
- }
-
- @Override
- public String getHost() {
- return HOST;
- }
-
- @Override
- protected String getDomain() {
- return DOMAIN;
- }
-
- @Override
- protected Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
- @Override
- protected List<String> getURLsFromPage(Document page) {
- List<String> imgURLs = new ArrayList<>();
- Elements thumbnails = page.select("#galleryImages .inner-block img");
- for (Element thumb : thumbnails) {
- String thumbUrl = thumb.attr("src");
- String picUrl = thumbUrl.replace("thumbs/", "");
- // use HTTP instead of HTTPS (less headaches)
- imgURLs.add(picUrl.replaceFirst("https://", "http://"));
- }
- return imgURLs;
- }
-
- @Override
- protected void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index));
- }
-}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java
index 86079edc..35a1f8ad 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -29,8 +31,6 @@ public class NfsfwRipper extends AbstractHTMLRipper {
"https?://[wm.]*nfsfw.com/gallery/v/[^/]+/(.+)$"
);
- // cached first page
- private Document fstPage;
// threads pool for downloading images from image pages
private DownloadThreadPool nfsfwThreadPool;
@@ -49,13 +49,6 @@ public class NfsfwRipper extends AbstractHTMLRipper {
return HOST;
}
- @Override
- protected Document getFirstPage() throws IOException {
- // cache the first page
- this.fstPage = Http.url(url).get();
- return fstPage;
- }
-
@Override
public Document getNextPage(Document page) throws IOException {
String nextURL = null;
@@ -113,13 +106,13 @@ public class NfsfwRipper extends AbstractHTMLRipper {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
// always start on the first page of an album
// (strip the options after the '?')
String u = url.toExternalForm();
if (u.contains("?")) {
u = u.substring(0, u.indexOf("?"));
- return new URL(u);
+ return new URI(u).toURL();
} else {
return url;
}
@@ -157,9 +150,15 @@ public class NfsfwRipper extends AbstractHTMLRipper {
@Override
public boolean pageContainsAlbums(URL url) {
- List<String> imageURLs = getImagePageURLs(fstPage);
- List<String> subalbumURLs = getSubalbumURLs(fstPage);
- return imageURLs.isEmpty() && !subalbumURLs.isEmpty();
+ try {
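+ // getCachedFirstPage() is provided by the base ripper and replaces the old fstPage field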
+ final var fstPage = getCachedFirstPage();
+ List<String> imageURLs = getImagePageURLs(fstPage);
+ List<String> subalbumURLs = getSubalbumURLs(fstPage);
+ return imageURLs.isEmpty() && !subalbumURLs.isEmpty();
+ } catch (IOException | URISyntaxException e) {
+ LOGGER.error("Unable to load " + url, e);
+ return false;
+ }
}
@Override
@@ -196,10 +195,10 @@ public class NfsfwRipper extends AbstractHTMLRipper {
/**
* Helper class to find and download images found on "image" pages
*/
- private class NfsfwImageThread extends Thread {
- private URL url;
- private String subdir;
- private int index;
+ private class NfsfwImageThread implements Runnable {
+ private final URL url;
+ private final String subdir;
+ private final int index;
NfsfwImageThread(URL url, String subdir, int index) {
super();
@@ -223,8 +222,8 @@ public class NfsfwRipper extends AbstractHTMLRipper {
if (file.startsWith("/")) {
file = "http://nfsfw.com" + file;
}
- addURLToDownload(new URL(file), getPrefix(index), this.subdir);
- } catch (IOException e) {
+ addURLToDownload(new URI(file).toURL(), getPrefix(index), this.subdir);
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java
index 49fc1d8a..fe50f1f1 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java
@@ -126,7 +126,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
List<String> imageURLs = new ArrayList<>();
Elements thumbs = page.select("a.gallerythumb > img");
for (Element el : thumbs) {
- imageURLs.add(el.attr("data-src").replaceAll("t\\.n", "i.n").replaceAll("t\\.", "."));
+ imageURLs.add(el.attr("data-src").replaceAll("://t", "://i").replaceAll("t\\.", "."));
}
return imageURLs;
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NsfwXxxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NsfwXxxRipper.java
new file mode 100644
index 00000000..7e26faa2
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NsfwXxxRipper.java
@@ -0,0 +1,135 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractJSONRipper;
+import com.rarchives.ripme.utils.Http;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+public class NsfwXxxRipper extends AbstractJSONRipper {
+
+ public NsfwXxxRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ protected String getDomain() {
+ return "nsfw.xxx";
+ }
+
+ @Override
+ public String getHost() {
+ return "nsfw_xxx";
+ }
+
+
+ @Override
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
+ String u = url.toExternalForm();
+ // https://nsfw.xxx/user/kelly-kat/foo -> https://nsfw.xxx/user/kelly-kat
+ // https://nsfw.xxx/user/kelly-kat -> https://nsfw.xxx/user/kelly-kat
+ // keep up to and including the username
+ u = u.replaceAll("https?://nsfw.xxx/user/([^/]+)/?.*", "https://nsfw.xxx/user/$1");
+ if (!u.contains("nsfw.xxx/user")) {
+ throw new MalformedURLException("Invalid URL: " + url);
+ }
+
+ return new URI(u).toURL();
+ }
+
+ String getUser() throws MalformedURLException {
+ return getGID(url);
+ }
+
+ URL getPage(int page) throws MalformedURLException, URISyntaxException {
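+ // Slide-page endpoint: returns one JSON page of the user's images, videos and galleries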
+ return new URI("https://nsfw.xxx/slide-page/" + page + "?nsfw%5B%5D=0&types%5B%5D=image&types%5B%5D=video&types%5B%5D=gallery&slider=1&jsload=1&user=" + getUser()).toURL();
+ }
+
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p = Pattern.compile("https://nsfw.xxx/user/([^/]+)/?$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected URL format: " +
+ "nsfw.xxx/user/USER - got " + url + " instead");
+ }
+
+
+ int currentPage = 1;
+
+ @Override
+ protected JSONObject getFirstPage() throws IOException, URISyntaxException {
+ return Http.url(getPage(1)).getJSON();
+ }
+
+ List<String> descriptions = new ArrayList<>();
+
+ @Override
+ protected JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException {
+ currentPage++;
+ JSONObject nextPage = Http.url(getPage(doc.getInt("page") + 1)).getJSON();
+ JSONArray items = nextPage.getJSONArray("items");
+ if (items.isEmpty()) {
+ throw new IOException("No more pages");
+ }
+ return nextPage;
+ }
+
+ class ApiEntry {
+ String srcUrl;
+ String author;
+ String title;
+
+ public ApiEntry(String srcUrl, String author, String title) {
+ this.srcUrl = srcUrl;
+ this.author = author;
+ this.title = title;
+ }
+ }
+
+ @Override
+ protected List<String> getURLsFromJSON(JSONObject json) {
+ JSONArray items = json.getJSONArray("items");
+ List<ApiEntry> data = IntStream
+ .range(0, items.length())
+ .mapToObj(items::getJSONObject)
+ .map(o -> {
+ String srcUrl;
+ if(o.has("src")) {
+ srcUrl = o.getString("src");
+ } else {
+ // video source
+ Pattern videoHtmlSrcPattern = Pattern.compile("src=\"([^\"]+)\"");
+ Matcher matches = videoHtmlSrcPattern.matcher(o.getString("html"));
+ matches.find();
+ srcUrl = StringEscapeUtils.unescapeHtml(matches.group(1));
+ }
+
+ return new ApiEntry(srcUrl, o.getString("author"), o.getString("title"));
+ })
+ .toList();
+
+ data.forEach(e -> descriptions.add(e.title));
+ return data.stream().map(e -> e.srcUrl).collect(Collectors.toList());
+ }
+
+ @Override
+ protected void downloadURL(URL url, int index) {
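+ // descriptions is filled by getURLsFromJSON in the same order as the URLs; index is 1-based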
+ addURLToDownload(url, getPrefix(index) + descriptions.get(index - 1) + "_" , "", "", null);
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java
index 3300da50..ea145aad 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java
@@ -16,8 +16,6 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class NudeGalsRipper extends AbstractHTMLRipper {
- // Current HTML document
- private Document albumDoc = null;
public NudeGalsRipper(URL url) throws IOException {
super(url);
@@ -50,14 +48,6 @@ public class NudeGalsRipper extends AbstractHTMLRipper {
+ " Got: " + url);
}
- @Override
- public Document getFirstPage() throws IOException {
- if (albumDoc == null) {
- albumDoc = Http.url(url).get();
- }
- return albumDoc;
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
@@ -77,4 +67,4 @@ public class NudeGalsRipper extends AbstractHTMLRipper {
// Send referrer when downloading images
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java
index a5183397..e03d3bdc 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java
@@ -46,12 +46,6 @@ public class OglafRipper extends AbstractHTMLRipper {
return getDomain();
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("div#nav > a > div#nx").first() == null) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
index d2421f37..39d56b83 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
@@ -3,25 +3,28 @@ package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
-import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.log4j.Logger;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class PahealRipper extends AbstractHTMLRipper {
- private static final Logger logger = Logger.getLogger(PahealRipper.class);
+ private static final Logger logger = LogManager.getLogger(PahealRipper.class);
private static Map<String, String> cookies = null;
private static Pattern gidPattern = null;
@@ -56,7 +59,7 @@ public class PahealRipper extends AbstractHTMLRipper {
@Override
public Document getNextPage(Document page) throws IOException {
for (Element e : page.select("#paginator a")) {
- if (e.text().toLowerCase().equals("next")) {
+ if (e.text().equalsIgnoreCase("next")) {
return Http.url(e.absUrl("href")).cookies(getCookies()).get();
}
}
@@ -88,12 +91,12 @@ public class PahealRipper extends AbstractHTMLRipper {
name = name.substring(0, name.length() - ext.length());
}
- File outFile = new File(workingDir.getCanonicalPath()
- + File.separator
+ Path outFile = Paths.get(workingDir
+ + "/"
+ Utils.filesystemSafe(new URI(name).getPath())
+ ext);
addURLToDownload(url, outFile);
- } catch (IOException | URISyntaxException ex) {
+ } catch (URISyntaxException ex) {
logger.error("Error while downloading URL " + url, ex);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java
index 8f5c8c37..100068ed 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java
@@ -3,6 +3,11 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.URL;
+import com.rarchives.ripme.utils.Http;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.select.Elements;
+
public class PawooRipper extends MastodonRipper {
public PawooRipper(URL url) throws IOException {
super(url);
@@ -17,4 +22,5 @@ public class PawooRipper extends MastodonRipper {
public String getDomain() {
return "pawoo.net";
}
+
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
index 680d2c09..097fe2c0 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -82,7 +84,7 @@ public class PhotobucketRipper extends AbstractHTMLRipper {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
LOGGER.info(url);
String u = url.toExternalForm();
if (u.contains("?")) {
@@ -93,11 +95,11 @@ public class PhotobucketRipper extends AbstractHTMLRipper {
// append trailing slash
u = u + "/";
}
- return new URL(u);
+ return new URI(u).toURL();
}
@Override
- public String getGID(URL url) throws MalformedURLException {
+ public String getGID(URL url) throws MalformedURLException, URISyntaxException {
Matcher m;
URL sanitized = sanitizeURL(url);
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PichunterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PichunterRipper.java
index e6c5d110..bdb5f528 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PichunterRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PichunterRipper.java
@@ -63,12 +63,6 @@ public class PichunterRipper extends AbstractHTMLRipper {
return m.matches();
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
// We use comic-nav-next to find the next page
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PicstatioRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PicstatioRipper.java
index 1bd103b5..65d43d39 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PicstatioRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PicstatioRipper.java
@@ -51,12 +51,6 @@ public class PicstatioRipper extends AbstractHTMLRipper {
"www.picstatio.com//ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("a.next_page") != null) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixRipper.java
index b4579684..f021269f 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixRipper.java
@@ -41,12 +41,6 @@ public class PorncomixRipper extends AbstractHTMLRipper {
"porncomix.info/comic - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixinfoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixinfoRipper.java
new file mode 100644
index 00000000..8aef59a6
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixinfoRipper.java
@@ -0,0 +1,79 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class PorncomixinfoRipper extends AbstractHTMLRipper {
+
+ public PorncomixinfoRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "porncomixinfo";
+ }
+
+ @Override
+ public String getDomain() {
+ return "porncomixinfo.net";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p = Pattern.compile("https://porncomixinfo.net/chapter/([a-zA-Z1-9_-]*)/([a-zA-Z1-9_-]*)/?$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected porncomixinfo URL format: " +
+ "porncomixinfo.net/chapter/CHAP/ID - got " + url + " instead");
+ }
+
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ // Find the next page via the "a.next_page" link
+ Element elem = doc.select("a.next_page").first();
+ if (elem == null) {
+ throw new IOException("No more pages");
+ }
+ String nextPage = elem.attr("href");
+ // Sometimes the href attribute is an empty string;
+ // treat that as the last page
+ if (nextPage.equals("")) {
+ return null;
+ }
+ else {
+ return Http.url(nextPage).get();
+ }
+ }
+
+ @Override
+ public List<String> getURLsFromPage(Document doc) {
+ List<String> result = new ArrayList<>();
+ for (Element el : doc.select("img.wp-manga-chapter-img")) {
+ String imageSource = el.attr("src");
+ result.add(imageSource);
+ }
+ return result;
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
index 197bdcbd..a2ce4a19 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
@@ -1,9 +1,11 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
+import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@@ -47,12 +49,12 @@ public class PornhubRipper extends AbstractHTMLRipper {
}
@Override
- public Document getNextPage(Document page) throws IOException {
+ public Document getNextPage(Document page) throws IOException, URISyntaxException {
Elements nextPageLink = page.select("li.page_next > a");
if (nextPageLink.isEmpty()){
throw new IOException("No more pages");
} else {
- URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
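+ // URI.resolve() handles both absolute and relative hrefs against the current page URL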
+ URL nextURL = this.url.toURI().resolve(nextPageLink.first().attr("href")).toURL();
return Http.url(nextURL).get();
}
}
@@ -74,7 +76,7 @@ public class PornhubRipper extends AbstractHTMLRipper {
@Override
protected void downloadURL(URL url, int index) {
- PornhubImageThread t = new PornhubImageThread(url, index, this.workingDir);
+ PornhubImageThread t = new PornhubImageThread(url, index, this.workingDir.toPath());
pornhubThreadPool.addThread(t);
try {
Thread.sleep(IMAGE_SLEEP_TIME);
@@ -83,13 +85,13 @@ public class PornhubRipper extends AbstractHTMLRipper {
}
}
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
// always start on the first page of an album
// (strip the options after the '?')
String u = url.toExternalForm();
if (u.contains("?")) {
u = u.substring(0, u.indexOf("?"));
- return new URL(u);
+ return new URI(u).toURL();
} else {
return url;
}
@@ -126,11 +128,11 @@ public class PornhubRipper extends AbstractHTMLRipper {
*
* Handles case when site has IP-banned the user.
*/
- private class PornhubImageThread extends Thread {
- private URL url;
- private int index;
+ private class PornhubImageThread implements Runnable {
+ private final URL url;
+ private final int index;
- PornhubImageThread(URL url, int index, File workingDir) {
+ PornhubImageThread(URL url, int index, Path workingDir) {
super();
this.url = url;
this.index = index;
@@ -159,10 +161,10 @@ public class PornhubRipper extends AbstractHTMLRipper {
prefix = String.format("%03d_", index);
}
- URL imgurl = new URL(url, imgsrc);
+ URL imgurl = url.toURI().resolve(imgsrc).toURL();
addURLToDownload(imgurl, prefix);
- } catch (IOException e) {
+ } catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PornpicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PornpicsRipper.java
index b779c480..799f7294 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PornpicsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PornpicsRipper.java
@@ -41,12 +41,6 @@ public class PornpicsRipper extends AbstractHTMLRipper {
"www.pornpics.com/galleries/ID - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ReadcomicRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ReadcomicRipper.java
new file mode 100644
index 00000000..55b3559a
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ReadcomicRipper.java
@@ -0,0 +1,55 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class ReadcomicRipper extends ViewcomicRipper {
+
+ public ReadcomicRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "read-comic";
+ }
+
+ @Override
+ public String getDomain() {
+ return "read-comic.com";
+ }
+
+
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p = Pattern.compile("https?://read-comic.com/([a-zA-Z1-9_-]*)/?$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected view-comic URL format: " +
+ "read-comic.com/COMIC_NAME - got " + url + " instead");
+ }
+
+ @Override
+ public List<String> getURLsFromPage(Document doc) {
+ List<String> result = new ArrayList<>();
+ for (Element el : doc.select("div.pinbin-copy > a > img")) {
+ result.add(el.attr("src"));
+ }
+ return result;
+ }
+
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
index e68e477d..dcfa14e7 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
@@ -1,15 +1,24 @@
package com.rarchives.ripme.ripper.rippers;
-import java.io.File;
import java.io.IOException;
+import java.io.OutputStream;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage;
+import j2html.TagCreator;
+import j2html.tags.ContainerTag;
+import j2html.tags.specialized.DivTag;
import org.json.JSONArray;
+import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;
@@ -18,6 +27,9 @@ import com.rarchives.ripme.ui.UpdateUtils;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
+import org.jsoup.Jsoup;
+
+import static j2html.TagCreator.*;
public class RedditRipper extends AlbumRipper {
@@ -46,41 +58,52 @@ public class RedditRipper extends AlbumRipper {
}
@Override
- public URL sanitizeURL(URL url) throws MalformedURLException {
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String u = url.toExternalForm();
// Strip '/u/' from URL
u = u.replaceAll("reddit\\.com/u/", "reddit.com/user/");
- return new URL(u);
+ return new URI(u).toURL();
}
- private URL getJsonURL(URL url) throws MalformedURLException {
+ private URL getJsonURL(URL url) throws MalformedURLException, URISyntaxException {
+ // Convert gallery to post link and append ".json"
+ Pattern p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/gallery/([a-zA-Z0-9]+).*$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return new URI("https://reddit.com/" +m.group(m.groupCount())+ ".json").toURL();
+ }
+
// Append ".json" to URL in appropriate location.
String result = url.getProtocol() + "://" + url.getHost() + url.getPath() + ".json";
if (url.getQuery() != null) {
result += "?" + url.getQuery();
}
- return new URL(result);
+ return new URI(result).toURL();
}
@Override
public void rip() throws IOException {
- URL jsonURL = getJsonURL(this.url);
- while (true) {
- if (shouldAddURL()) {
- sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_COMPLETE_HISTORY, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
- break;
- }
- jsonURL = getAndParseAndReturnNext(jsonURL);
- if (jsonURL == null || isThisATest() || isStopped()) {
- break;
+ try {
+ URL jsonURL = getJsonURL(this.url);
+ while (true) {
+ if (shouldAddURL()) {
+ sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_COMPLETE_HISTORY, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
+ break;
+ }
+ jsonURL = getAndParseAndReturnNext(jsonURL);
+ if (jsonURL == null || isThisATest() || isStopped()) {
+ break;
+ }
}
+ } catch (URISyntaxException e) {
+ throw new IOException(e.getMessage());
}
waitForThreads();
}
- private URL getAndParseAndReturnNext(URL url) throws IOException {
+ private URL getAndParseAndReturnNext(URL url) throws IOException, URISyntaxException {
JSONArray jsonArray = getJsonArrayFromURL(url), children;
JSONObject json, data;
URL nextURL = null;
@@ -95,7 +118,19 @@ public class RedditRipper extends AlbumRipper {
}
children = data.getJSONArray("children");
for (int j = 0; j < children.length(); j++) {
- parseJsonChild(children.getJSONObject(j));
+ try {
+ parseJsonChild(children.getJSONObject(j));
+
+ if (children.getJSONObject(j).getString("kind").equals("t3") &&
+ children.getJSONObject(j).getJSONObject("data").getBoolean("is_self")
+ ) {
+ URL selfPostURL = new URI(children.getJSONObject(j).getJSONObject("data").getString("url")).toURL();
+ LOGGER.debug(selfPostURL.toExternalForm());
+ saveText(getJsonArrayFromURL(getJsonURL(selfPostURL)));
+ }
+ } catch (Exception e) {
+ LOGGER.debug("at index " + i + ", for this data: " + data.toString() + e);
+ }
}
if (data.has("after") && !data.isNull("after")) {
String nextURLString = Utils.stripURLParameter(url.toExternalForm(), "after");
@@ -105,7 +140,7 @@ public class RedditRipper extends AlbumRipper {
else {
nextURLString = nextURLString.concat("?after=" + data.getString("after"));
}
- nextURL = new URL(nextURLString);
+ nextURL = new URI(nextURLString).toURL();
}
}
@@ -188,6 +223,8 @@ public class RedditRipper extends AlbumRipper {
if (data.getBoolean("is_self")) {
// TODO Parse self text
handleBody(data.getString("selftext"), data.getString("id"), data.getString("title"));
+ } else if (!data.isNull("gallery_data") && !data.isNull("media_metadata")) {
+ handleGallery(data.getJSONObject("gallery_data").getJSONArray("items"), data.getJSONObject("media_metadata"), data.getString("id"), data.getString("title"));
} else {
// Get link
handleURL(data.getString("url"), data.getString("id"), data.getString("title"));
@@ -215,8 +252,123 @@ public class RedditRipper extends AlbumRipper {
}
}
+ private void saveText(JSONArray jsonArray) throws JSONException {
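+ // Renders a self post and its comment tree to a standalone HTML file via j2html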
+ Path saveFileAs;
+
+ JSONObject selfPost = jsonArray.getJSONObject(0).getJSONObject("data")
+ .getJSONArray("children").getJSONObject(0).getJSONObject("data");
+ JSONArray comments = jsonArray.getJSONObject(1).getJSONObject("data")
+ .getJSONArray("children");
+
+ if (selfPost.getString("selftext").equals("")) { return; }
+
+ final String title = selfPost.getString("title");
+ final String id = selfPost.getString("id");
+ final String author = selfPost.getString("author");
+ final String creationDate = new Date((long) selfPost.getInt("created") * 1000).toString();
+ final String subreddit = selfPost.getString("subreddit");
+ final String selfText = selfPost.getString("selftext_html");
+ final String permalink = selfPost.getString("url");
+
+ String html = TagCreator.html(
+ head(
+ title(title),
+ style(rawHtml(HTML_STYLING))
+ ),
+ body(
+ div(
+ h1(title),
+ a(subreddit).withHref("https://www.reddit.com/r/" + subreddit),
+ a("Original").withHref(permalink),
+ br()
+ ).withClass("thing"),
+ div(
+ div(
+ span(
+ a(author).withHref("https://www.reddit.com/u/" + author)
+ ).withClass("author op")
+ ).withClass("thing oppost")
+ .withText(creationDate)
+ .with(rawHtml(Jsoup.parse(selfText).text()))
+ ).withClass("flex")
+ ).with(getComments(comments, author)),
+ script(rawHtml(HTML_SCRIPT))
+ ).renderFormatted();
+
+ try {
+ saveFileAs = Utils.getPath(workingDir
+ + "/"
+ + id + "_" + Utils.filesystemSafe(title)
+ + ".html");
+ OutputStream out = Files.newOutputStream(saveFileAs);
+ out.write(html.getBytes());
+ out.close();
+ } catch (IOException e) {
+ LOGGER.error("[!] Error creating save file path for description '" + url + "':", e);
+ return;
+ }
+
+ LOGGER.debug("Downloading " + url + "'s self post to " + saveFileAs);
+ super.retrievingSource(permalink);
+ if (!Files.exists(saveFileAs.getParent())) {
+ LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
+ try {
+ Files.createDirectory(saveFileAs.getParent());
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ private ContainerTag getComments(JSONArray comments, String author) {
+ ContainerTag commentsDiv = div().withId("comments");
+
+ for (int i = 0; i < comments.length(); i++) {
+ JSONObject data = comments.getJSONObject(i).getJSONObject("data");
+
+ try {
+ ContainerTag commentDiv =
+ div(
+ span(data.getString("author")).withClasses("author", iff(data.getString("author").equals(author), "op")),
+ a(new Date((long) data.getInt("created") * 1000).toString()).withHref("#" + data.getString("name"))
+ ).withClass("thing comment").withId(data.getString("name"))
+ .with(rawHtml(Jsoup.parse(data.getString("body_html")).text()));
+ getNestedComments(data, commentDiv, author);
+ commentsDiv.with(commentDiv);
+ } catch (Exception e) {
+ LOGGER.debug("at index " + i + ", for this data: " + data.toString() + e);
+ }
+ }
+ return commentsDiv;
+ }
+
+ private ContainerTag getNestedComments(JSONObject data, ContainerTag parentDiv, String author) {
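+ // Recurse through "replies"; Reddit sends an empty string instead of an object when a comment has no replies, hence the instanceof check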
+ if (data.has("replies") && data.get("replies") instanceof JSONObject) {
+ JSONArray commentChildren = data.getJSONObject("replies").getJSONObject("data").getJSONArray("children");
+ for (int i = 0; i < commentChildren.length(); i++) {
+ JSONObject nestedComment = commentChildren
+ .getJSONObject(i).getJSONObject("data");
+
+ String nestedCommentAuthor = nestedComment.optString("author");
+ if (!nestedCommentAuthor.isBlank()) {
+ ContainerTag childDiv =
+ div(
+ div(
+ span(nestedCommentAuthor).withClasses("author", iff(nestedCommentAuthor.equals(author), "op")),
+ a(new Date((long) nestedComment.getInt("created") * 1000).toString()).withHref("#" + nestedComment.getString("name"))
+ ).withClass("comment").withId(nestedComment.getString("name"))
+ .with(rawHtml(Jsoup.parse(nestedComment.getString("body_html")).text()))
+ ).withClass("child");
+
+ parentDiv.with(getNestedComments(nestedComment, childDiv, author));
+ }
+ }
+ }
+ return parentDiv;
+ }
+
private URL parseRedditVideoMPD(String vidURL) {
- org.jsoup.nodes.Document doc = null;
+ org.jsoup.nodes.Document doc;
try {
doc = Http.url(vidURL + "/DASHPlaylist.mpd").ignoreContentType().get();
int largestHeight = 0;
@@ -232,8 +384,8 @@ public class RedditRipper extends AlbumRipper {
baseURL = doc.select("MPD > Period > AdaptationSet > Representation[height=" + height + "]").select("BaseURL").text();
}
}
- return new URL(vidURL + "/" + baseURL);
- } catch (IOException e) {
+ return new URI(vidURL + "/" + baseURL).toURL();
+ } catch (IOException | URISyntaxException e) {
e.printStackTrace();
}
return null;
@@ -243,8 +395,8 @@ public class RedditRipper extends AlbumRipper {
private void handleURL(String theUrl, String id, String title) {
URL originalURL;
try {
- originalURL = new URL(theUrl);
- } catch (MalformedURLException e) {
+ originalURL = new URI(theUrl).toURL();
+ } catch (MalformedURLException | URISyntaxException e) {
return;
}
String subdirectory = "";
@@ -264,21 +416,21 @@ public class RedditRipper extends AlbumRipper {
Matcher m = p.matcher(url);
if (m.matches()) {
// It's from reddituploads. Assume .jpg extension.
- String savePath = this.workingDir + File.separator;
- savePath += id + "-" + m.group(1) + title + ".jpg";
- addURLToDownload(urls.get(0), new File(savePath));
+ String savePath = this.workingDir + "/";
+ savePath += id + "-" + m.group(1) + Utils.filesystemSafe(title) + ".jpg";
+ addURLToDownload(urls.get(0), Utils.getPath(savePath));
}
if (url.contains("v.redd.it")) {
- String savePath = this.workingDir + File.separator;
- savePath += id + "-" + url.split("/")[3] + title + ".mp4";
+ String savePath = this.workingDir + "/";
+ savePath += id + "-" + url.split("/")[3] + Utils.filesystemSafe(title) + ".mp4";
URL urlToDownload = parseRedditVideoMPD(urls.get(0).toExternalForm());
if (urlToDownload != null) {
LOGGER.info("url: " + urlToDownload + " file: " + savePath);
- addURLToDownload(urlToDownload, new File(savePath));
+ addURLToDownload(urlToDownload, Utils.getPath(savePath));
}
}
else {
- addURLToDownload(urls.get(0), id + title, "", theUrl, null);
+ addURLToDownload(urls.get(0), Utils.filesystemSafe(id + title), "", theUrl, null);
}
} else if (urls.size() > 1) {
for (int i = 0; i < urls.size(); i++) {
@@ -291,6 +443,35 @@ public class RedditRipper extends AlbumRipper {
}
}
+ private void handleGallery(JSONArray data, JSONObject metadata, String id, String title){
+ //TODO handle captions and caption urls
+ String subdirectory = "";
+ if (Utils.getConfigBoolean("reddit.use_sub_dirs", true)) {
+ if (Utils.getConfigBoolean("album_titles.save", true)) {
+ subdirectory = title;
+ }
+ }
+ for (int i = 0; i < data.length(); i++) {
+ JSONObject media = metadata.getJSONObject(data.getJSONObject(i).getString("media_id"));
+ String prefix = id + "-";
+ if (Utils.getConfigBoolean("download.save_order", true)) {
+ //announcement says up to 20 (https://www.reddit.com/r/announcements/comments/hrrh23/now_you_can_make_posts_with_multiple_images/)
+ prefix += String.format("%02d-", i + 1);
+ }
+ try {
+ URL mediaURL;
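+ // "s" holds the source rendition; animated items expose it under "gif", stills under "u"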
+ if (!media.getJSONObject("s").isNull("gif")) {
+ mediaURL = new URI(media.getJSONObject("s").getString("gif").replaceAll("&", "&")).toURL();
+ } else {
+ mediaURL = new URI(media.getJSONObject("s").getString("u").replaceAll("&", "&")).toURL();
+ }
+ addURLToDownload(mediaURL, prefix, subdirectory);
+ } catch (MalformedURLException | JSONException | URISyntaxException e) {
+ LOGGER.error("[!] Unable to parse gallery JSON:\ngallery_data:\n" + data +"\nmedia_metadata:\n" + metadata);
+ }
+ }
+ }
+
@Override
public String getHost() {
return HOST;
@@ -312,6 +493,13 @@ public class RedditRipper extends AlbumRipper {
return "post_" + m.group(m.groupCount());
}
+ // Gallery
+ p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/gallery/([a-zA-Z0-9]+).*$");
+ m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return "post_" + m.group(m.groupCount());
+ }
+
// Subreddit
p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/r/([a-zA-Z0-9_]+).*$");
m = p.matcher(url.toExternalForm());
@@ -319,7 +507,10 @@ public class RedditRipper extends AlbumRipper {
return "sub_" + m.group(m.groupCount());
}
- throw new MalformedURLException("Only accepts user pages, subreddits, or post, can't understand " + url);
+ throw new MalformedURLException("Only accepts user pages, subreddits, post, or gallery can't understand " + url);
}
+ private static final String HTML_STYLING = " .author { font-weight: bold; } .op { color: blue; } .comment { border: 0px; margin: 0 0 25px; padding-left: 5px; } .child { margin: 2px 0 0 20px; border-left: 2px dashed #AAF; } .collapsed { background: darkgrey; margin-bottom: 0; } .collapsed > div { display: none; } .md { max-width: 840px; padding-right: 1em; } h1 { margin: 0; } body { position: relative; background-color: #eeeeec; color: #00000a; font-weight: 400; font-style: normal; font-variant: normal; font-family: Helvetica,Arial,sans-serif; line-height: 1.4 } blockquote { margin: 5px 5px 5px 15px; padding: 1px 1px 1px 15px; max-width: 60em; border: 1px solid #ccc; border-width: 0 0 0 1px; } pre { white-space: pre-wrap; } img, video { max-width: 60vw; max-height: 90vh; object-fit: contain; } .thing { overflow: hidden; margin: 0 5px 3px 40px; border: 1px solid #e0e0e0; background-color: #fcfcfb; } :target > .md { border: 5px solid blue; } .post { margin-bottom: 20px; margin-top: 20px; } .gold { background: goldenrod; } .silver { background: silver; } .platinum { background: aqua; } .deleted { background: #faa; } .md.deleted { background: inherit; border: 5px solid #faa; } .oppost { background-color: #EEF; } blockquote > p { margin: 0; } #related { max-height: 20em; overflow-y: scroll; background-color: #F4FFF4; } #related h3 { position: sticky; top: 0; background-color: white; } .flex { display: flex; flex-flow: wrap; flex-direction: row-reverse; justify-content: flex-end; } ";
+ private static final String HTML_SCRIPT = "document.addEventListener('mousedown', function(e) { var t = e.target; if (t.className == 'author') { t = t.parentElement; } if (t.classList.contains('comment')) { t.classList.toggle('collapsed'); e.preventDefault(); e.stopPropagation(); return false; } });";
+
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java
new file mode 100644
index 00000000..e82db4b2
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java
@@ -0,0 +1,370 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.utils.Http;
+
+import org.json.JSONObject;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.http.client.utils.URIBuilder;
+
+import com.rarchives.ripme.ripper.AbstractJSONRipper;
+
+public class RedgifsRipper extends AbstractJSONRipper {
+
+ private static final String HOST = "redgifs.com";
+ private static final String HOST_2 = "gifdeliverynetwork.com";
+ private static final String GIFS_DETAIL_ENDPOINT = "https://api.redgifs.com/v2/gifs/%s";
+ private static final String USERS_SEARCH_ENDPOINT = "https://api.redgifs.com/v2/users/%s/search";
+ private static final String GALLERY_ENDPOINT = "https://api.redgifs.com/v2/gallery/%s";
+ private static final String SEARCH_ENDPOINT = "https://api.redgifs.com/v2/search/%s";
+ private static final String TAGS_ENDPOINT = "https://api.redgifs.com/v2/gifs/search";
+ private static final String TEMPORARY_AUTH_ENDPOINT = "https://api.redgifs.com/v2/auth/temporary";
+ private static final Pattern PROFILE_PATTERN = Pattern.compile("^https?://[a-zA-Z0-9.]*redgifs\\.com/users/([a-zA-Z0-9_.-]+).*$");
+ private static final Pattern SEARCH_PATTERN = Pattern.compile("^https?:\\/\\/[a-zA-Z0-9.]*redgifs\\.com\\/search(?:\\/[a-zA-Z]+)?\\?.*?query=([a-zA-Z0-9-_+%]+).*$");
+ private static final Pattern TAGS_PATTERN = Pattern.compile("^https?:\\/\\/[a-zA-Z0-9.]*redgifs\\.com\\/gifs\\/([a-zA-Z0-9_.,-]+).*$");
+ private static final Pattern SINGLETON_PATTERN = Pattern.compile("^https?://[a-zA-Z0-9.]*redgifs\\.com/watch/([a-zA-Z0-9_-]+).*$");
+
+ /**
+ * Keep a single auth token for the complete lifecycle of the app.
+ * This should prevent fetching of multiple tokens.
+ */
+ private static String authToken = "";
+
+ String username = "";
+ int count = 40;
+ int currentPage = 1;
+ int maxPages = 1;
+
+ public RedgifsRipper(URL url) throws IOException, URISyntaxException {
+ super(new URI(url.toExternalForm().replace("thumbs.", "")).toURL());
+ }
+
+ @Override
+ public String getDomain() { return "redgifs.com"; }
+
+ @Override
+ public String getHost() {
+ return "redgifs";
+ }
+
+ @Override
+ public boolean canRip(URL url) {
+ return url.getHost().endsWith(HOST) || url.getHost().endsWith(HOST_2);
+ }
+
+ @Override
+ public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
+ String sUrl = url.toExternalForm();
+ sUrl = sUrl.replace("/gifs/detail", "");
+ sUrl = sUrl.replace("/amp", "");
+ sUrl = sUrl.replace("gifdeliverynetwork.com", "redgifs.com/watch");
+ return new URI(sUrl).toURL();
+ }
+
+ public Matcher isProfile() {
+ return PROFILE_PATTERN.matcher(url.toExternalForm());
+ }
+
+ public Matcher isSearch() {
+ return SEARCH_PATTERN.matcher(url.toExternalForm());
+ }
+
+ public Matcher isTags() {
+ return TAGS_PATTERN.matcher(url.toExternalForm());
+ }
+
+ public Matcher isSingleton() {
+ return SINGLETON_PATTERN.matcher(url.toExternalForm());
+ }
+
+ @Override
+ public JSONObject getFirstPage() throws IOException {
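+ // All v2 API calls need a bearer token; fetch one lazily before the first request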
+ try {
+ if (authToken == null || authToken.isBlank()) {
+ fetchAuthToken();
+ }
+
+ if (isSingleton().matches()) {
+ maxPages = 1;
+ String gifDetailsURL = String.format(GIFS_DETAIL_ENDPOINT, getGID(url));
+ return Http.url(gifDetailsURL).header("Authorization", "Bearer " + authToken).getJSON();
+ } else if (isSearch().matches() || isTags().matches()) {
+ var json = Http.url(getSearchOrTagsURL()).header("Authorization", "Bearer " + authToken).getJSON();
+ maxPages = json.getInt("pages");
+ return json;
+ } else {
+ username = getGID(url);
+ var uri = new URIBuilder(String.format(USERS_SEARCH_ENDPOINT, username));
+ uri.addParameter("order", "new");
+ uri.addParameter("count", Integer.toString(count));
+ uri.addParameter("page", Integer.toString(currentPage));
+ var json = Http.url(uri.build().toURL()).header("Authorization", "Bearer " + authToken).getJSON();
+ maxPages = json.getInt("pages");
+ return json;
+ }
+ } catch (URISyntaxException e) {
+ throw new IOException("Failed to build first page url", e);
+ }
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Matcher m = isProfile();
+ if (m.matches()) {
+ return m.group(1);
+ }
+ m = isSearch();
+ if (m.matches()) {
+ var sText = m.group(1);
+ if (sText == null || sText.isBlank()) {
+ throw new MalformedURLException(String.format("Expected redgifs.com/search?query=searchtext\n Got %s", url));
+ }
+ sText = URLDecoder.decode(sText, StandardCharsets.UTF_8);
+ sText = sText.replaceAll("[^A-Za-z0-9_-]", "-");
+ return sText;
+ }
+ m = isTags();
+ if (m.matches()) {
+ var sText = m.group(1);
+ if (sText == null || sText.isBlank()) {
+ throw new MalformedURLException(String.format("Expected redgifs.com/gifs/searchtags\n Got %s", url));
+ }
+ sText = URLDecoder.decode(sText, StandardCharsets.UTF_8);
+ var list = Arrays.asList(sText.split(","));
+ if (list.size() > 1) {
+ LOGGER.warn("Url with multiple tags found. \nThey will be sorted alphabetically for folder name.");
+ }
+ Collections.sort(list);
+ var gid = list.stream().reduce("", (acc, val) -> acc.concat("_" + val));
+ gid = gid.replaceAll("[^A-Za-z0-9_-]", "-");
+ return gid;
+ }
+ m = isSingleton();
+ if (m.matches()) {
+ return m.group(1).split("-")[0];
+ }
+ throw new MalformedURLException(
+ "Expected redgifs.com format: "
+ + "redgifs.com/watch/id or "
+ + "redgifs.com/users/id or "
+ + "redgifs.com/gifs/id or "
+ + "redgifs.com/search?query=text"
+ + " Got: " + url);
+ }
+
+ @Override
+ public JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException {
+ if (currentPage == maxPages || isSingleton().matches()) {
+ return null;
+ }
+ currentPage++;
+ if (isSearch().matches() || isTags().matches()) {
+ var json = Http.url(getSearchOrTagsURL()).header("Authorization", "Bearer " + authToken).getJSON();
+ // Handle rare maxPages change during a rip
+ maxPages = json.getInt("pages");
+ return json;
+ } else if (isProfile().matches()) {
+ var uri = new URIBuilder(String.format(USERS_SEARCH_ENDPOINT, getGID(url)));
+ uri.addParameter("order", "new");
+ uri.addParameter("count", Integer.toString(count));
+ uri.addParameter("page", Integer.toString(currentPage));
+ var json = Http.url(uri.build().toURL()).header("Authorization", "Bearer " + authToken).getJSON();
+ // Handle rare maxPages change during a rip
+ maxPages = json.getInt("pages");
+ return json;
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public List<String> getURLsFromJSON(JSONObject json) {
+ List<String> result = new ArrayList<>();
+ if (isProfile().matches() || isSearch().matches() || isTags().matches()) {
+ var gifs = json.getJSONArray("gifs");
+ for (var gif : gifs) {
+ if (((JSONObject)gif).isNull("gallery")) {
+ var hdURL = ((JSONObject)gif).getJSONObject("urls").getString("hd");
+ result.add(hdURL);
+ } else {
+ var galleryID = ((JSONObject)gif).getString("gallery");
+ var gifID = ((JSONObject)gif).getString("id");
+ result.addAll(getURLsForGallery(galleryID, gifID));
+ }
+ }
+ } else {
+ var gif = json.getJSONObject("gif");
+ if (gif.isNull("gallery")) {
+ String hdURL = gif.getJSONObject("urls").getString("hd");
+ result.add(hdURL);
+ } else {
+ var galleryID = gif.getString("gallery");
+ var gifID = gif.getString("id");
+ result.addAll(getURLsForGallery(galleryID, gifID));
+ }
+ }
+ return result;
+ }
+
+
+ /**
+ * Get all images for a gif url with multiple images
+ * @param galleryID gallery id
+ * @param gifID gif id with multiple images for logging
+ * @return List<String> of HD urls
+ */
+ private static List<String> getURLsForGallery(String galleryID, String gifID) {
+ List<String> list = new ArrayList<>();
+ if (galleryID == null || galleryID.isBlank()) {
+ return list;
+ }
+ try {
+ var json = Http.url(String.format(GALLERY_ENDPOINT, galleryID)).header("Authorization", "Bearer " + authToken).getJSON();
+ for (var gif : json.getJSONArray("gifs")) {
+ var hdURL = ((JSONObject)gif).getJSONObject("urls").getString("hd");
+ list.add(hdURL);
+ }
+ } catch (IOException e) {
+ LOGGER.error(String.format("Error fetching gallery %s for gif %s", galleryID, gifID), e);
+ }
+ return list;
+ }
+ /**
+ * Static helper method for retrieving video URLs for usage in RipUtils.
+ * Most of the code is lifted from getFirstPage and getURLsFromJSON
+ * @param url URL to redgif page
+ * @return URL to video
+ * @throws IOException
+ */
+ public static String getVideoURL(URL url) throws IOException, URISyntaxException {
+ LOGGER.info("Retrieving " + url.toExternalForm());
+ var m = SINGLETON_PATTERN.matcher(url.toExternalForm());
+ if (!m.matches()) {
+ throw new IOException(String.format("Cannot fetch redgifs url %s", url.toExternalForm()));
+ }
+ if (authToken == null || authToken.isBlank()) {
+ fetchAuthToken();
+ }
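+ // The watch-page slug is "<id>-<title-words>"; the API wants only the id.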
+ var gid = m.group(1).split("-")[0];
+ var gifDetailsURL = String.format(GIFS_DETAIL_ENDPOINT, gid);
+ var json = Http.url(gifDetailsURL).header("Authorization", "Bearer " + authToken).getJSON();
+ var gif = json.getJSONObject("gif");
+ if (!gif.isNull("gallery")) {
+ // TODO check how to handle an image gallery
+ throw new IOException(String.format("Multiple images found for url %s", url));
+ }
+ return gif.getJSONObject("urls").getString("hd");
+ }
+
+ /**
+ * Fetch a temporary auth token for the rip.
+ * @throws IOException
+ */
+ private static void fetchAuthToken() throws IOException {
+ var json = Http.url(TEMPORARY_AUTH_ENDPOINT).getJSON();
+ authToken = json.getString("token");
+ LOGGER.info("In case of redgifs 401 errors, restart the app to refresh the auth token");
+ }
+
+ /**
+ * Map browser url query params to search or tags endpoint query params and return the complete url.
+ *
+ * Search text for search url comes from the query params, whereas search text for tags url comes from the path.
+ *
+ * Tab type for search url comes from the path, whereas tab type for tags url comes from query params.
+ * @return Search or tags endpoint url
+ */
+ private URL getSearchOrTagsURL() throws IOException, URISyntaxException {
+ URIBuilder uri;
+ Map<String, String> endpointQueryParams = new HashMap<>();
+ var browserURLQueryParams = new URIBuilder(url.toString()).getQueryParams();
+ for (var qp : browserURLQueryParams) {
+ var name = qp.getName();
+ var value = qp.getValue();
+ switch (name) {
+ case "query":
+ endpointQueryParams.put("query", URLDecoder.decode(value, StandardCharsets.UTF_8));
+ break;
+ case "tab":
+ switch (value) {
+ case "gifs" -> endpointQueryParams.put("type", "g");
+ case "images" -> endpointQueryParams.put("type", "i");
+ default -> LOGGER.warn(String.format("Unsupported tab for tags url %s", value));
+ }
+ break;
+ case "verified":
+ if (value != null && value.equals("1")) {
+ if (isTags().matches()) {
+ endpointQueryParams.put("verified", "y");
+ } else {
+ endpointQueryParams.put("verified", "yes");
+ }
+ }
+ break;
+ case "order":
+ endpointQueryParams.put("order", value);
+ break;
+ case "viewMode":
+ break;
+ default:
+ LOGGER.warn(String.format("Unexpected query param %s for search url. Skipping.", name));
+ }
+ }
+
+ // Build the search or tags url and add missing query params if any
+ if (isTags().matches()) {
+ var subpaths = url.getPath().split("/");
+ if (subpaths.length != 0) {
+ endpointQueryParams.put("search_text", subpaths[subpaths.length-1]);
+ } else {
+ throw new IOException("Failed to get search tags for url");
+ }
+ // Check if it is the main tags page with all gifs, images, creator etc
+ if (!endpointQueryParams.containsKey("type")) {
+ LOGGER.warn("No tab selected, defaulting to gifs");
+ endpointQueryParams.put("type", "g");
+ }
+ uri = new URIBuilder(TAGS_ENDPOINT);
+ } else {
+ var tabType = "gifs";
+ var subpaths = url.getPath().split("/");
+ if (subpaths.length != 0) {
+ switch (subpaths[subpaths.length-1]) {
+ case "gifs" -> tabType = "gifs";
+ case "images" -> tabType = "images";
+ case "search" -> LOGGER.warn("No tab selected, defaulting to gifs");
+ default -> LOGGER.warn(String.format("Unsupported search tab %s, defaulting to gifs", subpaths[subpaths.length-1]));
+ }
+ }
+ uri = new URIBuilder(String.format(SEARCH_ENDPOINT, tabType));
+ }
+
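+ // page and count are common to both the search and tags endpoints.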
+ endpointQueryParams.put("page", Integer.toString(currentPage));
+ endpointQueryParams.put("count", Integer.toString(count));
+ endpointQueryParams.forEach(uri::addParameter);
+
+ return uri.build().toURL();
+ }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/Rule34Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/Rule34Ripper.java
index 681738fa..c7245739 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/Rule34Ripper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/Rule34Ripper.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -51,13 +53,13 @@ public class Rule34Ripper extends AbstractHTMLRipper {
"rule34.xxx/index.php?page=post&s=list&tags=TAG - got " + url + " instead");
}
- public URL getAPIUrl() throws MalformedURLException {
- URL urlToReturn = new URL("https://rule34.xxx/index.php?page=dapi&s=post&q=index&limit=100&tags=" + getGID(url));
+ public URL getAPIUrl() throws MalformedURLException, URISyntaxException {
+ URL urlToReturn = new URI("https://rule34.xxx/index.php?page=dapi&s=post&q=index&limit=100&tags=" + getGID(url)).toURL();
return urlToReturn;
}
@Override
- public Document getFirstPage() throws IOException {
+ public Document getFirstPage() throws IOException, URISyntaxException {
apiUrl = getAPIUrl().toExternalForm();
// "url" is an instance field of the superclass
return Http.url(getAPIUrl()).get();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RulePornRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RulePornRipper.java
index c9c487a7..be33c945 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/RulePornRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RulePornRipper.java
@@ -40,11 +40,6 @@ public class RulePornRipper extends AbstractSingleFileRipper {
"Expected ruleporn.com URL format: " + "ruleporn.com/NAME - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java
new file mode 100644
index 00000000..2df6ab2c
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java
@@ -0,0 +1,293 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.net.*;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.java_websocket.client.WebSocketClient;
+
+import org.apache.http.NameValuePair;
+import org.apache.http.client.utils.URLEncodedUtils;
+import org.java_websocket.handshake.ServerHandshake;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+import com.rarchives.ripme.ripper.AbstractJSONRipper;
+
+public class ScrolllerRipper extends AbstractJSONRipper {
+
+ public ScrolllerRipper(URL url) throws IOException {
+ super(url);
+ }
+
+ @Override
+ public String getHost() {
+ return "scrolller";
+ }
+ @Override
+ public String getDomain() {
+ return "scrolller.com";
+ }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ // Typical URL is: https://scrolller.com/r/subreddit
+ // Parameters like "filter" and "sort" can be passed (ex: https://scrolller.com/r/subreddit?filter=xxx&sort=yyyy)
+ Pattern p = Pattern.compile("^https?://scrolller\\.com/r/([a-zA-Z0-9]+).*?$");
+ Matcher m = p.matcher(url.toExternalForm());
+ if (m.matches()) {
+ return m.group(1);
+ }
+ throw new MalformedURLException("Expected scrolller.com URL format: " +
+ "scrolller.com/r/subreddit OR scrolller.com/r/subreddit?filter= - got " + url + " instead");
+ }
+
+ @Override
+ public void downloadURL(URL url, int index) {
+ addURLToDownload(url, getPrefix(index));
+ }
+
+ private JSONObject prepareQuery(String iterator, String gid, String sortByString) throws IOException, URISyntaxException {
+
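+ // Scrolller exposes two GraphQL operations: a plain query for the default
+ // listing, and a subscription (served over a WebSocket) when a sort order
+ // is requested.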
+ String QUERY_NOSORT = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }";
+ String QUERY_SORT = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }";
+
+ String filterString = convertFilterString(getParameter(this.url,"filter"));
+
+ JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)).put("sortBy", sortByString.toUpperCase());
+ JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", sortByString.equals("") ? QUERY_NOSORT : QUERY_SORT);
+
+ if (iterator != null) {
+ // Iterator is not present on the first page
+ variablesObject.put("iterator", iterator);
+ }
+ if (!filterString.equals("NOFILTER")) {
+ variablesObject.put("filter", filterString);
+ }
+
+ return sortByString.equals("") ? getPosts(finalQueryObject) : getPostsSorted(finalQueryObject);
+
+ }
+
+ public String convertFilterString(String filterParameter) {
+ // Converts the ?filter= parameter of the URL into the matching GraphQL enum value.
+ // Stripping the trailing "s" and calling toUpperCase would also work, but the switch reads more clearly.
+ switch (filterParameter.toLowerCase()) {
+ case "pictures":
+ return "PICTURE";
+ case "videos":
+ return "VIDEO";
+ case "albums":
+ return "ALBUM";
+ case "":
+ return "NOFILTER";
+ default:
+ LOGGER.error(String.format("Invalid filter %s, using no filter", filterParameter));
+ return "NOFILTER";
+ }
+ }
+
+ public String getParameter(URL url, String parameter) throws MalformedURLException {
+ // Gets passed parameters from the URL
+ String toReplace = String.format("https://scrolller.com/r/%s?",getGID(url));
+ List<NameValuePair> args = URLEncodedUtils.parse(url.toExternalForm(), Charset.defaultCharset());
+ for (NameValuePair arg : args) {
+ // First parameter contains part of the url so we have to remove it
+ // Ex: for the url https://scrolller.com/r/CatsStandingUp?filter=xxxx&sort=yyyy
+ // 1) arg.getName() => https://scrolller.com/r/CatsStandingUp?filter
+ // 2) arg.getName() => sort
+
+ if (arg.getName().replace(toReplace, "").toLowerCase().equals(parameter)) {
+ return arg.getValue();
+ }
+ }
+ return "";
+ }
+
+ private JSONObject getPosts(JSONObject data) {
+ // The actual GraphQL query call
+
+ try {
+ String url = "https://api.scrolller.com/api/v2/graphql";
+
+ URL obj = new URI(url).toURL();
+ HttpURLConnection conn = (HttpURLConnection) obj.openConnection();
+ conn.setReadTimeout(5000);
+ conn.addRequestProperty("Accept-Language", "en-US,en;q=0.8");
+ conn.addRequestProperty("User-Agent", "Mozilla");
+ conn.addRequestProperty("Referer", "scrolller.com");
+
+ conn.setDoOutput(true);
+
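+ // POST the GraphQL payload as the request body and read back the JSON reply.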
+ OutputStreamWriter w = new OutputStreamWriter(conn.getOutputStream(), StandardCharsets.UTF_8);
+
+ w.write(data.toString());
+ w.close();
+
+ BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8));
+ String inputLine;
+ StringBuilder jsonString = new StringBuilder();
+
+ while ((inputLine = in.readLine()) != null) {
+ jsonString.append(inputLine);
+ }
+
+ in.close();
+ conn.disconnect();
+
+ return new JSONObject(jsonString.toString());
+
+ } catch (Exception e) {
+ LOGGER.error("Error fetching posts from the Scrolller GraphQL endpoint", e);
+ }
+
+ return new JSONObject();
+ }
+
+ private JSONObject getPostsSorted(JSONObject data) throws MalformedURLException {
+
+ // The actual GraphQL query call (if sort parameter is present)
+ try {
+
+ ArrayList<String> postsJsonStrings = new ArrayList<>();
+
+ WebSocketClient wsc = new WebSocketClient(new URI("wss://api.scrolller.com/api/v2/graphql")) {
+ @Override
+ public void onOpen(ServerHandshake serverHandshake) {
+ // As soon as the WebSocket connects send our query
+ this.send(data.toString());
+ }
+
+ @Override
+ public void onMessage(String s) {
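+ // The final frame carries the iterator object; once it arrives the listing is complete.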
+ postsJsonStrings.add(s);
+ if (new JSONObject(s).getJSONObject("data").getJSONObject("fetchSubreddit").has("iterator")) {
+ this.close();
+ }
+ }
+
+ @Override
+ public void onClose(int i, String s, boolean b) {
+ }
+
+ @Override
+ public void onError(Exception e) {
+ LOGGER.error(String.format("WebSocket error, server reported %s", e.getMessage()));
+ }
+ };
+ wsc.connect();
+
+ while (!wsc.isClosed()) {
+ // Busy-wait: the post list is complete only once the server closes the connection.
+ }
+
+ JSONObject finalObject = new JSONObject();
+ JSONArray posts = new JSONArray();
+
+ // Iterator is the last object in the post list; duplicate it into its own object for clarity.
+ finalObject.put("iterator", new JSONObject(postsJsonStrings.get(postsJsonStrings.size()-1)));
+
+ for (String postString : postsJsonStrings) {
+ posts.put(new JSONObject(postString));
+ }
+ finalObject.put("posts", posts);
+
+ if (finalObject.getJSONArray("posts").length() == 1 && !finalObject.getJSONArray("posts").getJSONObject(0).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) {
+ // Only iterator, no posts.
+ return null;
+ }
+
+ return finalObject;
+
+ } catch (URISyntaxException ue) {
+ // Nothing to handle; the URI is hardcoded.
+ }
+
+ return null;
+ }
+
+ @Override
+ protected List<String> getURLsFromJSON(JSONObject json) throws JSONException {
+
+ boolean sortRequested = json.has("posts");
+
+ int bestArea = 0;
+ String bestUrl = "";
+ List<String> list = new ArrayList<>();
+
+ JSONArray itemsList = sortRequested ? json.getJSONArray("posts") : json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items");
+
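+ // For each post, keep only the mediaSource with the largest pixel area.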
+ for (Object item : itemsList) {
+
+ if (sortRequested && !((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) {
+ continue;
+ }
+
+ JSONArray sourcesTMP = sortRequested ? ((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").getJSONArray("mediaSources") : ((JSONObject) item).getJSONArray("mediaSources");
+ for (Object sourceTMP : sourcesTMP)
+ {
+ int widthTMP = ((JSONObject) sourceTMP).getInt("width");
+ int heightTMP = ((JSONObject) sourceTMP).getInt("height");
+ int areaTMP = widthTMP * heightTMP;
+
+ if (areaTMP > bestArea) {
+ bestArea = areaTMP;
+ bestUrl = ((JSONObject) sourceTMP).getString("url");
+ }
+ }
+ list.add(bestUrl);
+ bestUrl = "";
+ bestArea = 0;
+ }
+
+ return list;
+ }
+
+ @Override
+ protected JSONObject getFirstPage() throws IOException {
+ try {
+ return prepareQuery(null, this.getGID(url), getParameter(url,"sort"));
+ } catch (URISyntaxException e) {
+ LOGGER.error(String.format("Error obtaining first page: %s", e.getMessage()));
+ return null;
+ }
+ }
+
+ @Override
+ public JSONObject getNextPage(JSONObject source) throws IOException {
+ // Every call to the API returns an "iterator" string that must be passed back to fetch the next page
+ // Checking whether the iterator is null does not work for some reason, hence the "iterator.toString().equals("null")" comparison
+
+ Object iterator;
+ if (source.has("iterator")) {
+ // Sort requested, custom JSON.
+ iterator = source.getJSONObject("iterator").getJSONObject("data").getJSONObject("fetchSubreddit").get("iterator");
+ } else {
+ iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator");
+ }
+
+ if (!iterator.toString().equals("null")) {
+ // Need to change page.
+ try {
+ return prepareQuery(iterator.toString(), this.getGID(url), getParameter(url,"sort"));
+ } catch (URISyntaxException e) {
+ LOGGER.error(String.format("Error changing page: %s", e.getMessage()));
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ShesFreakyRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ShesFreakyRipper.java
index 73dad1b1..b96e2f6b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ShesFreakyRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ShesFreakyRipper.java
@@ -12,7 +12,6 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
public class ShesFreakyRipper extends AbstractHTMLRipper {
@@ -41,11 +40,6 @@ public class ShesFreakyRipper extends AbstractHTMLRipper {
+ "shesfreaky.com/gallery/... - got " + url + "instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- return Http.url(url).get();
- }
-
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java
index d6a0f9cb..f3a216f4 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java
@@ -41,12 +41,6 @@ public class SinfestRipper extends AbstractHTMLRipper {
"sinfest.net/view.php?date=XXXX-XX-XX/ - got " + url + " instead");
}
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
@Override
public Document getNextPage(Document doc) throws IOException {
Element elem = doc.select("td.style5 > a > img").last();
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java
deleted file mode 100644
index 4411adfe..00000000
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java
+++ /dev/null
@@ -1,168 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-public class SinnercomicsRipper extends AbstractHTMLRipper {
-
- private static final String HOST = "sinnercomics",
- DOMAIN = "sinnercomics.com";
-
- private static final int SLEEP_TIME = 500;
-
- enum RIP_TYPE {
- HOMEPAGE,
- PINUP,
- COMIC
- }
-
- private RIP_TYPE ripType;
- private Integer pageNum;
-
- public SinnercomicsRipper(URL url) throws IOException {
- super(url);
- }
-
- @Override
- public String getHost() {
- return HOST;
- }
-
- @Override
- public String getDomain() {
- return DOMAIN;
- }
-
- @Override
- public String normalizeUrl(String url) {
- // Remove the comments hashtag
- return url.replaceAll("/#(comments|disqus_thread)", "/");
- }
-
- @Override
- public String getGID(URL url) throws MalformedURLException {
- String cleanUrl = normalizeUrl(url.toExternalForm());
- Pattern p;
- Matcher m;
-
- p = Pattern.compile("^https?://sinnercomics\\.com/comic/([a-zA-Z0-9-]*)/?$");
- m = p.matcher(cleanUrl);
- if (m.matches()) {
- // Comic
- this.ripType = RIP_TYPE.COMIC;
- return m.group(1).replaceAll("-page-\\d+", "");
- }
-
- p = Pattern.compile("^https?://sinnercomics\\.com(?:/page/([0-9]+))?/?$");
- m = p.matcher(cleanUrl);
- if (m.matches()) {
- // Homepage
- this.ripType = RIP_TYPE.HOMEPAGE;
- if (m.group(1) != null) {
- this.pageNum = Integer.valueOf(m.group(1));
- } else {
- this.pageNum = 1;
- }
- return "homepage";
- }
-
- p = Pattern.compile("^https?://sinnercomics\\.com/([a-zA-Z0-9-]+)(?:/#comments)?/?$");
- m = p.matcher(cleanUrl);
- if (m.matches()) {
- // Pinup image
- this.ripType = RIP_TYPE.PINUP;
- return m.group(1);
- }
-
- throw new MalformedURLException("Expected sinnercomics.com URL format: " +
- "/pinupName or /comic/albumName or /page/number - got " + cleanUrl + " instead");
- }
-
- @Override
- public boolean canRip(URL url) {
- if (!url.getHost().endsWith(DOMAIN)) {
- return false;
- }
- try {
- getGID(url);
- } catch (MalformedURLException e) {
- // Can't get GID, can't rip it.
- return false;
- }
- return true;
- }
-
- @Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
- @Override
- public Document getNextPage(Document doc) throws IOException {
- String nextUrl = null;
-
- switch (this.ripType) {
- case PINUP:
- throw new IOException("No next page on a pinup");
-
- case COMIC:
- // We use comic-nav-next to the find the next page
- Element elem = doc.select("a.comic-nav-next").first();
- if (elem == null) {
- throw new IOException("No more pages");
- }
- nextUrl = elem.attr("href");
- break;
-
- default: // case HOMEPAGE:
- this.pageNum++;
- nextUrl = "https://sinnercomics.com/page/" + String.valueOf(this.pageNum);
- break;
- }
-
- // Wait to avoid IP bans
- sleep(SLEEP_TIME);
- return Http.url(nextUrl).get();
- }
-
- @Override
- public List getURLsFromPage(Document doc) {
- List result = new ArrayList<>();
-
- switch (this.ripType) {
- case COMIC:
- // comic pages only contain one image, determined by a meta tag
- for (Element el : doc.select("meta[property=og:image]")) {
- String imageSource = el.attr("content");
- imageSource = imageSource.replace(" alt=", "");
- result.add(imageSource);
- }
- break;
- default:
- for (Element el : doc.select(".entry p img")) {
- // These filters match the full size images but might match ads too...
- result.add(el.attr("src"));
- }
- break;
- }
-
- return result;
- }
-
- @Override
- public void downloadURL(URL url, int index) {
- addURLToDownload(url, getPrefix(index));
- }
-
-}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java
index b61f2fef..ad00e5c8 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java
@@ -89,11 +89,6 @@ public class SmuttyRipper extends AbstractHTMLRipper {
}
@Override
- public Document getFirstPage() throws IOException {
- // "url" is an instance field of the superclass
- return Http.url(url).get();
- }
-
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SoundgasmRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SoundgasmRipper.java
new file mode 100644
index 00000000..ab9ebfa9
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SoundgasmRipper.java
@@ -0,0 +1,69 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class SoundgasmRipper extends AbstractHTMLRipper {
+
+ private static final String HOST = "soundgasm.net";
+
+ public SoundgasmRipper(URL url) throws IOException, URISyntaxException {
+ super(new URI(url.toExternalForm()).toURL());
+ }
+
+ @Override
+ protected String getDomain() { return "soundgasm.net"; }
+
+ @Override
+ public String getHost() { return "soundgasm"; }
+
+ @Override
+ public String getGID(URL url) throws MalformedURLException {
+ Pattern p = Pattern.compile("^/u/([a-zA-Z0-9_-]+)/([a-zA-Z0-9_-]+).*$");
+ Matcher m = p.matcher(url.getFile());
+ if (m.find()) {
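+ // group(1) is the username, group(2) the track id; use the track id as GID.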
+ return m.group(m.groupCount());
+ }
+ throw new MalformedURLException(
+ "Expected soundgasm.net format: "
+ + "soundgasm.net/u/username/id"
+ + " Got: " + url);
+ }
+
+ @Override
+ public Document getFirstPage() throws IOException, URISyntaxException {
+ return super.getFirstPage();
+ }
+
+ @Override
+ public List getURLsFromPage(Document page) {
+ List