diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java
index 7d1a38bc..3c9d751d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java
@@ -7,13 +7,31 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
public class ThechiveRipper extends AbstractHTMLRipper {
+ private Pattern p1 = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
+ private Pattern imagePattern = Pattern.compile("
");
+
+ // i.thechive.com specific variables.
+ private Pattern p2 = Pattern.compile("^https?://i.thechive.com/([0-9a-zA-Z_]+)");
+ private String jsonUrl = "https://i.thechive.com/rest/uploads";
+ private Map cookies = new HashMap<>();
+ private String nextSeed = "";
+ private String username = "";
public ThechiveRipper(URL url) throws IOException {
super(url);
@@ -21,7 +39,12 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public String getHost() {
- return "thechive";
+ Matcher m1 = p1.matcher(url.toExternalForm());
+ if (m1.matches()) {
+ return "thechive";
+ } else {
+ return "i.thechive"; // Distinct host name so that albums ripped from i.thechive.com get a suitable title.
+ }
}
@Override
@@ -31,14 +54,20 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- boolean isTag = false;
- return m.group(1);
+
+ Matcher m1 = p1.matcher(url.toExternalForm());
+ if (m1.matches()) {
+ return m1.group(1);
}
+
+ Matcher m2 = p2.matcher(url.toExternalForm());
+ if (m2.matches()) {
+ username = m2.group(1);
+ return username;
+ }
+
throw new MalformedURLException("Expected thechive.com URL format: "
- + "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ - got " + url + " instead");
+ + "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ OR i.thechive.com/username, got " + url + " instead.");
}
@Override
@@ -49,27 +78,148 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public List getURLsFromPage(Document doc) {
- List result = new ArrayList<>();
- for (Element el : doc.select("img.attachment-gallery-item-full")) {
- String imageSource;
- if (el.attr("data-gifsrc").isEmpty()) { //If it's not a gif
- imageSource = el.attr("src");
- } else { //If it is a gif
- imageSource = el.attr("data-gifsrc") //from data-gifsrc attribute
- .replaceAll("\\?w=\\d{3}", ""); //remove the width modifier at the end to get highest resolution
- //May need to replace the regex's {3} later on if website starts giving higher-res photos by default.
- }
+ List result;
+ Matcher matcher = p1.matcher(url.toExternalForm());
- // We replace thumbs with resizes so we can the full sized images
- imageSource = imageSource.replace("thumbs", "resizes");
- result.add(imageSource);
+ if (matcher.matches()) {
+ // for url type: thechive.com/YEAR/MONTH/DAY/POSTTITLE/
+ result = getUrlsFromThechive(doc);
+ } else {
+ // for url type: i.thechive.com/username
+ result = getUrlsFromIDotThechive();
}
return result;
}
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ Matcher matcher = p1.matcher(url.toExternalForm());
+
+ if (matcher.matches()) {
+ // URLs of the form thechive.com/YEAR/MONTH/DAY/POSTTITLE/ have only a single page.
+ return null;
+ } else {
+ if (nextSeed == null) {
+ throw new IOException("No more pages.");
+ }
+ }
+
+ // The following try block checks whether the next JSON response contains any images.
+ // This is done to avoid the IOException in the rip() method that is caused when
+ // getURLsFromPage() returns an empty list.
+ JSONArray imgList;
+ try {
+ Response response = Http.url(jsonUrl).data("seed", nextSeed).data("queryType", "by-username")
+ .data("username", username).ignoreContentType().cookies(cookies).response();
+ cookies = response.cookies();
+ JSONObject json = new JSONObject(response.body());
+ imgList = json.getJSONArray("uploads");
+ } catch (Exception e) {
+ throw new IOException("Error fetching next page.", e);
+ }
+
+ if (imgList != null && imgList.length() > 0) {
+ // Return an empty document, as the document is not used for the i.thechive.com/username URL type.
+ return new Document(url.toString());
+ } else {
+ // Return null because this is the last page.
+ return null;
+ }
+ }
+
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
+ private List getUrlsFromThechive(Document doc) {
+ /*
+ * The image urls are stored in a