diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java
index 7d1a38bc..e3bdd028 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java
@@ -7,13 +7,31 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
public class ThechiveRipper extends AbstractHTMLRipper {
+    private Pattern p1 = Pattern.compile("^https?://thechive\\.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
+    private Pattern imagePattern = Pattern.compile("<img\\s(?:.|\\n)+?>"); // NOTE(review): the literal was lost in extraction; reconstructed as "one <img ...> tag" - confirm against upstream.
+
+    // i.thechive.com specific variables.
+    private Pattern p2 = Pattern.compile("^https?://i\\.thechive\\.com/([0-9a-zA-Z_]+)");
+    private String jsonUrl = "https://i.thechive.com/rest/uploads";
+    private Map<String, String> cookies = new HashMap<>();
+    private String nextSeed = "";
+    private String username = "";
public ThechiveRipper(URL url) throws IOException {
super(url);
@@ -21,7 +39,12 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public String getHost() {
- return "thechive";
+        // URLs matching p1 (thechive.com posts) keep the plain host name;
+        // anything else is reported as "i.thechive" for a suitable album title.
+        boolean isPostUrl = p1.matcher(url.toExternalForm()).matches();
+        return isPostUrl
+                ? "thechive"
+                : "i.thechive";
}
@Override
@@ -31,14 +54,20 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
- Matcher m = p.matcher(url.toExternalForm());
- if (m.matches()) {
- boolean isTag = false;
- return m.group(1);
+
+ Matcher m1 = p1.matcher(url.toExternalForm()); // p1: thechive.com/YEAR/MONTH/DAY/POSTTITLE/
+ if (m1.matches()) {
+ return m1.group(1); // group 1 is the post-title segment of the URL
}
+
+ Matcher m2 = p2.matcher(url.toExternalForm()); // p2: i.thechive.com/username
+ if (m2.matches()) {
+ username = m2.group(1); // side effect: stored in the field that the JSON requests send as "username"
+ return username;
+ }
+
throw new MalformedURLException("Expected thechive.com URL format: "
- + "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ - got " + url + " instead");
+ + "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ OR i.thechive.com/username, got " + url + " instead.");
}
@Override
@@ -49,27 +78,120 @@ public class ThechiveRipper extends AbstractHTMLRipper {
@Override
public List getURLsFromPage(Document doc) {
- List result = new ArrayList<>();
- for (Element el : doc.select("img.attachment-gallery-item-full")) {
- String imageSource;
- if (el.attr("data-gifsrc").isEmpty()) { //If it's not a gif
- imageSource = el.attr("src");
- } else { //If it is a gif
- imageSource = el.attr("data-gifsrc") //from data-gifsrc attribute
- .replaceAll("\\?w=\\d{3}", ""); //remove the width modifier at the end to get highest resolution
- //May need to replace the regex's {3} later on if website starts giving higher-res photos by default.
- }
+ List result;
+ Matcher matcher = p1.matcher(url.toExternalForm()); // p1 matches thechive.com post URLs
- // We replace thumbs with resizes so we can the full sized images
- imageSource = imageSource.replace("thumbs", "resizes");
- result.add(imageSource);
+ if (matcher.matches()) {
+ result = getUrlsFromThechive(doc); // post page: links are read from the document's script tags
+ } else {
+ result = getUrlsFromIDotThechive(); // otherwise doc is not used; links come from the JSON endpoint (jsonUrl)
}
return result;
}
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // URLs matching p1 (thechive.com posts) never have a following page.
+        if (p1.matcher(url.toExternalForm()).matches()) {
+            return null;
+        }
+
+        // nextSeed is reset to null when the last fetched batch held no uploads.
+        if (nextSeed == null) {
+            throw new IOException("No more pages.");
+        }
+
+        // Probe the JSON endpoint to check whether the next batch has elements.
+        JSONArray uploads;
+        try {
+            Response probe = Http.url(jsonUrl)
+                    .data("seed", nextSeed)
+                    .data("queryType", "by-username")
+                    .data("username", username)
+                    .ignoreContentType()
+                    .cookies(cookies)
+                    .response();
+            cookies = probe.cookies(); // reuse the returned cookies on later requests
+            uploads = new JSONObject(probe.body()).getJSONArray("uploads");
+        } catch (Exception e) {
+            throw new IOException("Error fetching next page.", e);
+        }
+
+        boolean hasMore = uploads != null && uploads.length() > 0;
+        return hasMore ? new Document(url.toString()) : null; // empty document.
+    }
+
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
+    // Collects image/gif links from the script tags of a thechive.com post that mention CHIVE_GALLERY_ITEMS.
+    private List<String> getUrlsFromThechive(Document doc) {
+        List<String> result = new ArrayList<>();
+        Elements scripts = doc.getElementsByTag("script");
+
+        for (Element script : scripts) {
+            String data = script.data();
+            if (!data.contains("CHIVE_GALLERY_ITEMS")) {
+                continue;
+            }
+
+            /*
+             * We add all the <img> tags in a single StringBuilder and parse as HTML for
+             * easy sorting of img/ gifs.
+             */
+            StringBuilder allImgTags = new StringBuilder();
+            Matcher matcher = imagePattern.matcher(data);
+            while (matcher.find()) {
+                // Remove every backslash from the matched tag (presumably JS string escapes) before parsing.
+                allImgTags.append(matcher.group(0).replaceAll("\\\\", ""));
+            }
+
+            // Now we parse and sort links: use data-gifsrc when the tag has it, otherwise src.
+            Document imgDoc = Jsoup.parse(allImgTags.toString());
+            Elements imgs = imgDoc.getElementsByTag("img");
+            for (Element img : imgs) {
+                if (img.hasAttr("data-gifsrc")) {
+                    result.add(img.attr("data-gifsrc"));
+                } else {
+                    result.add(img.attr("src"));
+                }
+            }
+        }
+
+        // Strip all GET parameters from the links (such as quality). A link without a
+        // query string is kept unchanged; substring(0, -1) would otherwise throw.
+        result.replaceAll(s -> { int q = s.indexOf('?'); return q < 0 ? s : s.substring(0, q); });
+
+        return result;
+    }
+
+    // Fetches one batch of uploads for the i.thechive.com user (pattern p2) and moves nextSeed forward.
+    private List<String> getUrlsFromIDotThechive() {
+        List<String> result = new ArrayList<>();
+        try {
+            Response response = Http.url(jsonUrl).data("seed", nextSeed).data("queryType", "by-username")
+                    .data("username", username).ignoreContentType().cookies(cookies).response();
+            cookies = response.cookies();
+            JSONObject json = new JSONObject(response.body());
+            JSONArray imgList = json.getJSONArray("uploads");
+            nextSeed = null; // if no more images, nextSeed stays null
+            for (int i = 0; i < imgList.length(); i++) {
+                JSONObject img = imgList.getJSONObject(i);
+                if (img.getString("mediaType").equals("gif")) {
+                    result.add("https:" + img.getString("mediaUrlOverlay"));
+                } else {
+                    result.add("https:" + img.getString("mediaGifFrameUrl"));
+                }
+                nextSeed = img.getString("activityId"); // the last upload's id seeds the next request
+            }
+        } catch (IOException e) {
+            LOGGER.error("Unable to fetch JSON data for url: " + url, e); // keep the cause in the log
+        } catch (JSONException e) {
+            LOGGER.error("JSON error while parsing data for url: " + url, e); // keep the cause in the log
+        }
+        return result;
+    }
+
}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ThechiveRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ThechiveRipperTest.java
index 89470dce..3e2e3f6c 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ThechiveRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ThechiveRipperTest.java
@@ -26,9 +26,9 @@ package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.ThechiveRipper;
import java.io.IOException;
import java.net.URL;
-import org.jsoup.nodes.Attributes;
-import org.jsoup.nodes.Element;
-import org.jsoup.parser.Tag;
+//import org.jsoup.nodes.Attributes;
+//import org.jsoup.nodes.Element;
+//import org.jsoup.parser.Tag;
/**
*
@@ -41,40 +41,53 @@ public class ThechiveRipperTest extends RippersTest {
*
* @throws IOException
*/
- public void theChiveRip() throws IOException {
- ThechiveRipper ripper = new ThechiveRipper(new URL("https://thechive.com/2018/10/03/the-definitive-list-of-the-hottest-horror-movie-babes/"));
+    public void testTheChiveRip() throws IOException {
+        URL photoPost = new URL(
+                "https://thechive.com/2019/03/16/beautiful-badasses-lookin-good-in-and-out-of-uniform-35-photos/");
+        testRipper(new ThechiveRipper(photoPost));
+    }
+
+    public void testTheChiveGif() throws IOException {
+        URL gifPost = new URL("https://thechive.com/2019/03/14/dont-tease-me-just-squeeze-me-20-gifs/");
+        ThechiveRipper ripper = new ThechiveRipper(gifPost);
testRipper(ripper);
}
/*
-
- //If anyone figures out how to get JSOUP Elements mocked up, we can use the following methods to test both jpeg + gif ripping.
-
- public void testGifRip() throws IOException {
- String elementInString = "
"
-
- Element el = new Element(
- new Tag("img"),
- "",//URI
- new Attributes());
- String URL = ThechiveRipper.getImageSource(el);
- assertTrue(URL.equals("https://thechive.files.wordpress.com/2018/10/american_mary_crimson_quill-1.gif"));
+ * "i.thechive.com" test.
+ */
+    public void testIDotThechive() throws IOException {
+        URL profile = new URL("https://i.thechive.com/HHHoney");
+        testRipper(new ThechiveRipper(profile));
}
- public void testGifRip() throws IOException {
- String elementInString = "
";
- Element el = new Element(
- new Tag("img"),
- "",//URI
- new Attributes());
- String URL = ThechiveRipper.getImageSource(el);
- assertTrue(URL.equals("https://thechive.files.wordpress.com/2018/10/the-definitive-list-of-the-hottest-horror-movie-babes-11.jpg"));
- }
+ /*
+ *
+ * //If anyone figures out how to get JSOUP Elements mocked up, we can use the
+ * following methods to test both jpeg + gif ripping.
+ *
+ * public void testGifRip() throws IOException { String elementInString =
+ * "
"
+ *
+ * Element el = new Element( new Tag("img"), "",//URI new Attributes()); String
+ * URL = ThechiveRipper.getImageSource(el); assertTrue(URL.equals(
+ * "https://thechive.files.wordpress.com/2018/10/american_mary_crimson_quill-1.gif"
+ * )); }
+ *
+ * public void testGifRip() throws IOException { String elementInString =
+ * "
"
+ * ; Element el = new Element( new Tag("img"), "",//URI new Attributes());
+ * String URL = ThechiveRipper.getImageSource(el); assertTrue(URL.equals(
+ * "https://thechive.files.wordpress.com/2018/10/the-definitive-list-of-the-hottest-horror-movie-babes-11.jpg"
+ * )); }
*/
}
\ No newline at end of file