From 67dd4875d941799f2a333a8415c941e4e5ab0573 Mon Sep 17 00:00:00 2001
From: borderline232
Date: Sun, 8 Aug 2021 00:25:20 -0400
Subject: [PATCH] Added Reddit self-post functionality and fixed Redgifs

- Added self-post download functionality; it is currently always enabled and cannot be disabled in the jar
- Fixed RedgifsRipper so it no longer scrapes the mobile mp4 embedded in the page and instead queries the Redgifs API for the HD version
---
 .../ripme/ripper/rippers/RedditRipper.java    | 124 ++++++++++++++++++
 .../ripme/ripper/rippers/RedgifsRipper.java   |  55 ++++----
 .../tst/ripper/rippers/RedditRipperTest.java  |  14 ++
 .../tst/ripper/rippers/RedgifsRipperTest.java |  11 +-
 4 files changed, 178 insertions(+), 26 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
index 09569fc7..765f9797 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java
@@ -1,14 +1,18 @@
 package com.rarchives.ripme.ripper.rippers;
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.Date;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import com.rarchives.ripme.ui.RipStatusMessage;
+import j2html.TagCreator;
+import j2html.tags.ContainerTag;
 import org.json.JSONArray;
 import org.json.JSONException;
 import org.json.JSONObject;
@@ -19,6 +23,9 @@ import com.rarchives.ripme.ui.UpdateUtils;
 import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.RipUtils;
 import com.rarchives.ripme.utils.Utils;
+import org.jsoup.Jsoup;
+
+import static j2html.TagCreator.*;
 
 public class RedditRipper extends AlbumRipper {
@@ -104,6 +111,14 @@ public class RedditRipper extends AlbumRipper {
             children = data.getJSONArray("children");
             for (int j = 0; j < children.length(); j++) {
                 parseJsonChild(children.getJSONObject(j));
+
+                if (children.getJSONObject(j).getString("kind").equals("t3") &&
+                        children.getJSONObject(j).getJSONObject("data").getBoolean("is_self")
+                ) {
+                    URL selfPostURL = new URL(children.getJSONObject(j).getJSONObject("data").getString("url"));
+                    System.out.println(selfPostURL.toExternalForm());
+                    saveText(getJsonArrayFromURL(getJsonURL(selfPostURL)));
+                }
             }
             if (data.has("after") && !data.isNull("after")) {
                 String nextURLString = Utils.stripURLParameter(url.toExternalForm(), "after");
@@ -225,6 +240,112 @@ public class RedditRipper extends AlbumRipper {
         }
     }
 
+    private void saveText(JSONArray jsonArray) throws JSONException {
+        File saveFileAs;
+
+        JSONObject selfPost = jsonArray.getJSONObject(0).getJSONObject("data")
+                .getJSONArray("children").getJSONObject(0).getJSONObject("data");
+        JSONArray comments = jsonArray.getJSONObject(1).getJSONObject("data")
+                .getJSONArray("children");
+
+        if (selfPost.getString("selftext").equals("")) { return; }
+
+        final String title = selfPost.getString("title");
+        final String id = selfPost.getString("id");
+        final String author = selfPost.getString("author");
+        final String creationDate = new Date((long) selfPost.getInt("created") * 1000).toString();
+        final String subreddit = selfPost.getString("subreddit");
+        final String selfText = selfPost.getString("selftext_html");
+        final String permalink = selfPost.getString("url");
+
+        String html = TagCreator.html(
+                head(
+                        title(title),
+                        style(rawHtml(HTML_STYLING))
+                ),
+                body(
+                        div(
+                                h1(title),
a(subreddit).withHref("https://www.reddit.com/r/" + subreddit), + a("Original").withHref(permalink), + br() + ).withClass("thing"), + div( + div( + span( + a(author).withHref("https://www.reddit.com/u/" + author) + ).withClass("author op") + ).withClass("thing oppost") + .withText(creationDate) + .with(rawHtml(Jsoup.parse(selfText).text())) + ).withClass("flex") + ).with(getComments(comments, author)), + script(rawHtml(HTML_SCRIPT)) + ).renderFormatted(); + + try { + saveFileAs = new File(workingDir.getCanonicalPath() + + "" + File.separator + + id + "_" + title.replaceAll("[\\\\/:*?\"<>|]", "") + + ".html"); + FileOutputStream out = new FileOutputStream(saveFileAs); + out.write(html.getBytes()); + out.close(); + } catch (IOException e) { + LOGGER.error("[!] Error creating save file path for description '" + url + "':", e); + return; + } + + LOGGER.debug("Downloading " + url + "'s self post to " + saveFileAs); + super.retrievingSource(permalink); + if (!saveFileAs.getParentFile().exists()) { + LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent())); + saveFileAs.getParentFile().mkdirs(); + } + } + + private ContainerTag getComments(JSONArray comments, String author) { + ContainerTag commentsDiv = div().withId("comments"); + + for (int i = 0; i < comments.length(); i++) { + JSONObject data = comments.getJSONObject(i).getJSONObject("data"); + + ContainerTag commentDiv = + div( + span(data.getString("author")).withClasses("author", iff(data.getString("author").equals(author), "op")), + a(new Date((long) data.getInt("created") * 1000).toString()).withHref("#" + data.getString("name")) + ).withClass("thing comment").withId(data.getString("name")) + .with(rawHtml(Jsoup.parse(data.getString("body_html")).text())); + + commentDiv = getNestedComments(data, commentDiv, author); + commentsDiv.with(commentDiv); + } + return commentsDiv; + } + + private ContainerTag getNestedComments(JSONObject data, ContainerTag parentDiv, String author) { + if (data.has("replies") && data.get("replies") instanceof JSONObject) { + for (int i = 0; i <= data.getJSONObject("replies").getJSONObject("data").getJSONArray("children").length() - 1; i++) { + JSONObject nestedComment = data.getJSONObject("replies") + .getJSONObject("data") + .getJSONArray("children") + .getJSONObject(i).getJSONObject("data"); + + ContainerTag childDiv = + div( + div( + span(nestedComment.getString("author")).withClasses("author", iff(nestedComment.getString("author").equals(author), "op")), + a(new Date((long) nestedComment.getInt("created") * 1000).toString()).withHref("#" + nestedComment.getString("name")) + ).withClass("comment").withId(nestedComment.getString("name")) + .with(rawHtml(Jsoup.parse(nestedComment.getString("body_html")).text())) + ).withClass("child"); + + parentDiv.with(getNestedComments(nestedComment, childDiv, author)); + } + } + return parentDiv; + } + private URL parseRedditVideoMPD(String vidURL) { org.jsoup.nodes.Document doc = null; try { @@ -369,4 +490,7 @@ public class RedditRipper extends AlbumRipper { throw new MalformedURLException("Only accepts user pages, subreddits, post, or gallery can't understand " + url); } + private static final String HTML_STYLING = " .author { font-weight: bold; } .op { color: blue; } .comment { border: 0px; margin: 0 0 25px; padding-left: 5px; } .child { margin: 2px 0 0 20px; border-left: 2px dashed #AAF; } .collapsed { background: darkgrey; margin-bottom: 0; } .collapsed > div { display: none; } .md { max-width: 840px; padding-right: 1em; } h1 { margin: 0; } 
body { position: relative; background-color: #eeeeec; color: #00000a; font-weight: 400; font-style: normal; font-variant: normal; font-family: Helvetica,Arial,sans-serif; line-height: 1.4 } blockquote { margin: 5px 5px 5px 15px; padding: 1px 1px 1px 15px; max-width: 60em; border: 1px solid #ccc; border-width: 0 0 0 1px; } pre { white-space: pre-wrap; } img, video { max-width: 60vw; max-height: 90vh; object-fit: contain; } .thing { overflow: hidden; margin: 0 5px 3px 40px; border: 1px solid #e0e0e0; background-color: #fcfcfb; } :target > .md { border: 5px solid blue; } .post { margin-bottom: 20px; margin-top: 20px; } .gold { background: goldenrod; } .silver { background: silver; } .platinum { background: aqua; } .deleted { background: #faa; } .md.deleted { background: inherit; border: 5px solid #faa; } .oppost { background-color: #EEF; } blockquote > p { margin: 0; } #related { max-height: 20em; overflow-y: scroll; background-color: #F4FFF4; } #related h3 { position: sticky; top: 0; background-color: white; } .flex { display: flex; flex-flow: wrap; flex-direction: row-reverse; justify-content: flex-end; } "; + private static final String HTML_SCRIPT = "document.addEventListener('mousedown', function(e) { var t = e.target; if (t.className == 'author') { t = t.parentElement; } if (t.classList.contains('comment')) { t.classList.toggle('collapsed'); e.preventDefault(); e.stopPropagation(); return false; } });"; + } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java index 04442abf..2b169ae3 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java @@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; import org.json.JSONArray; import org.json.JSONObject; +import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -72,14 +73,15 @@ public class RedgifsRipper extends AbstractHTMLRipper { @Override public Document getFirstPage() throws IOException { if (!isProfile().matches() && !isSearch().matches()) { - return Http.url(url).get(); + return Jsoup.connect(getJsonURL(url).toExternalForm()) + .ignoreContentType(true).get(); } else if (isSearch().matches()) { searchText = getGID(url).replace("-", " "); return Http.url( - new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get(); + new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get(); } else { username = getGID(url); - return Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count)) + return Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count)) .ignoreContentType().get(); } } @@ -126,7 +128,7 @@ public class RedgifsRipper extends AbstractHTMLRipper { public Document getNextPage(Document doc) throws IOException { if (isSearch().matches()) { Document d = Http.url( - new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText + new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchCount*++searchStart)) .ignoreContentType().get(); return 
(hasURLs(d).isEmpty()) ? null : d; @@ -134,7 +136,7 @@ public class RedgifsRipper extends AbstractHTMLRipper { if (cursor.equals("") || cursor.equals("null")) { return null; } else { - Document d = Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get(); + Document d = Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get(); return (hasURLs(d).isEmpty()) ? null : d; } } @@ -146,14 +148,9 @@ public class RedgifsRipper extends AbstractHTMLRipper { if (isProfile().matches() || isSearch().matches()) { result = hasURLs(doc); } else { - Elements videos = doc.select("script"); - for (Element el : videos) { - String json = el.html(); - if (json.startsWith("{")) { - JSONObject page = new JSONObject(json); - result.add(page.getJSONObject("video").getString("contentUrl")); - } - } + JSONObject api = new JSONObject(doc.body().html()); + result.add(api.getJSONObject("gfyItem").getString("mp4Url")); + } return result; } @@ -183,19 +180,29 @@ public class RedgifsRipper extends AbstractHTMLRipper { public static String getVideoURL(URL url) throws IOException { LOGGER.info("Retrieving " + url.toExternalForm()); - //Sanitize the URL first - url = new URL(url.toExternalForm().replace("/gifs/detail", "")); + try { + Document doc = Jsoup.connect(getJsonURL(url).toExternalForm()) + .ignoreContentType(true).get(); - Document doc = Http.url(url).get(); - Elements videos = doc.select("script"); - for (Element el : videos) { - String json = el.html(); - if (json.startsWith("{")) { - JSONObject page = new JSONObject(json); - return page.getJSONObject("video").getString("contentUrl"); - } + JSONObject api = new JSONObject(doc.body().html()); + return api.getJSONObject("gfyItem").getJSONObject("content_urls") + .getJSONObject("mp4").getString("url"); + + } catch (NullPointerException e) { + return null; } - throw new IOException(); } + public static URL getJsonURL(URL url) throws MalformedURLException{ + String regex = "^https?://[wm.]*redgifs\\.com/watch/([a-zA-Z0-9_]+).*$"; + + final Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); + final Matcher matcher = pattern.matcher(url.toExternalForm()); + + if (matcher.matches()) { + return new URL("https://api.redgifs.com/v1/gfycats/" + matcher.group(1)); + } + + return null; + } } diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedditRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedditRipperTest.java index 8ad6fd09..20824fda 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedditRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedditRipperTest.java @@ -47,6 +47,20 @@ public class RedditRipperTest extends RippersTest { testRipper(ripper); } + @Test + public void testSelfPostRip() throws IOException { + RedditRipper ripper = new RedditRipper( + new URL("https://www.reddit.com/r/gonewildstories/comments/oz7d97/f_18_finally_having_a_normal_sex_life/") + ); + testRipper(ripper); + } + + @Test + public void testSelfPostAuthorRip() throws IOException { + RedditRipper ripper = new RedditRipper(new URL("https://www.reddit.com/user/ickybabie_")); + testRipper(ripper); + } + /** * GFYCAT TEST Tests a Bad URL with the "/gifs/detail" inside. 
* diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedgifsRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedgifsRipperTest.java index ed71128d..01c7a622 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedgifsRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedgifsRipperTest.java @@ -1,5 +1,6 @@ package com.rarchives.ripme.tst.ripper.rippers; +import com.rarchives.ripme.ripper.rippers.RedditRipper; import com.rarchives.ripme.ripper.rippers.RedgifsRipper; import org.jsoup.nodes.Document; import org.junit.jupiter.api.*; @@ -53,8 +54,14 @@ public class RedgifsRipperTest extends RippersTest { Document doc = ripper.getFirstPage(); doc = ripper.getNextPage(doc); - Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location())); + Assertions.assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location())); doc = ripper.getNextPage(doc); - Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location())); + Assertions.assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location())); + } + + @Test + public void testRedditRedgifs() throws IOException { + RedditRipper ripper = new RedditRipper(new URL("https://www.reddit.com/r/nsfwhardcore/comments/ouz5bw/me_cumming_on_his_face/")); + testRipper(ripper); } }
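
Reviewer note (illustrative only, not part of the diff): the Redgifs change replaces page scraping with a call to the v1 API. Below is a minimal, self-contained sketch of that lookup, reusing the watch-URL regex and the gfyItem/content_urls/mp4 fields that the patched getJsonURL() and getVideoURL() use; the gif id in the example URL is made up, and jsoup plus org.json are the same libraries the ripper already depends on.

import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONObject;
import org.jsoup.Jsoup;

public class RedgifsApiExample {
    // Same mapping as RedgifsRipper.getJsonURL(): watch page -> v1 gfycats endpoint
    private static final Pattern WATCH_URL =
            Pattern.compile("^https?://[wm.]*redgifs\\.com/watch/([a-zA-Z0-9_]+).*$", Pattern.CASE_INSENSITIVE);

    public static void main(String[] args) throws Exception {
        URL watchUrl = new URL("https://www.redgifs.com/watch/someexamplegif"); // hypothetical id
        Matcher m = WATCH_URL.matcher(watchUrl.toExternalForm());
        if (!m.matches()) {
            throw new IllegalArgumentException("Not a Redgifs watch URL: " + watchUrl);
        }

        // API endpoint used by the patched ripper
        URL apiUrl = new URL("https://api.redgifs.com/v1/gfycats/" + m.group(1));

        // Fetch the raw JSON body; ignoreContentType lets jsoup return non-HTML responses
        String json = Jsoup.connect(apiUrl.toExternalForm()).ignoreContentType(true).execute().body();

        // gfyItem.content_urls.mp4.url is the HD rendition read by getVideoURL()
        JSONObject gfyItem = new JSONObject(json).getJSONObject("gfyItem");
        String hdMp4 = gfyItem.getJSONObject("content_urls").getJSONObject("mp4").getString("url");
        System.out.println(hdMp4);
    }
}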
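A second sketch (also illustrative only) shows the Reddit listing shape that the new saveText() relies on: requesting a post's JSON returns a two-element array whose first listing contains the self post and whose second contains the comment tree. The permalink below is hypothetical and the ".json" suffix is hard-coded here for brevity; the ripper itself builds the URL via getJsonURL() and fetches it with getJsonArrayFromURL().

import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Jsoup;

public class RedditSelfPostJsonExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical post permalink with a ".json" suffix appended
        String jsonUrl = "https://www.reddit.com/r/somesubreddit/comments/abc123/some_title/.json";

        String body = Jsoup.connect(jsonUrl)
                .ignoreContentType(true)
                .userAgent("ripme example")
                .execute()
                .body();

        // Reddit returns [0] = post listing, [1] = comment listing
        JSONArray listings = new JSONArray(body);

        JSONObject selfPost = listings.getJSONObject(0).getJSONObject("data")
                .getJSONArray("children").getJSONObject(0).getJSONObject("data");
        JSONArray comments = listings.getJSONObject(1).getJSONObject("data")
                .getJSONArray("children");

        System.out.println("title:        " + selfPost.getString("title"));
        System.out.println("selftext:     " + selfPost.getString("selftext"));
        System.out.println("top comments: " + comments.length());
    }
}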