1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-01-17 20:58:31 +01:00

Added Reddit selfPost functionality and fixed Redgifs

- Added self-post download functionality; currently it is always enabled
  and cannot be disabled in the jar
- Fixed Redgifs using the mobile mp4 found in its document; it now uses
  the Redgifs API to fetch the HD version
This commit is contained in:
borderline232 2021-08-08 00:25:20 -04:00 committed by soloturn
parent 18f141bbef
commit 67dd4875d9
4 changed files with 178 additions and 26 deletions

View File

@ -1,14 +1,18 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage;
import j2html.TagCreator;
import j2html.tags.ContainerTag;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
@ -19,6 +23,9 @@ import com.rarchives.ripme.ui.UpdateUtils;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.Jsoup;
import static j2html.TagCreator.*;
public class RedditRipper extends AlbumRipper {
@ -104,6 +111,14 @@ public class RedditRipper extends AlbumRipper {
children = data.getJSONArray("children");
for (int j = 0; j < children.length(); j++) {
parseJsonChild(children.getJSONObject(j));
if (children.getJSONObject(j).getString("kind").equals("t3") &&
children.getJSONObject(j).getJSONObject("data").getBoolean("is_self")
) {
URL selfPostURL = new URL(children.getJSONObject(j).getJSONObject("data").getString("url"));
System.out.println(selfPostURL.toExternalForm());
saveText(getJsonArrayFromURL(getJsonURL(selfPostURL)));
}
}
if (data.has("after") && !data.isNull("after")) {
String nextURLString = Utils.stripURLParameter(url.toExternalForm(), "after");
@ -225,6 +240,112 @@ public class RedditRipper extends AlbumRipper {
}
}
/**
 * Renders a reddit self post (text post) and its comment tree to a standalone
 * HTML file inside the ripper's working directory.
 *
 * @param jsonArray payload of reddit's comments endpoint: element 0 holds the
 *                  submission itself, element 1 holds its comment tree
 * @throws JSONException if the payload does not have the expected structure
 */
private void saveText(JSONArray jsonArray) throws JSONException {
    File saveFileAs;
    JSONObject selfPost = jsonArray.getJSONObject(0).getJSONObject("data")
            .getJSONArray("children").getJSONObject(0).getJSONObject("data");
    JSONArray comments = jsonArray.getJSONObject(1).getJSONObject("data")
            .getJSONArray("children");

    // Link/media posts carry an empty selftext — nothing to save.
    if (selfPost.getString("selftext").equals("")) { return; }

    final String title = selfPost.getString("title");
    final String id = selfPost.getString("id");
    final String author = selfPost.getString("author");
    // "created" is a unix timestamp in seconds; widen to long before scaling to ms.
    final String creationDate = new Date((long) selfPost.getInt("created") * 1000).toString();
    final String subreddit = selfPost.getString("subreddit");
    final String selfText = selfPost.getString("selftext_html");
    final String permalink = selfPost.getString("url");

    // Build a self-contained HTML page: header block, the post body, then the
    // (recursively nested) comments, plus inline CSS/JS for collapsing.
    String html = TagCreator.html(
            head(
                    title(title),
                    style(rawHtml(HTML_STYLING))
            ),
            body(
                    div(
                            h1(title),
                            a(subreddit).withHref("https://www.reddit.com/r/" + subreddit),
                            a("Original").withHref(permalink),
                            br()
                    ).withClass("thing"),
                    div(
                            div(
                                    span(
                                            a(author).withHref("https://www.reddit.com/u/" + author)
                                    ).withClass("author op")
                            ).withClass("thing oppost")
                                    .withText(creationDate)
                                    .with(rawHtml(Jsoup.parse(selfText).text()))
                    ).withClass("flex")
            ).with(getComments(comments, author)),
            script(rawHtml(HTML_SCRIPT))
    ).renderFormatted();

    try {
        saveFileAs = new File(workingDir.getCanonicalPath()
                + "" + File.separator
                + id + "_" + title.replaceAll("[\\\\/:*?\"<>|]", "")
                + ".html");
        // BUGFIX: ensure the target directory exists BEFORE opening the stream;
        // the original only ran mkdirs() after the write had been attempted.
        if (!saveFileAs.getParentFile().exists()) {
            LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
            saveFileAs.getParentFile().mkdirs();
        }
        // try-with-resources closes the stream even when write() throws.
        try (FileOutputStream out = new FileOutputStream(saveFileAs)) {
            out.write(html.getBytes());
        }
    } catch (IOException e) {
        LOGGER.error("[!] Error creating save file path for description '" + url + "':", e);
        return;
    }
    LOGGER.debug("Downloading " + url + "'s self post to " + saveFileAs);
    super.retrievingSource(permalink);
}
/**
 * Builds the top-level comments container for the rendered self post.
 * Each comment gets author/date metadata and its replies nested via
 * {@link #getNestedComments}.
 *
 * @param comments "children" array from reddit's comments listing
 * @param author   the submission author, used to highlight OP replies
 * @return a div with id "comments" holding the whole comment tree
 */
private ContainerTag getComments(JSONArray comments, String author) {
    ContainerTag root = div().withId("comments");

    for (int idx = 0; idx < comments.length(); idx++) {
        JSONObject commentData = comments.getJSONObject(idx).getJSONObject("data");
        String commentAuthor = commentData.getString("author");

        // One "thing comment" div per top-level comment: author span, a
        // permalink-style anchor showing the creation date, then the body text.
        ContainerTag thing = div(
                span(commentAuthor).withClasses("author", iff(commentAuthor.equals(author), "op")),
                a(new Date((long) commentData.getInt("created") * 1000).toString())
                        .withHref("#" + commentData.getString("name"))
        ).withClass("thing comment").withId(commentData.getString("name"))
                .with(rawHtml(Jsoup.parse(commentData.getString("body_html")).text()));

        // Attach any reply subtree before adding the comment to the container.
        root.with(getNestedComments(commentData, thing, author));
    }
    return root;
}
/**
 * Recursively appends a comment's replies to its rendered div.
 * Reddit encodes "no replies" as an empty string under "replies", so the
 * instanceof JSONObject check doubles as the recursion base case.
 *
 * @param data      the comment's "data" object
 * @param parentDiv the already-rendered div for this comment
 * @param author    the submission author, used to highlight OP replies
 * @return parentDiv with all (transitive) replies appended
 */
private ContainerTag getNestedComments(JSONObject data, ContainerTag parentDiv, String author) {
    if (data.has("replies") && data.get("replies") instanceof JSONObject) {
        JSONArray children = data.getJSONObject("replies")
                .getJSONObject("data")
                .getJSONArray("children");

        for (int i = 0; i < children.length(); i++) {
            JSONObject reply = children.getJSONObject(i).getJSONObject("data");
            String replyAuthor = reply.getString("author");

            // Wrap each reply in a "child" div so the CSS indents the thread.
            ContainerTag replyDiv = div(
                    div(
                            span(replyAuthor).withClasses("author", iff(replyAuthor.equals(author), "op")),
                            a(new Date((long) reply.getInt("created") * 1000).toString())
                                    .withHref("#" + reply.getString("name"))
                    ).withClass("comment").withId(reply.getString("name"))
                            .with(rawHtml(Jsoup.parse(reply.getString("body_html")).text()))
            ).withClass("child");

            // Recurse so the reply's own replies are attached before adding it.
            parentDiv.with(getNestedComments(reply, replyDiv, author));
        }
    }
    return parentDiv;
}
private URL parseRedditVideoMPD(String vidURL) {
org.jsoup.nodes.Document doc = null;
try {
@ -369,4 +490,7 @@ public class RedditRipper extends AlbumRipper {
throw new MalformedURLException("Only accepts user pages, subreddits, post, or gallery can't understand " + url);
}
private static final String HTML_STYLING = " .author { font-weight: bold; } .op { color: blue; } .comment { border: 0px; margin: 0 0 25px; padding-left: 5px; } .child { margin: 2px 0 0 20px; border-left: 2px dashed #AAF; } .collapsed { background: darkgrey; margin-bottom: 0; } .collapsed > div { display: none; } .md { max-width: 840px; padding-right: 1em; } h1 { margin: 0; } body { position: relative; background-color: #eeeeec; color: #00000a; font-weight: 400; font-style: normal; font-variant: normal; font-family: Helvetica,Arial,sans-serif; line-height: 1.4 } blockquote { margin: 5px 5px 5px 15px; padding: 1px 1px 1px 15px; max-width: 60em; border: 1px solid #ccc; border-width: 0 0 0 1px; } pre { white-space: pre-wrap; } img, video { max-width: 60vw; max-height: 90vh; object-fit: contain; } .thing { overflow: hidden; margin: 0 5px 3px 40px; border: 1px solid #e0e0e0; background-color: #fcfcfb; } :target > .md { border: 5px solid blue; } .post { margin-bottom: 20px; margin-top: 20px; } .gold { background: goldenrod; } .silver { background: silver; } .platinum { background: aqua; } .deleted { background: #faa; } .md.deleted { background: inherit; border: 5px solid #faa; } .oppost { background-color: #EEF; } blockquote > p { margin: 0; } #related { max-height: 20em; overflow-y: scroll; background-color: #F4FFF4; } #related h3 { position: sticky; top: 0; background-color: white; } .flex { display: flex; flex-flow: wrap; flex-direction: row-reverse; justify-content: flex-end; } ";
private static final String HTML_SCRIPT = "document.addEventListener('mousedown', function(e) { var t = e.target; if (t.className == 'author') { t = t.parentElement; } if (t.classList.contains('comment')) { t.classList.toggle('collapsed'); e.preventDefault(); e.stopPropagation(); return false; } });";
}

View File

@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
@ -72,14 +73,15 @@ public class RedgifsRipper extends AbstractHTMLRipper {
@Override
public Document getFirstPage() throws IOException {
if (!isProfile().matches() && !isSearch().matches()) {
return Http.url(url).get();
return Jsoup.connect(getJsonURL(url).toExternalForm())
.ignoreContentType(true).get();
} else if (isSearch().matches()) {
searchText = getGID(url).replace("-", " ");
return Http.url(
new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get();
new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get();
} else {
username = getGID(url);
return Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
return Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
.ignoreContentType().get();
}
}
@ -126,7 +128,7 @@ public class RedgifsRipper extends AbstractHTMLRipper {
public Document getNextPage(Document doc) throws IOException {
if (isSearch().matches()) {
Document d = Http.url(
new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText
new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText
+ "&count=" + searchCount + "&start=" + searchCount*++searchStart))
.ignoreContentType().get();
return (hasURLs(d).isEmpty()) ? null : d;
@ -134,7 +136,7 @@ public class RedgifsRipper extends AbstractHTMLRipper {
if (cursor.equals("") || cursor.equals("null")) {
return null;
} else {
Document d = Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
Document d = Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
return (hasURLs(d).isEmpty()) ? null : d;
}
}
@ -146,14 +148,9 @@ public class RedgifsRipper extends AbstractHTMLRipper {
if (isProfile().matches() || isSearch().matches()) {
result = hasURLs(doc);
} else {
Elements videos = doc.select("script");
for (Element el : videos) {
String json = el.html();
if (json.startsWith("{")) {
JSONObject page = new JSONObject(json);
result.add(page.getJSONObject("video").getString("contentUrl"));
}
}
JSONObject api = new JSONObject(doc.body().html());
result.add(api.getJSONObject("gfyItem").getString("mp4Url"));
}
return result;
}
@ -183,19 +180,29 @@ public class RedgifsRipper extends AbstractHTMLRipper {
public static String getVideoURL(URL url) throws IOException {
LOGGER.info("Retrieving " + url.toExternalForm());
//Sanitize the URL first
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
try {
Document doc = Jsoup.connect(getJsonURL(url).toExternalForm())
.ignoreContentType(true).get();
Document doc = Http.url(url).get();
Elements videos = doc.select("script");
for (Element el : videos) {
String json = el.html();
if (json.startsWith("{")) {
JSONObject page = new JSONObject(json);
return page.getJSONObject("video").getString("contentUrl");
}
JSONObject api = new JSONObject(doc.body().html());
return api.getJSONObject("gfyItem").getJSONObject("content_urls")
.getJSONObject("mp4").getString("url");
} catch (NullPointerException e) {
return null;
}
throw new IOException();
}
/**
 * Maps a redgifs watch-page URL onto the corresponding v1 API endpoint.
 *
 * @param url a URL of the form {@code https://[www.|m.]redgifs.com/watch/<id>}
 * @return {@code https://api.redgifs.com/v1/gfycats/<id>}, or {@code null}
 *         when the input is not a redgifs watch page
 * @throws MalformedURLException if the constructed API URL is invalid
 */
public static URL getJsonURL(URL url) throws MalformedURLException{
    // BUGFIX: the original character class [wm.]* also accepted foreign hosts
    // such as "wredgifs.com" or "wm.redgifs.com"; restrict the match to the
    // real redgifs hostnames (bare, "www." or "m.").
    String regex = "^https?://(?:www\\.|m\\.)?redgifs\\.com/watch/([a-zA-Z0-9_]+).*$";
    final Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    final Matcher matcher = pattern.matcher(url.toExternalForm());
    if (matcher.matches()) {
        return new URL("https://api.redgifs.com/v1/gfycats/" + matcher.group(1));
    }
    // Not a watch page; callers are expected to null-check.
    return null;
}
}

View File

@ -47,6 +47,20 @@ public class RedditRipperTest extends RippersTest {
testRipper(ripper);
}
@Test
// Verifies that a text-only (self) post is ripped: the ripper should save the
// post body and comments as an HTML file instead of skipping the post.
// NOTE(review): integration test — hits the live reddit API and depends on
// the linked post still existing.
public void testSelfPostRip() throws IOException {
RedditRipper ripper = new RedditRipper(
new URL("https://www.reddit.com/r/gonewildstories/comments/oz7d97/f_18_finally_having_a_normal_sex_life/")
);
testRipper(ripper);
}
@Test
// Verifies self-post saving when ripping a whole user page rather than a
// single submission. NOTE(review): integration test — hits the live reddit
// API and depends on the account and its posts still existing.
public void testSelfPostAuthorRip() throws IOException {
RedditRipper ripper = new RedditRipper(new URL("https://www.reddit.com/user/ickybabie_"));
testRipper(ripper);
}
/**
* GFYCAT TEST Tests a Bad URL with the "/gifs/detail" inside.
*

View File

@ -1,5 +1,6 @@
package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.RedditRipper;
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.*;
@ -53,8 +54,14 @@ public class RedgifsRipperTest extends RippersTest {
Document doc = ripper.getFirstPage();
doc = ripper.getNextPage(doc);
Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location()));
Assertions.assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location()));
doc = ripper.getNextPage(doc);
Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location()));
Assertions.assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location()));
}
@Test
// Verifies the RedditRipper -> Redgifs hand-off: a reddit post whose media is
// hosted on redgifs should be resolved through the redgifs API and ripped.
// NOTE(review): integration test — hits the live reddit and redgifs APIs and
// depends on the linked post still being available.
public void testRedditRedgifs() throws IOException {
RedditRipper ripper = new RedditRipper(new URL("https://www.reddit.com/r/nsfwhardcore/comments/ouz5bw/me_cumming_on_his_face/"));
testRipper(ripper);
}
}