1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-01-17 20:58:31 +01:00

Added Reddit selfPost functionality and fixed Redgifs

- Added self-post download functionality; currently it is always enabled
  and cannot be disabled in the jar
- Fixed Redgifs using the mobile mp4 found in its document; it now uses
  the Redgifs API to fetch the HD version
This commit is contained in:
borderline232 2021-08-08 00:25:20 -04:00 committed by soloturn
parent 18f141bbef
commit 67dd4875d9
4 changed files with 178 additions and 26 deletions

View File

@ -1,14 +1,18 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage;
import j2html.TagCreator;
import j2html.tags.ContainerTag;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
@ -19,6 +23,9 @@ import com.rarchives.ripme.ui.UpdateUtils;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.Jsoup;
import static j2html.TagCreator.*;
public class RedditRipper extends AlbumRipper {
@ -104,6 +111,14 @@ public class RedditRipper extends AlbumRipper {
children = data.getJSONArray("children");
for (int j = 0; j < children.length(); j++) {
parseJsonChild(children.getJSONObject(j));
if (children.getJSONObject(j).getString("kind").equals("t3") &&
children.getJSONObject(j).getJSONObject("data").getBoolean("is_self")
) {
URL selfPostURL = new URL(children.getJSONObject(j).getJSONObject("data").getString("url"));
System.out.println(selfPostURL.toExternalForm());
saveText(getJsonArrayFromURL(getJsonURL(selfPostURL)));
}
}
if (data.has("after") && !data.isNull("after")) {
String nextURLString = Utils.stripURLParameter(url.toExternalForm(), "after");
@ -225,6 +240,112 @@ public class RedditRipper extends AlbumRipper {
}
}
/**
 * Renders a reddit self post (text post) and its comment tree to a standalone
 * HTML file inside the ripper's working directory.
 *
 * @param jsonArray payload of reddit's comments endpoint: element 0 holds the
 *                  submission itself, element 1 holds its comment tree
 * @throws JSONException if the payload does not have the expected structure
 */
private void saveText(JSONArray jsonArray) throws JSONException {
    File saveFileAs;
    JSONObject selfPost = jsonArray.getJSONObject(0).getJSONObject("data")
            .getJSONArray("children").getJSONObject(0).getJSONObject("data");
    JSONArray comments = jsonArray.getJSONObject(1).getJSONObject("data")
            .getJSONArray("children");

    // Link/media posts carry an empty selftext — nothing to save.
    if (selfPost.getString("selftext").equals("")) { return; }

    final String title = selfPost.getString("title");
    final String id = selfPost.getString("id");
    final String author = selfPost.getString("author");
    // "created" is a unix timestamp in seconds; widen to long before scaling to ms.
    final String creationDate = new Date((long) selfPost.getInt("created") * 1000).toString();
    final String subreddit = selfPost.getString("subreddit");
    final String selfText = selfPost.getString("selftext_html");
    final String permalink = selfPost.getString("url");

    // Build a self-contained HTML page: header block, the post body, then the
    // (recursively nested) comments, plus inline CSS/JS for collapsing.
    String html = TagCreator.html(
            head(
                    title(title),
                    style(rawHtml(HTML_STYLING))
            ),
            body(
                    div(
                            h1(title),
                            a(subreddit).withHref("https://www.reddit.com/r/" + subreddit),
                            a("Original").withHref(permalink),
                            br()
                    ).withClass("thing"),
                    div(
                            div(
                                    span(
                                            a(author).withHref("https://www.reddit.com/u/" + author)
                                    ).withClass("author op")
                            ).withClass("thing oppost")
                                    .withText(creationDate)
                                    .with(rawHtml(Jsoup.parse(selfText).text()))
                    ).withClass("flex")
            ).with(getComments(comments, author)),
            script(rawHtml(HTML_SCRIPT))
    ).renderFormatted();

    try {
        saveFileAs = new File(workingDir.getCanonicalPath()
                + "" + File.separator
                + id + "_" + title.replaceAll("[\\\\/:*?\"<>|]", "")
                + ".html");
        // BUGFIX: ensure the target directory exists BEFORE opening the stream;
        // the original only ran mkdirs() after the write had been attempted.
        if (!saveFileAs.getParentFile().exists()) {
            LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
            saveFileAs.getParentFile().mkdirs();
        }
        // try-with-resources closes the stream even when write() throws.
        try (FileOutputStream out = new FileOutputStream(saveFileAs)) {
            out.write(html.getBytes());
        }
    } catch (IOException e) {
        LOGGER.error("[!] Error creating save file path for description '" + url + "':", e);
        return;
    }
    LOGGER.debug("Downloading " + url + "'s self post to " + saveFileAs);
    super.retrievingSource(permalink);
}
/**
 * Builds the top-level comments container for the rendered self post.
 * Each comment gets author/date metadata and its replies nested via
 * {@link #getNestedComments}.
 *
 * @param comments "children" array from reddit's comments listing
 * @param author   the submission author, used to highlight OP replies
 * @return a div with id "comments" holding the whole comment tree
 */
private ContainerTag getComments(JSONArray comments, String author) {
    ContainerTag root = div().withId("comments");

    for (int idx = 0; idx < comments.length(); idx++) {
        JSONObject commentData = comments.getJSONObject(idx).getJSONObject("data");
        String commentAuthor = commentData.getString("author");

        // One "thing comment" div per top-level comment: author span, a
        // permalink-style anchor showing the creation date, then the body text.
        ContainerTag thing = div(
                span(commentAuthor).withClasses("author", iff(commentAuthor.equals(author), "op")),
                a(new Date((long) commentData.getInt("created") * 1000).toString())
                        .withHref("#" + commentData.getString("name"))
        ).withClass("thing comment").withId(commentData.getString("name"))
                .with(rawHtml(Jsoup.parse(commentData.getString("body_html")).text()));

        // Attach any reply subtree before adding the comment to the container.
        root.with(getNestedComments(commentData, thing, author));
    }
    return root;
}
/**
 * Recursively appends a comment's replies to its rendered div.
 * Reddit encodes "no replies" as an empty string under "replies", so the
 * instanceof JSONObject check doubles as the recursion base case.
 *
 * @param data      the comment's "data" object
 * @param parentDiv the already-rendered div for this comment
 * @param author    the submission author, used to highlight OP replies
 * @return parentDiv with all (transitive) replies appended
 */
private ContainerTag getNestedComments(JSONObject data, ContainerTag parentDiv, String author) {
    if (data.has("replies") && data.get("replies") instanceof JSONObject) {
        JSONArray children = data.getJSONObject("replies")
                .getJSONObject("data")
                .getJSONArray("children");

        for (int i = 0; i < children.length(); i++) {
            JSONObject reply = children.getJSONObject(i).getJSONObject("data");
            String replyAuthor = reply.getString("author");

            // Wrap each reply in a "child" div so the CSS indents the thread.
            ContainerTag replyDiv = div(
                    div(
                            span(replyAuthor).withClasses("author", iff(replyAuthor.equals(author), "op")),
                            a(new Date((long) reply.getInt("created") * 1000).toString())
                                    .withHref("#" + reply.getString("name"))
                    ).withClass("comment").withId(reply.getString("name"))
                            .with(rawHtml(Jsoup.parse(reply.getString("body_html")).text()))
            ).withClass("child");

            // Recurse so the reply's own replies are attached before adding it.
            parentDiv.with(getNestedComments(reply, replyDiv, author));
        }
    }
    return parentDiv;
}
private URL parseRedditVideoMPD(String vidURL) {
org.jsoup.nodes.Document doc = null;
try {
@ -369,4 +490,7 @@ public class RedditRipper extends AlbumRipper {
throw new MalformedURLException("Only accepts user pages, subreddits, post, or gallery can't understand " + url);
}
private static final String HTML_STYLING = " .author { font-weight: bold; } .op { color: blue; } .comment { border: 0px; margin: 0 0 25px; padding-left: 5px; } .child { margin: 2px 0 0 20px; border-left: 2px dashed #AAF; } .collapsed { background: darkgrey; margin-bottom: 0; } .collapsed > div { display: none; } .md { max-width: 840px; padding-right: 1em; } h1 { margin: 0; } body { position: relative; background-color: #eeeeec; color: #00000a; font-weight: 400; font-style: normal; font-variant: normal; font-family: Helvetica,Arial,sans-serif; line-height: 1.4 } blockquote { margin: 5px 5px 5px 15px; padding: 1px 1px 1px 15px; max-width: 60em; border: 1px solid #ccc; border-width: 0 0 0 1px; } pre { white-space: pre-wrap; } img, video { max-width: 60vw; max-height: 90vh; object-fit: contain; } .thing { overflow: hidden; margin: 0 5px 3px 40px; border: 1px solid #e0e0e0; background-color: #fcfcfb; } :target > .md { border: 5px solid blue; } .post { margin-bottom: 20px; margin-top: 20px; } .gold { background: goldenrod; } .silver { background: silver; } .platinum { background: aqua; } .deleted { background: #faa; } .md.deleted { background: inherit; border: 5px solid #faa; } .oppost { background-color: #EEF; } blockquote > p { margin: 0; } #related { max-height: 20em; overflow-y: scroll; background-color: #F4FFF4; } #related h3 { position: sticky; top: 0; background-color: white; } .flex { display: flex; flex-flow: wrap; flex-direction: row-reverse; justify-content: flex-end; } ";
private static final String HTML_SCRIPT = "document.addEventListener('mousedown', function(e) { var t = e.target; if (t.className == 'author') { t = t.parentElement; } if (t.classList.contains('comment')) { t.classList.toggle('collapsed'); e.preventDefault(); e.stopPropagation(); return false; } });";
}

View File

@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
@ -72,14 +73,15 @@ public class RedgifsRipper extends AbstractHTMLRipper {
@Override
public Document getFirstPage() throws IOException {
if (!isProfile().matches() && !isSearch().matches()) {
return Http.url(url).get();
return Jsoup.connect(getJsonURL(url).toExternalForm())
.ignoreContentType(true).get();
} else if (isSearch().matches()) {
searchText = getGID(url).replace("-", " ");
return Http.url(
new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get();
new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get();
} else {
username = getGID(url);
return Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
return Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
.ignoreContentType().get();
}
}
@ -126,7 +128,7 @@ public class RedgifsRipper extends AbstractHTMLRipper {
public Document getNextPage(Document doc) throws IOException {
if (isSearch().matches()) {
Document d = Http.url(
new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText
new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText
+ "&count=" + searchCount + "&start=" + searchCount*++searchStart))
.ignoreContentType().get();
return (hasURLs(d).isEmpty()) ? null : d;
@ -134,7 +136,7 @@ public class RedgifsRipper extends AbstractHTMLRipper {
if (cursor.equals("") || cursor.equals("null")) {
return null;
} else {
Document d = Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
Document d = Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
return (hasURLs(d).isEmpty()) ? null : d;
}
}
@ -146,14 +148,9 @@ public class RedgifsRipper extends AbstractHTMLRipper {
if (isProfile().matches() || isSearch().matches()) {
result = hasURLs(doc);
} else {
Elements videos = doc.select("script");
for (Element el : videos) {
String json = el.html();
if (json.startsWith("{")) {
JSONObject page = new JSONObject(json);
result.add(page.getJSONObject("video").getString("contentUrl"));
}
}
JSONObject api = new JSONObject(doc.body().html());
result.add(api.getJSONObject("gfyItem").getString("mp4Url"));
}
return result;
}
@ -183,19 +180,29 @@ public class RedgifsRipper extends AbstractHTMLRipper {
public static String getVideoURL(URL url) throws IOException {
LOGGER.info("Retrieving " + url.toExternalForm());
//Sanitize the URL first
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
try {
Document doc = Jsoup.connect(getJsonURL(url).toExternalForm())
.ignoreContentType(true).get();
Document doc = Http.url(url).get();
Elements videos = doc.select("script");
for (Element el : videos) {
String json = el.html();
if (json.startsWith("{")) {
JSONObject page = new JSONObject(json);
return page.getJSONObject("video").getString("contentUrl");
}
JSONObject api = new JSONObject(doc.body().html());
return api.getJSONObject("gfyItem").getJSONObject("content_urls")
.getJSONObject("mp4").getString("url");
} catch (NullPointerException e) {
return null;
}
throw new IOException();
}
/**
 * Maps a redgifs watch-page URL onto the corresponding v1 API endpoint.
 *
 * @param url a URL of the form {@code https://[www.|m.]redgifs.com/watch/<id>}
 * @return {@code https://api.redgifs.com/v1/gfycats/<id>}, or {@code null}
 *         when the input is not a redgifs watch page
 * @throws MalformedURLException if the constructed API URL is invalid
 */
public static URL getJsonURL(URL url) throws MalformedURLException{
    // BUGFIX: the original character class [wm.]* also accepted foreign hosts
    // such as "wredgifs.com" or "wm.redgifs.com"; restrict the match to the
    // real redgifs hostnames (bare, "www." or "m.").
    String regex = "^https?://(?:www\\.|m\\.)?redgifs\\.com/watch/([a-zA-Z0-9_]+).*$";
    final Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    final Matcher matcher = pattern.matcher(url.toExternalForm());
    if (matcher.matches()) {
        return new URL("https://api.redgifs.com/v1/gfycats/" + matcher.group(1));
    }
    // Not a watch page; callers are expected to null-check.
    return null;
}
}

View File

@ -47,6 +47,20 @@ public class RedditRipperTest extends RippersTest {
testRipper(ripper);
}
@Test
// Verifies that a text-only (self) post is ripped: the ripper should save the
// post body and comments as an HTML file instead of skipping the post.
// NOTE(review): integration test — hits the live reddit API and depends on
// the linked post still existing.
public void testSelfPostRip() throws IOException {
RedditRipper ripper = new RedditRipper(
new URL("https://www.reddit.com/r/gonewildstories/comments/oz7d97/f_18_finally_having_a_normal_sex_life/")
);
testRipper(ripper);
}
@Test
// Verifies self-post saving when ripping a whole user page rather than a
// single submission. NOTE(review): integration test — hits the live reddit
// API and depends on the account and its posts still existing.
public void testSelfPostAuthorRip() throws IOException {
RedditRipper ripper = new RedditRipper(new URL("https://www.reddit.com/user/ickybabie_"));
testRipper(ripper);
}
/**
* GFYCAT TEST Tests a Bad URL with the "/gifs/detail" inside.
*

View File

@ -1,5 +1,6 @@
package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.RedditRipper;
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.*;
@ -53,8 +54,14 @@ public class RedgifsRipperTest extends RippersTest {
Document doc = ripper.getFirstPage();
doc = ripper.getNextPage(doc);
Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location()));
Assertions.assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location()));
doc = ripper.getNextPage(doc);
Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location()));
Assertions.assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location()));
}
@Test
// Verifies the RedditRipper -> Redgifs hand-off: a reddit post whose media is
// hosted on redgifs should be resolved through the redgifs API and ripped.
// NOTE(review): integration test — hits the live reddit and redgifs APIs and
// depends on the linked post still being available.
public void testRedditRedgifs() throws IOException {
RedditRipper ripper = new RedditRipper(new URL("https://www.reddit.com/r/nsfwhardcore/comments/ouz5bw/me_cumming_on_his_face/"));
testRipper(ripper);
}
}