mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-01-17 20:58:31 +01:00
Added Reddit selfPost functionality and fixed Redgifs
- Added self-post download functionality; it is currently always enabled and cannot be disabled in the jar. - Fixed Redgifs: instead of using the mobile mp4 URL embedded in the document, the ripper now queries the Redgifs API to fetch the HD version.
This commit is contained in:
parent
18f141bbef
commit
67dd4875d9
@ -1,14 +1,18 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||
import j2html.TagCreator;
|
||||
import j2html.tags.ContainerTag;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
@ -19,6 +23,9 @@ import com.rarchives.ripme.ui.UpdateUtils;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.RipUtils;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import org.jsoup.Jsoup;
|
||||
|
||||
import static j2html.TagCreator.*;
|
||||
|
||||
public class RedditRipper extends AlbumRipper {
|
||||
|
||||
@ -104,6 +111,14 @@ public class RedditRipper extends AlbumRipper {
|
||||
children = data.getJSONArray("children");
|
||||
for (int j = 0; j < children.length(); j++) {
|
||||
parseJsonChild(children.getJSONObject(j));
|
||||
|
||||
if (children.getJSONObject(j).getString("kind").equals("t3") &&
|
||||
children.getJSONObject(j).getJSONObject("data").getBoolean("is_self")
|
||||
) {
|
||||
URL selfPostURL = new URL(children.getJSONObject(j).getJSONObject("data").getString("url"));
|
||||
System.out.println(selfPostURL.toExternalForm());
|
||||
saveText(getJsonArrayFromURL(getJsonURL(selfPostURL)));
|
||||
}
|
||||
}
|
||||
if (data.has("after") && !data.isNull("after")) {
|
||||
String nextURLString = Utils.stripURLParameter(url.toExternalForm(), "after");
|
||||
@ -225,6 +240,112 @@ public class RedditRipper extends AlbumRipper {
|
||||
}
|
||||
}
|
||||
|
||||
private void saveText(JSONArray jsonArray) throws JSONException {
|
||||
File saveFileAs;
|
||||
|
||||
JSONObject selfPost = jsonArray.getJSONObject(0).getJSONObject("data")
|
||||
.getJSONArray("children").getJSONObject(0).getJSONObject("data");
|
||||
JSONArray comments = jsonArray.getJSONObject(1).getJSONObject("data")
|
||||
.getJSONArray("children");
|
||||
|
||||
if (selfPost.getString("selftext").equals("")) { return; }
|
||||
|
||||
final String title = selfPost.getString("title");
|
||||
final String id = selfPost.getString("id");
|
||||
final String author = selfPost.getString("author");
|
||||
final String creationDate = new Date((long) selfPost.getInt("created") * 1000).toString();
|
||||
final String subreddit = selfPost.getString("subreddit");
|
||||
final String selfText = selfPost.getString("selftext_html");
|
||||
final String permalink = selfPost.getString("url");
|
||||
|
||||
String html = TagCreator.html(
|
||||
head(
|
||||
title(title),
|
||||
style(rawHtml(HTML_STYLING))
|
||||
),
|
||||
body(
|
||||
div(
|
||||
h1(title),
|
||||
a(subreddit).withHref("https://www.reddit.com/r/" + subreddit),
|
||||
a("Original").withHref(permalink),
|
||||
br()
|
||||
).withClass("thing"),
|
||||
div(
|
||||
div(
|
||||
span(
|
||||
a(author).withHref("https://www.reddit.com/u/" + author)
|
||||
).withClass("author op")
|
||||
).withClass("thing oppost")
|
||||
.withText(creationDate)
|
||||
.with(rawHtml(Jsoup.parse(selfText).text()))
|
||||
).withClass("flex")
|
||||
).with(getComments(comments, author)),
|
||||
script(rawHtml(HTML_SCRIPT))
|
||||
).renderFormatted();
|
||||
|
||||
try {
|
||||
saveFileAs = new File(workingDir.getCanonicalPath()
|
||||
+ "" + File.separator
|
||||
+ id + "_" + title.replaceAll("[\\\\/:*?\"<>|]", "")
|
||||
+ ".html");
|
||||
FileOutputStream out = new FileOutputStream(saveFileAs);
|
||||
out.write(html.getBytes());
|
||||
out.close();
|
||||
} catch (IOException e) {
|
||||
LOGGER.error("[!] Error creating save file path for description '" + url + "':", e);
|
||||
return;
|
||||
}
|
||||
|
||||
LOGGER.debug("Downloading " + url + "'s self post to " + saveFileAs);
|
||||
super.retrievingSource(permalink);
|
||||
if (!saveFileAs.getParentFile().exists()) {
|
||||
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
|
||||
saveFileAs.getParentFile().mkdirs();
|
||||
}
|
||||
}
|
||||
|
||||
private ContainerTag getComments(JSONArray comments, String author) {
|
||||
ContainerTag commentsDiv = div().withId("comments");
|
||||
|
||||
for (int i = 0; i < comments.length(); i++) {
|
||||
JSONObject data = comments.getJSONObject(i).getJSONObject("data");
|
||||
|
||||
ContainerTag commentDiv =
|
||||
div(
|
||||
span(data.getString("author")).withClasses("author", iff(data.getString("author").equals(author), "op")),
|
||||
a(new Date((long) data.getInt("created") * 1000).toString()).withHref("#" + data.getString("name"))
|
||||
).withClass("thing comment").withId(data.getString("name"))
|
||||
.with(rawHtml(Jsoup.parse(data.getString("body_html")).text()));
|
||||
|
||||
commentDiv = getNestedComments(data, commentDiv, author);
|
||||
commentsDiv.with(commentDiv);
|
||||
}
|
||||
return commentsDiv;
|
||||
}
|
||||
|
||||
private ContainerTag getNestedComments(JSONObject data, ContainerTag parentDiv, String author) {
|
||||
if (data.has("replies") && data.get("replies") instanceof JSONObject) {
|
||||
for (int i = 0; i <= data.getJSONObject("replies").getJSONObject("data").getJSONArray("children").length() - 1; i++) {
|
||||
JSONObject nestedComment = data.getJSONObject("replies")
|
||||
.getJSONObject("data")
|
||||
.getJSONArray("children")
|
||||
.getJSONObject(i).getJSONObject("data");
|
||||
|
||||
ContainerTag childDiv =
|
||||
div(
|
||||
div(
|
||||
span(nestedComment.getString("author")).withClasses("author", iff(nestedComment.getString("author").equals(author), "op")),
|
||||
a(new Date((long) nestedComment.getInt("created") * 1000).toString()).withHref("#" + nestedComment.getString("name"))
|
||||
).withClass("comment").withId(nestedComment.getString("name"))
|
||||
.with(rawHtml(Jsoup.parse(nestedComment.getString("body_html")).text()))
|
||||
).withClass("child");
|
||||
|
||||
parentDiv.with(getNestedComments(nestedComment, childDiv, author));
|
||||
}
|
||||
}
|
||||
return parentDiv;
|
||||
}
|
||||
|
||||
private URL parseRedditVideoMPD(String vidURL) {
|
||||
org.jsoup.nodes.Document doc = null;
|
||||
try {
|
||||
@ -369,4 +490,7 @@ public class RedditRipper extends AlbumRipper {
|
||||
throw new MalformedURLException("Only accepts user pages, subreddits, post, or gallery can't understand " + url);
|
||||
}
|
||||
|
||||
private static final String HTML_STYLING = " .author { font-weight: bold; } .op { color: blue; } .comment { border: 0px; margin: 0 0 25px; padding-left: 5px; } .child { margin: 2px 0 0 20px; border-left: 2px dashed #AAF; } .collapsed { background: darkgrey; margin-bottom: 0; } .collapsed > div { display: none; } .md { max-width: 840px; padding-right: 1em; } h1 { margin: 0; } body { position: relative; background-color: #eeeeec; color: #00000a; font-weight: 400; font-style: normal; font-variant: normal; font-family: Helvetica,Arial,sans-serif; line-height: 1.4 } blockquote { margin: 5px 5px 5px 15px; padding: 1px 1px 1px 15px; max-width: 60em; border: 1px solid #ccc; border-width: 0 0 0 1px; } pre { white-space: pre-wrap; } img, video { max-width: 60vw; max-height: 90vh; object-fit: contain; } .thing { overflow: hidden; margin: 0 5px 3px 40px; border: 1px solid #e0e0e0; background-color: #fcfcfb; } :target > .md { border: 5px solid blue; } .post { margin-bottom: 20px; margin-top: 20px; } .gold { background: goldenrod; } .silver { background: silver; } .platinum { background: aqua; } .deleted { background: #faa; } .md.deleted { background: inherit; border: 5px solid #faa; } .oppost { background-color: #EEF; } blockquote > p { margin: 0; } #related { max-height: 20em; overflow-y: scroll; background-color: #F4FFF4; } #related h3 { position: sticky; top: 0; background-color: white; } .flex { display: flex; flex-flow: wrap; flex-direction: row-reverse; justify-content: flex-end; } ";
|
||||
private static final String HTML_SCRIPT = "document.addEventListener('mousedown', function(e) { var t = e.target; if (t.className == 'author') { t = t.parentElement; } if (t.classList.contains('comment')) { t.classList.toggle('collapsed'); e.preventDefault(); e.stopPropagation(); return false; } });";
|
||||
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
@ -72,14 +73,15 @@ public class RedgifsRipper extends AbstractHTMLRipper {
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (!isProfile().matches() && !isSearch().matches()) {
|
||||
return Http.url(url).get();
|
||||
return Jsoup.connect(getJsonURL(url).toExternalForm())
|
||||
.ignoreContentType(true).get();
|
||||
} else if (isSearch().matches()) {
|
||||
searchText = getGID(url).replace("-", " ");
|
||||
return Http.url(
|
||||
new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get();
|
||||
new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get();
|
||||
} else {
|
||||
username = getGID(url);
|
||||
return Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
|
||||
return Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
|
||||
.ignoreContentType().get();
|
||||
}
|
||||
}
|
||||
@ -126,7 +128,7 @@ public class RedgifsRipper extends AbstractHTMLRipper {
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
if (isSearch().matches()) {
|
||||
Document d = Http.url(
|
||||
new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText
|
||||
new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText
|
||||
+ "&count=" + searchCount + "&start=" + searchCount*++searchStart))
|
||||
.ignoreContentType().get();
|
||||
return (hasURLs(d).isEmpty()) ? null : d;
|
||||
@ -134,7 +136,7 @@ public class RedgifsRipper extends AbstractHTMLRipper {
|
||||
if (cursor.equals("") || cursor.equals("null")) {
|
||||
return null;
|
||||
} else {
|
||||
Document d = Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
|
||||
Document d = Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
|
||||
return (hasURLs(d).isEmpty()) ? null : d;
|
||||
}
|
||||
}
|
||||
@ -146,14 +148,9 @@ public class RedgifsRipper extends AbstractHTMLRipper {
|
||||
if (isProfile().matches() || isSearch().matches()) {
|
||||
result = hasURLs(doc);
|
||||
} else {
|
||||
Elements videos = doc.select("script");
|
||||
for (Element el : videos) {
|
||||
String json = el.html();
|
||||
if (json.startsWith("{")) {
|
||||
JSONObject page = new JSONObject(json);
|
||||
result.add(page.getJSONObject("video").getString("contentUrl"));
|
||||
}
|
||||
}
|
||||
JSONObject api = new JSONObject(doc.body().html());
|
||||
result.add(api.getJSONObject("gfyItem").getString("mp4Url"));
|
||||
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -183,19 +180,29 @@ public class RedgifsRipper extends AbstractHTMLRipper {
|
||||
public static String getVideoURL(URL url) throws IOException {
|
||||
LOGGER.info("Retrieving " + url.toExternalForm());
|
||||
|
||||
//Sanitize the URL first
|
||||
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
|
||||
try {
|
||||
Document doc = Jsoup.connect(getJsonURL(url).toExternalForm())
|
||||
.ignoreContentType(true).get();
|
||||
|
||||
Document doc = Http.url(url).get();
|
||||
Elements videos = doc.select("script");
|
||||
for (Element el : videos) {
|
||||
String json = el.html();
|
||||
if (json.startsWith("{")) {
|
||||
JSONObject page = new JSONObject(json);
|
||||
return page.getJSONObject("video").getString("contentUrl");
|
||||
}
|
||||
JSONObject api = new JSONObject(doc.body().html());
|
||||
return api.getJSONObject("gfyItem").getJSONObject("content_urls")
|
||||
.getJSONObject("mp4").getString("url");
|
||||
|
||||
} catch (NullPointerException e) {
|
||||
return null;
|
||||
}
|
||||
throw new IOException();
|
||||
}
|
||||
|
||||
public static URL getJsonURL(URL url) throws MalformedURLException{
|
||||
String regex = "^https?://[wm.]*redgifs\\.com/watch/([a-zA-Z0-9_]+).*$";
|
||||
|
||||
final Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
|
||||
final Matcher matcher = pattern.matcher(url.toExternalForm());
|
||||
|
||||
if (matcher.matches()) {
|
||||
return new URL("https://api.redgifs.com/v1/gfycats/" + matcher.group(1));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -47,6 +47,20 @@ public class RedditRipperTest extends RippersTest {
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSelfPostRip() throws IOException {
|
||||
RedditRipper ripper = new RedditRipper(
|
||||
new URL("https://www.reddit.com/r/gonewildstories/comments/oz7d97/f_18_finally_having_a_normal_sex_life/")
|
||||
);
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSelfPostAuthorRip() throws IOException {
|
||||
RedditRipper ripper = new RedditRipper(new URL("https://www.reddit.com/user/ickybabie_"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
/**
|
||||
* GFYCAT TEST Tests a Bad URL with the "/gifs/detail" inside.
|
||||
*
|
||||
|
@ -1,5 +1,6 @@
|
||||
package com.rarchives.ripme.tst.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.rippers.RedditRipper;
|
||||
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.junit.jupiter.api.*;
|
||||
@ -53,8 +54,14 @@ public class RedgifsRipperTest extends RippersTest {
|
||||
Document doc = ripper.getFirstPage();
|
||||
|
||||
doc = ripper.getNextPage(doc);
|
||||
Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location()));
|
||||
Assertions.assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location()));
|
||||
doc = ripper.getNextPage(doc);
|
||||
Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location()));
|
||||
Assertions.assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRedditRedgifs() throws IOException {
|
||||
RedditRipper ripper = new RedditRipper(new URL("https://www.reddit.com/r/nsfwhardcore/comments/ouz5bw/me_cumming_on_his_face/"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user