1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-04-21 12:12:38 +02:00

Merge pull request #1672 from borderline232/master

RedgifsRipper: Implemented redgifs.com Ripper
This commit is contained in:
cyian-1756 2020-06-19 17:40:32 +00:00 committed by GitHub
commit adc352e483
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 270 additions and 0 deletions

View File

@ -0,0 +1,201 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Ripper for redgifs.com (and gifdeliverynetwork.com links, which are rewritten
 * to redgifs.com watch URLs by {@link #sanitizeURL(URL)}).
 *
 * <p>Three kinds of URL are recognised:
 * <ul>
 *   <li>single videos: {@code redgifs.com/watch/<id>} - the video URL is read
 *       from a JSON {@code <script>} block on the HTML page;</li>
 *   <li>user profiles: {@code redgifs.com/users/<name>} - listed through the
 *       napi.redgifs.com API and paged with a cursor;</li>
 *   <li>searches: {@code redgifs.com/gifs/browse/<term>} - listed through the
 *       napi.redgifs.com API and paged with a start offset.</li>
 * </ul>
 */
public class RedgifsRipper extends AbstractHTMLRipper {

    private static final String HOST = "redgifs.com";
    private static final String HOST_2 = "gifdeliverynetwork.com";

    /** Base address of the JSON API used for profile and search listings. */
    private static final String API_BASE = "https://napi.redgifs.com/v1";

    // Compiled once instead of on every isProfile()/isSearch()/isSingleton() call.
    private static final Pattern PROFILE_PATTERN =
            Pattern.compile("^https?://[wm.]*redgifs\\.com/users/([a-zA-Z0-9_-]+).*$");
    private static final Pattern SEARCH_PATTERN =
            Pattern.compile("^https?://[wm.]*redgifs\\.com/gifs/browse/([a-zA-Z0-9_-]+).*$");
    private static final Pattern SINGLETON_PATTERN =
            Pattern.compile("^https?://[wm.]*redgifs\\.com/watch/([a-zA-Z0-9_-]+).*$");

    // "/amp" only when it is a complete path segment, so IDs or user names that
    // merely start with "amp" are left intact.
    private static final Pattern AMP_SEGMENT = Pattern.compile("/amp(?=[/?#]|$)");

    /** Profile being ripped; set by {@link #getFirstPage()} for profile URLs. */
    String username = "";
    /** Paging cursor taken from the most recently parsed API response. */
    String cursor = "";
    /** Page size requested for profile listings. */
    String count = "100";
    /** Search term (dashes replaced by spaces); set by {@link #getFirstPage()} for search URLs. */
    String searchText = "";
    /** Page size requested for search listings. */
    int searchCount = 150;
    /** Zero-based index of the current search page; the API offset is {@code searchStart * searchCount}. */
    int searchStart = 0;

    /**
     * Creates a ripper for the given URL, dropping any {@code "thumbs."} from it first.
     *
     * @param url URL to rip
     * @throws IOException if the superclass rejects the URL
     */
    public RedgifsRipper(URL url) throws IOException {
        super(new URL(url.toExternalForm().replace("thumbs.", "")));
    }

    @Override
    public String getDomain() {
        return HOST;
    }

    @Override
    public String getHost() {
        return "redgifs";
    }

    /**
     * Accepts redgifs.com and gifdeliverynetwork.com, including their subdomains.
     * Hosts that merely end with the same characters (e.g. "notredgifs.com") are rejected.
     */
    @Override
    public boolean canRip(URL url) {
        String host = url.getHost();
        return isHostOrSubdomain(host, HOST) || isHostOrSubdomain(host, HOST_2);
    }

    /** Returns true when {@code host} is {@code domain} itself or one of its subdomains. */
    private static boolean isHostOrSubdomain(String host, String domain) {
        return host.equals(domain) || host.endsWith("." + domain);
    }

    /**
     * Normalises a URL: removes "/gifs/detail", removes an "/amp" path segment and
     * rewrites gifdeliverynetwork.com links to redgifs.com/watch links.
     *
     * @param url URL to normalise
     * @return the normalised URL
     * @throws MalformedURLException if the rewritten string is not a valid URL
     */
    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        String sUrl = url.toExternalForm();
        sUrl = sUrl.replace("/gifs/detail", "");
        sUrl = AMP_SEGMENT.matcher(sUrl).replaceAll("");
        sUrl = sUrl.replace("gifdeliverynetwork.com", "redgifs.com/watch");
        return new URL(sUrl);
    }

    /** @return matcher of the profile pattern against this ripper's URL; group 1 is the user name */
    public Matcher isProfile() {
        return PROFILE_PATTERN.matcher(url.toExternalForm());
    }

    /** @return matcher of the search pattern against this ripper's URL; group 1 is the dashed search term */
    public Matcher isSearch() {
        return SEARCH_PATTERN.matcher(url.toExternalForm());
    }

    /** @return matcher of the watch pattern against this ripper's URL; group 1 is the video slug */
    public Matcher isSingleton() {
        return SINGLETON_PATTERN.matcher(url.toExternalForm());
    }

    /**
     * Fetches the first page: the first API listing for search and profile URLs,
     * otherwise the HTML page at this ripper's URL.
     *
     * @throws IOException if the request fails
     */
    @Override
    public Document getFirstPage() throws IOException {
        if (isSearch().matches()) {
            searchText = getGID(url).replace("-", " ");
            return fetchJson(searchApiUrl());
        }
        if (isProfile().matches()) {
            username = getGID(url);
            return fetchJson(profileApiUrl());
        }
        return Http.url(url).get();
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }

    /**
     * Returns the user name, search term or video ID of this ripper's URL.
     *
     * <p>Note: matching is performed against this ripper's own URL; the
     * {@code url} argument is only used in the error message.
     *
     * @param url URL reported in the exception message when nothing matches
     * @return group 1 of the first matching pattern; for watch URLs only the
     *         part before the first dash
     * @throws MalformedURLException if this ripper's URL matches none of the patterns
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m = isProfile();
        if (m.matches()) {
            return m.group(1);
        }
        m = isSearch();
        if (m.matches()) {
            return m.group(1);
        }
        m = isSingleton();
        if (m.matches()) {
            // Watch slugs look like "<id>-<tag>"; only the id part is kept.
            return m.group(1).split("-")[0];
        }
        throw new MalformedURLException(
                "Expected redgifs.com format: "
                        + "redgifs.com/watch/id, "
                        + "redgifs.com/users/name or "
                        + "redgifs.com/gifs/browse/term"
                        + " Got: " + url);
    }

    /**
     * Removes the HTML wrapper that appears in {@code doc.html()} when an API
     * response is held in a {@link Document}, plus all newlines and any
     * {@code =""} sequences, so that the remainder can be parsed as JSON.
     */
    private String stripHTMLTags(String t) {
        // Plain literal replacement; none of these strings needs regex semantics.
        t = t.replace("<html>\n" +
                " <head></head>\n" +
                " <body>", "");
        t = t.replace("</body>\n" +
                "</html>", "");
        t = t.replace("\n", "");
        t = t.replace("=\"\"", "");
        return t;
    }

    /**
     * Fetches the next API listing.
     *
     * @param doc the current page (not inspected)
     * @return the next page, or {@code null} when there is no cursor to follow
     *         or the next listing contains no URLs
     * @throws IOException if the request fails
     */
    @Override
    public Document getNextPage(Document doc) throws IOException {
        Document next;
        if (isSearch().matches()) {
            searchStart++;
            next = fetchJson(searchApiUrl());
        } else {
            if (cursor.equals("")) {
                return null;
            }
            next = fetchJson(profileApiUrl() + "&cursor=" + cursor);
        }
        // hasURLs also refreshes the cursor from the page that was just fetched.
        return hasURLs(next).isEmpty() ? null : next;
    }

    /**
     * Collects the video URLs of a page: the API listing for profile and search
     * URLs, otherwise the JSON {@code <script>} blocks of the HTML page.
     */
    @Override
    public List<String> getURLsFromPage(Document doc) {
        if (isProfile().matches() || isSearch().matches()) {
            return hasURLs(doc);
        }
        List<String> result = new ArrayList<>();
        Elements scripts = doc.select("script");
        for (Element script : scripts) {
            String contentUrl = contentUrlOf(script);
            if (contentUrl != null) {
                result.add(contentUrl);
            }
        }
        return result;
    }

    /**
     * Helper method for retrieving URLs from an API listing.
     *
     * <p>Side effect: stores the listing's {@code cursor} value (or an empty
     * string when it is absent or null) for use by {@link #getNextPage(Document)}.
     *
     * @param doc Document holding the JSON API response
     * @return List of URLs to download; empty when the response has no "gfycats" array
     */
    public List<String> hasURLs(Document doc) {
        List<String> result = new ArrayList<>();
        JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
        // A missing or null cursor means "no further page" rather than a failure.
        cursor = page.optString("cursor", "");
        JSONArray content = page.optJSONArray("gfycats");
        if (content == null) {
            LOGGER.warn("redgifs response contains no \"gfycats\" array");
            return result;
        }
        for (int i = 0; i < content.length(); i++) {
            JSONObject item = content.optJSONObject(i);
            String mp4Url = (item == null) ? "" : item.optString("mp4Url", "");
            if (!mp4Url.isEmpty()) {
                result.add(mp4Url);
            }
        }
        return result;
    }

    /**
     * Helper method for retrieving video URLs.
     *
     * @param url URL to redgifs page
     * @return URL to video
     * @throws IOException if the page cannot be fetched or contains no video URL
     */
    public static String getVideoURL(URL url) throws IOException {
        LOGGER.info("Retrieving " + url.toExternalForm());
        // Sanitize the URL first
        URL pageUrl = new URL(url.toExternalForm().replace("/gifs/detail", ""));
        Document doc = Http.url(pageUrl).get();
        Elements scripts = doc.select("script");
        for (Element script : scripts) {
            String contentUrl = contentUrlOf(script);
            if (contentUrl != null) {
                return contentUrl;
            }
        }
        throw new IOException("No video URL found on redgifs page " + pageUrl);
    }

    /**
     * Reads {@code video.contentUrl} from a script element whose content is a JSON object.
     *
     * @return the content URL, or {@code null} when the script does not start
     *         with "{" or carries no such value
     */
    private static String contentUrlOf(Element script) {
        String json = script.html();
        if (!json.startsWith("{")) {
            return null;
        }
        JSONObject video = new JSONObject(json).optJSONObject("video");
        if (video == null) {
            return null;
        }
        String contentUrl = video.optString("contentUrl", "");
        return contentUrl.isEmpty() ? null : contentUrl;
    }

    /** Builds the search API address for the current {@code searchText} and {@code searchStart}. */
    private String searchApiUrl() {
        return API_BASE + "/gfycats/search?search_text=" + searchText
                + "&count=" + searchCount + "&start=" + (searchStart * searchCount);
    }

    /** Builds the profile API address (without cursor) for the current {@code username}. */
    private String profileApiUrl() {
        return API_BASE + "/users/" + username + "/gfycats?count=" + count;
    }

    /** Fetches an API address, ignoring the response content type, and returns it as a Document. */
    private static Document fetchJson(String address) throws IOException {
        return Http.url(new URL(address)).ignoreContentType().get();
    }
}

View File

@ -11,6 +11,7 @@ import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ripper.rippers.EroShareRipper;
import com.rarchives.ripme.ripper.rippers.EromeRipper;
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
import com.rarchives.ripme.ripper.rippers.VidbleRipper;
import com.rarchives.ripme.ripper.rippers.GfycatRipper;
import org.apache.commons.lang.math.NumberUtils;
@ -76,6 +77,18 @@ public class RipUtils {
}
return result;
}
else if (url.getHost().endsWith("redgifs.com") || url.getHost().endsWith("gifdeliverynetwork.com")) {
// redgifs / gifdeliverynetwork link: resolve the page to its direct video URL
// via RedgifsRipper.getVideoURL and add that single URL to the result.
try {
logger.debug("Fetching redgifs page " + url);
String videoURL = RedgifsRipper.getVideoURL(url);
logger.debug("Got redgifs URL: " + videoURL);
result.add(new URL(videoURL));
} catch (IOException e) {
// Non-fatal: log the failure and fall through to return the result as it stands
logger.warn("Exception while retrieving redgifs page:", e);
}
return result;
}
else if (url.toExternalForm().contains("vidble.com/album/") || url.toExternalForm().contains("vidble.com/show/")) {
try {
logger.info("Getting vidble album " + url);

View File

@ -0,0 +1,56 @@
package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.*;
import java.io.IOException;
import java.net.URL;
/**
 * Tests for {@link RedgifsRipper}: single videos, gifdeliverynetwork links,
 * profiles and search paging.
 */
public class RedgifsRipperTest extends RippersTest {

    /**
     * Rips a correctly formatted URL directly from Redgifs.
     * @throws IOException
     */
    @Test
    public void testRedgifsGoodURL() throws IOException {
        RedgifsRipper ripper = new RedgifsRipper(new URL("https://www.redgifs.com/watch/talkativewarpeddragon-petite"));
        testRipper(ripper);
    }

    /**
     * Rips gifdeliverynetwork URLs by redirecting them to the proper redgifs URL.
     * @throws IOException
     */
    @Test
    public void testRedgifsBadRL() throws IOException {
        RedgifsRipper ripper = new RedgifsRipper(new URL("https://www.gifdeliverynetwork.com/foolishelasticchimpanzee"));
        testRipper(ripper);
    }

    /**
     * Rips a Redgifs profile.
     * @throws IOException
     */
    @Test
    public void testRedgifsProfile() throws IOException {
        RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/users/margo_monty"));
        testRipper(ripper);
    }

    /**
     * Pages through a Redgifs category/search and checks that each following
     * page is requested with the expected start offset.
     * @throws IOException
     */
    @Test
    public void testRedgifsSearch() throws IOException {
        RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/gifs/browse/little-caprice"));
        Document doc = ripper.getFirstPage();

        doc = ripper.getNextPage(doc);
        assertLocation("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150", doc);

        doc = ripper.getNextPage(doc);
        assertLocation("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300", doc);
    }

    /**
     * Asserts that a page was returned and that its location equals the
     * expected address, ignoring case.
     */
    private static void assertLocation(String expected, Document doc) {
        // getNextPage returns null when no further page exists; report that as a
        // test failure instead of a NullPointerException.
        Assertions.assertNotNull(doc, "Expected a page located at " + expected + " but got none");
        Assertions.assertTrue(expected.equalsIgnoreCase(doc.location()),
                "Expected location " + expected + " but was " + doc.location());
    }
}