mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-04-21 12:12:38 +02:00
Merge pull request #1672 from borderline232/master
RedgifsRipper: Implemented redgifs.com Ripper
This commit is contained in:
commit
adc352e483
@ -0,0 +1,201 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class RedgifsRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final String HOST = "redgifs.com";
|
||||
private static final String HOST_2 = "gifdeliverynetwork.com";
|
||||
String username = "";
|
||||
String cursor = "";
|
||||
String count = "100";
|
||||
|
||||
String searchText = "";
|
||||
int searchCount = 150;
|
||||
int searchStart = 0;
|
||||
|
||||
public RedgifsRipper(URL url) throws IOException {
|
||||
super(new URL(url.toExternalForm().replace("thumbs.", "")));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() { return "redgifs.com"; }
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "redgifs";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
return url.getHost().endsWith(HOST) || url.getHost().endsWith(HOST_2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
String sUrl = url.toExternalForm();
|
||||
sUrl = sUrl.replace("/gifs/detail", "");
|
||||
sUrl = sUrl.replace("/amp", "");
|
||||
sUrl = sUrl.replace("gifdeliverynetwork.com", "redgifs.com/watch");
|
||||
return new URL(sUrl);
|
||||
}
|
||||
|
||||
public Matcher isProfile() {
|
||||
Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/users/([a-zA-Z0-9_-]+).*$");
|
||||
return p.matcher(url.toExternalForm());
|
||||
}
|
||||
|
||||
public Matcher isSearch() {
|
||||
Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/gifs/browse/([a-zA-Z0-9_-]+).*$");
|
||||
return p.matcher(url.toExternalForm());
|
||||
}
|
||||
|
||||
public Matcher isSingleton() {
|
||||
Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/watch/([a-zA-Z0-9_-]+).*$");
|
||||
return p.matcher(url.toExternalForm());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (!isProfile().matches() && !isSearch().matches()) {
|
||||
return Http.url(url).get();
|
||||
} else if (isSearch().matches()) {
|
||||
searchText = getGID(url).replace("-", " ");
|
||||
return Http.url(
|
||||
new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get();
|
||||
} else {
|
||||
username = getGID(url);
|
||||
return Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
|
||||
.ignoreContentType().get();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
|
||||
Matcher m = isProfile();
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
m = isSearch();
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
m = isSingleton();
|
||||
if (m.matches()) {
|
||||
return m.group(1).split("-")[0];
|
||||
}
|
||||
throw new MalformedURLException(
|
||||
"Expected redgifs.com format: "
|
||||
+ "redgifs.com/id or "
|
||||
+ "thumbs.redgifs.com/id.gif"
|
||||
+ " Got: " + url);
|
||||
}
|
||||
|
||||
private String stripHTMLTags(String t) {
|
||||
t = t.replaceAll("<html>\n" +
|
||||
" <head></head>\n" +
|
||||
" <body>", "");
|
||||
t = t.replaceAll("</body>\n" +
|
||||
"</html>", "");
|
||||
t = t.replaceAll("\n", "");
|
||||
t = t.replaceAll("=\"\"", "");
|
||||
return t;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
if (isSearch().matches()) {
|
||||
Document d = Http.url(
|
||||
new URL("https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText
|
||||
+ "&count=" + searchCount + "&start=" + searchCount*++searchStart))
|
||||
.ignoreContentType().get();
|
||||
return (hasURLs(d).isEmpty()) ? null : d;
|
||||
} else {
|
||||
if (cursor.equals("")) {
|
||||
return null;
|
||||
} else {
|
||||
Document d = Http.url(new URL("https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
|
||||
return (hasURLs(d).isEmpty()) ? null : d;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> result = new ArrayList<>();
|
||||
if (isProfile().matches() || isSearch().matches()) {
|
||||
result = hasURLs(doc);
|
||||
} else {
|
||||
Elements videos = doc.select("script");
|
||||
for (Element el : videos) {
|
||||
String json = el.html();
|
||||
if (json.startsWith("{")) {
|
||||
JSONObject page = new JSONObject(json);
|
||||
result.add(page.getJSONObject("video").getString("contentUrl"));
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method for retrieving URLs.
|
||||
* @param doc Document of the URL page to look through
|
||||
* @return List of URLs to download
|
||||
*/
|
||||
public List<String> hasURLs(Document doc) {
|
||||
List<String> result = new ArrayList<>();
|
||||
JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
|
||||
JSONArray content = page.getJSONArray("gfycats");
|
||||
for (int i = 0; i < content.length(); i++) {
|
||||
result.add(content.getJSONObject(i).getString("mp4Url"));
|
||||
}
|
||||
cursor = page.getString("cursor");
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method for retrieving video URLs.
|
||||
* @param url URL to gfycat page
|
||||
* @return URL to video
|
||||
* @throws IOException
|
||||
*/
|
||||
public static String getVideoURL(URL url) throws IOException {
|
||||
LOGGER.info("Retrieving " + url.toExternalForm());
|
||||
|
||||
//Sanitize the URL first
|
||||
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
|
||||
|
||||
Document doc = Http.url(url).get();
|
||||
Elements videos = doc.select("script");
|
||||
for (Element el : videos) {
|
||||
String json = el.html();
|
||||
if (json.startsWith("{")) {
|
||||
JSONObject page = new JSONObject(json);
|
||||
return page.getJSONObject("video").getString("contentUrl");
|
||||
}
|
||||
}
|
||||
throw new IOException();
|
||||
}
|
||||
|
||||
}
|
@ -11,6 +11,7 @@ import com.rarchives.ripme.ripper.AbstractRipper;
|
||||
import com.rarchives.ripme.ripper.rippers.EroShareRipper;
|
||||
import com.rarchives.ripme.ripper.rippers.EromeRipper;
|
||||
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
|
||||
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
|
||||
import com.rarchives.ripme.ripper.rippers.VidbleRipper;
|
||||
import com.rarchives.ripme.ripper.rippers.GfycatRipper;
|
||||
import org.apache.commons.lang.math.NumberUtils;
|
||||
@ -76,6 +77,18 @@ public class RipUtils {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
else if (url.getHost().endsWith("redgifs.com") || url.getHost().endsWith("gifdeliverynetwork.com")) {
|
||||
try {
|
||||
logger.debug("Fetching redgifs page " + url);
|
||||
String videoURL = RedgifsRipper.getVideoURL(url);
|
||||
logger.debug("Got redgifs URL: " + videoURL);
|
||||
result.add(new URL(videoURL));
|
||||
} catch (IOException e) {
|
||||
// Do nothing
|
||||
logger.warn("Exception while retrieving redgifs page:", e);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
else if (url.toExternalForm().contains("vidble.com/album/") || url.toExternalForm().contains("vidble.com/show/")) {
|
||||
try {
|
||||
logger.info("Getting vidble album " + url);
|
||||
|
@ -0,0 +1,56 @@
|
||||
package com.rarchives.ripme.tst.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.junit.jupiter.api.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
|
||||
public class RedgifsRipperTest extends RippersTest {
|
||||
|
||||
/**
|
||||
* Rips correctly formatted URL directly from Redgifs
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testRedgifsGoodURL() throws IOException{
|
||||
RedgifsRipper ripper = new RedgifsRipper(new URL("https://www.redgifs.com/watch/talkativewarpeddragon-petite"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
/**
|
||||
* Rips gifdeliverynetwork URL's by redirecting them to proper redgifs url
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testRedgifsBadRL() throws IOException{
|
||||
RedgifsRipper ripper = new RedgifsRipper(new URL("https://www.gifdeliverynetwork.com/foolishelasticchimpanzee"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
/**
|
||||
* Rips a Redifs profile
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testRedgifsProfile() throws IOException {
|
||||
RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/users/margo_monty"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
/**
|
||||
* Rips a Redifs category/search
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testRedgifsSearch() throws IOException {
|
||||
RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/gifs/browse/little-caprice"));
|
||||
Document doc = ripper.getFirstPage();
|
||||
|
||||
doc = ripper.getNextPage(doc);
|
||||
assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location()));
|
||||
doc = ripper.getNextPage(doc);
|
||||
assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location()));
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user