1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-04-22 12:42:02 +02:00

RedgifsRipper: Implemented redgifs Ripper

- Using similar functionality found in the gfycat ripper the same was
  done for the redgifs ripper
- A user's profile can be ripped, by retrieving all gfycats from their
    user api call
- The categories and search ripper is also implemented, but it uses an
  incrementing start and count in the api url parameters, because the
  cursor requires cookies (I believe) to work; this ripper just
  increments until there are no gfycats and the api returns an error
  (there are no more gfycats)
- Differentiated between getting profile and search urls using regex
  matching
This commit is contained in:
borderline232 2020-05-23 14:37:10 -04:00
parent 00cc08ec27
commit 9a013c74ee
2 changed files with 234 additions and 0 deletions

View File

@ -0,0 +1,186 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Ripper for redgifs.com.
 *
 * <p>Three kinds of URL are recognised:
 * <ul>
 *   <li>{@code /users/<name>} - a profile, paged through the users API with a cursor;</li>
 *   <li>{@code /gifs/browse/<term>} - a search/category, paged through the search API
 *       with an incrementing {@code start} offset;</li>
 *   <li>{@code /watch/<id>} - a single video, read from the JSON embedded in the
 *       page's {@code <script>} elements.</li>
 * </ul>
 */
public class RedgifsRipper extends AbstractHTMLRipper {

    private static final String HOST = "redgifs.com";

    // Compiled once; the expressions are identical to the ones previously compiled per call.
    private static final Pattern PROFILE_PATTERN =
            Pattern.compile("^https?://[wm.]*redgifs\\.com/users/([a-zA-Z0-9_-]+).*$");
    private static final Pattern SEARCH_PATTERN =
            Pattern.compile("^https?://[wm.]*redgifs\\.com/gifs/browse/([a-zA-Z0-9_-]+).*$");
    private static final Pattern SINGLETON_PATTERN =
            Pattern.compile("^https?://[wm.]*redgifs\\.com/watch/([a-zA-Z0-9_-]+).*$");

    /** Profile name, set by {@link #getFirstPage()} for profile URLs. */
    String username = "";
    /** Cursor taken from the most recently parsed API page; empty when there is none. */
    String cursor = "";
    /** Value of the {@code count} parameter sent to the users API. */
    String count = "100";
    /** Search term, set by {@link #getFirstPage()} for search URLs. */
    String searchText = "";
    /** Value of the {@code count} parameter sent to the search API. */
    int searchCount = 150;
    /** Zero-based index of the current search page; the offset sent is {@code searchStart * searchCount}. */
    int searchStart = 0;

    /**
     * Creates a ripper for the given URL; any {@code "thumbs."} in the URL is removed
     * before it is handed to the superclass.
     *
     * @param url URL to rip
     * @throws IOException if the superclass constructor rejects the URL
     */
    public RedgifsRipper(URL url) throws IOException {
        super(new URL(url.toExternalForm().replace("thumbs.", "")));
    }

    @Override
    public String getDomain() {
        return HOST;
    }

    @Override
    public String getHost() {
        return "redgifs";
    }

    @Override
    public boolean canRip(URL url) {
        return url.getHost().endsWith(HOST);
    }

    /**
     * Removes the {@code /gifs/detail} and {@code /amp} fragments from the URL.
     *
     * @param url URL to clean up
     * @return the URL without those fragments
     * @throws MalformedURLException if the resulting string is not a valid URL
     */
    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        String cleaned = url.toExternalForm()
                .replace("/gifs/detail", "")
                .replace("/amp", "");
        return new URL(cleaned);
    }

    /** @return a matcher of this ripper's URL against the profile pattern; group 1 is the user name */
    public Matcher isProfile() {
        return PROFILE_PATTERN.matcher(url.toExternalForm());
    }

    /** @return a matcher of this ripper's URL against the search pattern; group 1 is the search term */
    public Matcher isSearch() {
        return SEARCH_PATTERN.matcher(url.toExternalForm());
    }

    /** @return a matcher of this ripper's URL against the single-video pattern; group 1 is the id */
    public Matcher isSingleton() {
        return SINGLETON_PATTERN.matcher(url.toExternalForm());
    }

    /**
     * Fetches the first page: the search API for search URLs, the users API for
     * profile URLs, and the page itself for anything else.
     *
     * @return the fetched document
     * @throws IOException if the request fails or the URL is not a recognised format
     */
    @Override
    public Document getFirstPage() throws IOException {
        if (isSearch().matches()) {
            searchText = getGID(url).replace("-", " ");
            return Http.url(searchApiUrl(searchStart * searchCount)).ignoreContentType().get();
        }
        if (isProfile().matches()) {
            username = getGID(url);
            return Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
                    .ignoreContentType().get();
        }
        return Http.url(url).get();
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }

    /**
     * Extracts the identifier from a redgifs URL: the user name for profiles, the
     * search term for searches, and the part of the id before the first {@code '-'}
     * for single videos.
     *
     * @param url URL to inspect
     * @return the identifier
     * @throws MalformedURLException if the URL matches none of the supported formats
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        // Match the URL that was passed in, not the instance field it shadows.
        String external = url.toExternalForm();

        Matcher m = PROFILE_PATTERN.matcher(external);
        if (m.matches()) {
            return m.group(1);
        }
        m = SEARCH_PATTERN.matcher(external);
        if (m.matches()) {
            return m.group(1);
        }
        m = SINGLETON_PATTERN.matcher(external);
        if (m.matches()) {
            // indexOf instead of split("-")[0]: split returns an empty array for an
            // id consisting only of dashes, which would raise an index error.
            String id = m.group(1);
            int dash = id.indexOf('-');
            return dash < 0 ? id : id.substring(0, dash);
        }
        throw new MalformedURLException(
                "Expected redgifs.com format: "
                        + "redgifs.com/id or "
                        + "thumbs.redgifs.com/id.gif"
                        + " Got: " + url);
    }

    /**
     * Removes the HTML wrapper that surrounds an API response once it has been
     * parsed as a document, leaving text that can be fed to the JSON parser.
     *
     * @param t serialized document
     * @return the text with the wrapper, newlines and {@code =""} sequences removed
     */
    private String stripHTMLTags(String t) {
        // All search strings are literal text, so replace() is used rather than regex replaceAll().
        t = t.replace("<html>\n" +
                " <head></head>\n" +
                " <body>", "");
        t = t.replace("</body>\n" +
                "</html>", "");
        t = t.replace("\n", "");
        t = t.replace("=\"\"", "");
        return t;
    }

    /**
     * Fetches the page after {@code doc}.
     *
     * <p>Searches advance the {@code start} offset by one page; profiles follow the
     * cursor recorded by {@link #getURLsFromPage(Document)}. A single-video URL has
     * no further pages.
     *
     * @param doc the current page (not inspected)
     * @return the next page
     * @throws IOException if there are no more pages or the request fails
     */
    @Override
    public Document getNextPage(Document doc) throws IOException {
        if (isSearch().matches()) {
            searchStart++;
            return Http.url(searchApiUrl(searchCount * searchStart)).ignoreContentType().get();
        }
        if (isProfile().matches()) {
            if (cursor.isEmpty()) {
                throw new IOException("No more pages");
            }
            return Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor))
                    .ignoreContentType().get();
        }
        // Single video: do not fall through to a search request with an empty search text.
        throw new IOException("No more pages");
    }

    /**
     * Collects the video URLs from a page: the {@code mp4Url} of every entry in the
     * {@code gfycats} array for API pages, or the {@code video.contentUrl} of each
     * JSON {@code <script>} element for a single-video page. For API pages the
     * cursor for the following page is recorded as a side effect.
     *
     * @param doc page to read
     * @return the video URLs found, possibly empty
     */
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<>();
        if (isProfile().matches() || isSearch().matches()) {
            JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
            JSONArray content = page.getJSONArray("gfycats");
            for (int i = 0; i < content.length(); i++) {
                result.add(content.getJSONObject(i).getString("mp4Url"));
            }
            // A null or absent cursor becomes "" instead of raising JSONException.
            cursor = page.optString("cursor", "");
        } else {
            Elements scripts = doc.select("script");
            for (Element el : scripts) {
                String json = el.html();
                if (json.startsWith("{")) {
                    JSONObject page = new JSONObject(json);
                    result.add(page.getJSONObject("video").getString("contentUrl"));
                }
            }
        }
        return result;
    }

    /**
     * Helper method for retrieving video URLs.
     *
     * @param url URL to the redgifs page
     * @return URL to the video
     * @throws IOException if the page cannot be fetched or contains no JSON script element
     */
    public static String getVideoURL(URL url) throws IOException {
        LOGGER.info("Retrieving " + url.toExternalForm());

        //Sanitize the URL first
        url = new URL(url.toExternalForm().replace("/gifs/detail", ""));

        Document doc = Http.url(url).get();
        Elements scripts = doc.select("script");
        for (Element el : scripts) {
            String json = el.html();
            if (json.startsWith("{")) {
                JSONObject page = new JSONObject(json);
                return page.getJSONObject("video").getString("contentUrl");
            }
        }
        throw new IOException("Could not find a video URL at " + url);
    }

    /** Builds the search API URL for the current search text at the given start offset. */
    private URL searchApiUrl(int start) throws MalformedURLException {
        return new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText
                + "&count=" + searchCount + "&start=" + start);
    }
}

View File

@ -0,0 +1,48 @@
package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.GfycatRipper;
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.*;
import java.io.IOException;
import java.net.URL;
/**
 * Tests for {@link RedgifsRipper}: a single video, a user profile, and the paging
 * of a search/category URL.
 */
public class RedgifsRipperTest extends RippersTest {

    /**
     * Rips correctly formatted URL directly from Redgifs
     * @throws IOException
     */
    @Test
    public void testRedgifsGoodURL() throws IOException {
        RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/watch/blaringbonyfulmar-panty-peel"));
        testRipper(ripper);
    }

    /**
     * Rips a Redgifs profile
     * @throws IOException
     */
    @Test
    public void testRedgifsProfile() throws IOException {
        RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/users/margo_monty"));
        testRipper(ripper);
    }

    /**
     * Rips a Redgifs category/search and checks that each following page is
     * requested with the start offset advanced by one page (150 entries).
     * @throws IOException
     */
    @Test
    public void testRedgifsSearch() throws IOException {
        RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/gifs/browse/little-caprice"));
        Document doc = ripper.getFirstPage();

        doc = ripper.getNextPage(doc);
        assertLocation("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150", doc);

        doc = ripper.getNextPage(doc);
        assertLocation("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300", doc);
    }

    /** Asserts, ignoring case, that the document was loaded from the expected location. */
    private static void assertLocation(String expected, Document doc) {
        // Qualified call: resolves through the org.junit.jupiter.api.* import without a static import.
        Assertions.assertTrue(expected.equalsIgnoreCase(doc.location()),
                "Expected location " + expected + " but was " + doc.location());
    }
}