mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-04-22 12:42:02 +02:00
RedgifsRipper: Implemented redgifs Ripper
- Using functionality similar to that of the gfycat ripper, the same was done for the redgifs ripper - A user's profile can be ripped by retrieving all gfycats from their user API call - The categories and search rippers are also implemented, but they use an incrementing start and count in the API URL parameters, because the cursor requires cookies (I believe) to work; this approach just increments until there are no more gfycats and the API returns an error - Profile and search URLs are differentiated using regex matching
This commit is contained in:
parent
00cc08ec27
commit
9a013c74ee
@ -0,0 +1,186 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class RedgifsRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final String HOST = "redgifs.com";
|
||||
String username = "";
|
||||
String cursor = "";
|
||||
String count = "100";
|
||||
|
||||
String searchText = "";
|
||||
int searchCount = 150;
|
||||
int searchStart = 0;
|
||||
|
||||
public RedgifsRipper(URL url) throws IOException {
|
||||
super(new URL(url.toExternalForm().replace("thumbs.", "")));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() { return "redgifs.com"; }
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "redgifs";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
return url.getHost().endsWith(HOST);
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
String sUrl = url.toExternalForm();
|
||||
sUrl = sUrl.replace("/gifs/detail", "");
|
||||
sUrl = sUrl.replace("/amp", "");
|
||||
return new URL(sUrl);
|
||||
}
|
||||
|
||||
public Matcher isProfile() {
|
||||
Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/users/([a-zA-Z0-9_-]+).*$");
|
||||
return p.matcher(url.toExternalForm());
|
||||
}
|
||||
|
||||
public Matcher isSearch() {
|
||||
Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/gifs/browse/([a-zA-Z0-9_-]+).*$");
|
||||
return p.matcher(url.toExternalForm());
|
||||
}
|
||||
|
||||
public Matcher isSingleton() {
|
||||
Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/watch/([a-zA-Z0-9_-]+).*$");
|
||||
return p.matcher(url.toExternalForm());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (!isProfile().matches() && !isSearch().matches()) {
|
||||
return Http.url(url).get();
|
||||
} else if (isSearch().matches()) {
|
||||
searchText = getGID(url).replace("-", " ");
|
||||
return Http.url(
|
||||
new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText + "&count=" + searchCount + "&start=" + searchStart*searchCount)).ignoreContentType().get();
|
||||
} else {
|
||||
username = getGID(url);
|
||||
return Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count))
|
||||
.ignoreContentType().get();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
|
||||
Matcher m = isProfile();
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
m = isSearch();
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
m = isSingleton();
|
||||
if (m.matches()) {
|
||||
return m.group(1).split("-")[0];
|
||||
}
|
||||
throw new MalformedURLException(
|
||||
"Expected redgifs.com format: "
|
||||
+ "redgifs.com/id or "
|
||||
+ "thumbs.redgifs.com/id.gif"
|
||||
+ " Got: " + url);
|
||||
}
|
||||
|
||||
private String stripHTMLTags(String t) {
|
||||
t = t.replaceAll("<html>\n" +
|
||||
" <head></head>\n" +
|
||||
" <body>", "");
|
||||
t = t.replaceAll("</body>\n" +
|
||||
"</html>", "");
|
||||
t = t.replaceAll("\n", "");
|
||||
t = t.replaceAll("=\"\"", "");
|
||||
return t;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
if (!isProfile().matches()) {
|
||||
return Http.url(
|
||||
new URL("https://api.redgifs.com/v1/gfycats/search?search_text=" + searchText
|
||||
+ "&count=" + searchCount + "&start=" + searchCount*++searchStart))
|
||||
.ignoreContentType().get();
|
||||
} else {
|
||||
if (cursor.equals("")) {
|
||||
throw new IOException("No more pages");
|
||||
} else {
|
||||
return Http.url(new URL("https://api.redgifs.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> result = new ArrayList<>();
|
||||
if (isProfile().matches() || isSearch().matches()) {
|
||||
JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
|
||||
JSONArray content = page.getJSONArray("gfycats");
|
||||
for (int i = 0; i < content.length(); i++) {
|
||||
result.add(content.getJSONObject(i).getString("mp4Url"));
|
||||
}
|
||||
cursor = page.getString("cursor");
|
||||
} else {
|
||||
Elements videos = doc.select("script");
|
||||
for (Element el : videos) {
|
||||
String json = el.html();
|
||||
if (json.startsWith("{")) {
|
||||
JSONObject page = new JSONObject(json);
|
||||
result.add(page.getJSONObject("video").getString("contentUrl"));
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method for retrieving video URLs.
|
||||
* @param url URL to gfycat page
|
||||
* @return URL to video
|
||||
* @throws IOException
|
||||
*/
|
||||
public static String getVideoURL(URL url) throws IOException {
|
||||
LOGGER.info("Retrieving " + url.toExternalForm());
|
||||
|
||||
//Sanitize the URL first
|
||||
url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
|
||||
|
||||
Document doc = Http.url(url).get();
|
||||
Elements videos = doc.select("script");
|
||||
for (Element el : videos) {
|
||||
String json = el.html();
|
||||
if (json.startsWith("{")) {
|
||||
JSONObject page = new JSONObject(json);
|
||||
return page.getJSONObject("video").getString("contentUrl");
|
||||
}
|
||||
}
|
||||
throw new IOException();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,48 @@
|
||||
package com.rarchives.ripme.tst.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.rippers.GfycatRipper;
|
||||
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.junit.jupiter.api.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
|
||||
public class RedgifsRipperTest extends RippersTest {
|
||||
|
||||
/**
|
||||
* Rips correctly formatted URL directly from Redgifs
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testRedgifsGoodURL() throws IOException{
|
||||
RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/watch/blaringbonyfulmar-panty-peel"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Rips a Redifs profile
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testRedgifsProfile() throws IOException {
|
||||
RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/users/margo_monty"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
/**
|
||||
* Rips a Redifs category/search
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testRedgifsSearch() throws IOException {
|
||||
RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/gifs/browse/little-caprice"));
|
||||
Document doc = ripper.getFirstPage();
|
||||
|
||||
doc = ripper.getNextPage(doc);
|
||||
assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location()));
|
||||
doc = ripper.getNextPage(doc);
|
||||
assertTrue("https://api.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location()));
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user