Merge pull request #1672 from borderline232/master

RedgifsRipper: Implemented redgifs.com Ripper
2025-04-21 12:12:38 +02:00 · 2020-06-19 17:40:32 +00:00 · 2020-06-19 17:40:32 +00:00 · adc352e483
commit adc352e483
parent 00cc08ec27 689968948c
3 changed files with 270 additions and 0 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedgifsRipper.java
@ -0,0 +1,201 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Ripper for redgifs.com and its gifdeliverynetwork.com alias.
+ *
+ * <p>Three kinds of URL are recognised:
+ * <ul>
+ *   <li>{@code redgifs.com/watch/<id>} - a single video, scraped from the HTML page;</li>
+ *   <li>{@code redgifs.com/users/<name>} - a profile, paged through the JSON API
+ *       using the cursor returned with each page;</li>
+ *   <li>{@code redgifs.com/gifs/browse/<term>} - a search, paged through the JSON API
+ *       using a start offset.</li>
+ * </ul>
+ */
+public class RedgifsRipper extends AbstractHTMLRipper {
+
+    private static final String HOST = "redgifs.com";
+    private static final String HOST_2 = "gifdeliverynetwork.com";
+
+    // Compiled once instead of on every isProfile()/isSearch()/isSingleton() call.
+    private static final Pattern PROFILE_PATTERN =
+            Pattern.compile("^https?://[wm.]*redgifs\\.com/users/([a-zA-Z0-9_-]+).*$");
+    private static final Pattern SEARCH_PATTERN =
+            Pattern.compile("^https?://[wm.]*redgifs\\.com/gifs/browse/([a-zA-Z0-9_-]+).*$");
+    private static final Pattern SINGLETON_PATTERN =
+            Pattern.compile("^https?://[wm.]*redgifs\\.com/watch/([a-zA-Z0-9_-]+).*$");
+
+    // Profile paging state: user name, cursor from the last API page, and page size.
+    String username = "";
+    String cursor = "";
+    String count = "100";
+
+    // Search paging state: search text, page size, and index of the current page.
+    String searchText = "";
+    int searchCount = 150;
+    int searchStart = 0;
+
+    /**
+     * Creates a ripper for the given URL; any {@code "thumbs."} in the URL is removed first.
+     *
+     * @param url URL to rip
+     * @throws IOException propagated from the superclass constructor
+     */
+    public RedgifsRipper(URL url) throws IOException {
+        super(new URL(url.toExternalForm().replace("thumbs.", "")));
+    }
+
+    @Override
+    public String getDomain() {
+        return HOST;
+    }
+
+    @Override
+    public String getHost() {
+        return "redgifs";
+    }
+
+    @Override
+    public boolean canRip(URL url) {
+        String host = url.getHost();
+        return host.endsWith(HOST) || host.endsWith(HOST_2);
+    }
+
+    /**
+     * Normalises a URL: drops {@code /gifs/detail}, drops an {@code /amp} path segment and
+     * rewrites gifdeliverynetwork.com links to {@code redgifs.com/watch}.
+     *
+     * @param url URL to normalise
+     * @return the normalised URL
+     * @throws MalformedURLException if the rewritten text is not a valid URL
+     */
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        String sUrl = url.toExternalForm();
+        sUrl = sUrl.replace("/gifs/detail", "");
+        // Only remove "/amp" when it is a whole path segment; a plain substring replace
+        // would also mangle ids or user names that merely start with "amp".
+        sUrl = sUrl.replaceAll("/amp(?=[/?#]|$)", "");
+        sUrl = sUrl.replace(HOST_2, HOST + "/watch");
+        return new URL(sUrl);
+    }
+
+    /** @return matcher of the profile pattern against this ripper's URL; group 1 is the user name */
+    public Matcher isProfile() {
+        return PROFILE_PATTERN.matcher(url.toExternalForm());
+    }
+
+    /** @return matcher of the search pattern against this ripper's URL; group 1 is the search term */
+    public Matcher isSearch() {
+        return SEARCH_PATTERN.matcher(url.toExternalForm());
+    }
+
+    /** @return matcher of the watch pattern against this ripper's URL; group 1 is the video id */
+    public Matcher isSingleton() {
+        return SINGLETON_PATTERN.matcher(url.toExternalForm());
+    }
+
+    /**
+     * Fetches the first page: the API search result for search URLs, the API profile listing
+     * for profile URLs, and the HTML page itself otherwise.
+     *
+     * @return the first document to extract URLs from
+     * @throws IOException if the request fails
+     */
+    @Override
+    public Document getFirstPage() throws IOException {
+        if (isSearch().matches()) {
+            // Dashes in the browse path stand for spaces in the search text.
+            searchText = getGID(url).replace("-", " ");
+            return Http.url(new URL(searchApiUrl(searchStart * searchCount)))
+                       .ignoreContentType().get();
+        }
+        if (isProfile().matches()) {
+            username = getGID(url);
+            return Http.url(new URL(profileApiUrl(""))).ignoreContentType().get();
+        }
+        return Http.url(url).get();
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+
+    /**
+     * Extracts the identifier from a redgifs URL: the user name for profiles, the search term
+     * for searches, and the video id (text before the first {@code '-'}) for watch pages.
+     *
+     * @param url URL to extract the identifier from
+     * @return the identifier
+     * @throws MalformedURLException if the URL matches none of the supported formats
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        // Match the URL that was passed in, not the instance field, so the result and the
+        // error message below always refer to the same URL.
+        String externalForm = url.toExternalForm();
+
+        Matcher m = PROFILE_PATTERN.matcher(externalForm);
+        if (m.matches()) {
+            return m.group(1);
+        }
+        m = SEARCH_PATTERN.matcher(externalForm);
+        if (m.matches()) {
+            return m.group(1);
+        }
+        m = SINGLETON_PATTERN.matcher(externalForm);
+        if (m.matches()) {
+            // indexOf/substring instead of split("-")[0]: split() returns an empty array
+            // for an id made only of dashes, which would throw on [0].
+            String id = m.group(1);
+            int dash = id.indexOf('-');
+            return dash < 0 ? id : id.substring(0, dash);
+        }
+        throw new MalformedURLException(
+                "Expected redgifs.com format: "
+                        + "redgifs.com/watch/id, "
+                        + "redgifs.com/users/name or "
+                        + "redgifs.com/gifs/browse/term"
+                        + " Got: " + url);
+    }
+
+    /** Builds the search API URL for the current search text, starting at the given offset. */
+    private String searchApiUrl(int start) {
+        return "https://napi.redgifs.com/v1/gfycats/search?search_text=" + searchText
+                + "&count=" + searchCount + "&start=" + start;
+    }
+
+    /** Builds the profile API URL for the current user; an empty cursor requests the first page. */
+    private String profileApiUrl(String pageCursor) {
+        String apiUrl = "https://napi.redgifs.com/v1/users/" + username + "/gfycats?count=" + count;
+        return pageCursor.isEmpty() ? apiUrl : apiUrl + "&cursor=" + pageCursor;
+    }
+
+    /**
+     * Removes the HTML wrapper and attribute artifacts found in {@code Document.html()} output
+     * for an API response, leaving the JSON text.
+     */
+    private String stripHTMLTags(String t) {
+        // All arguments are literal text, so replace() is used rather than regex replaceAll().
+        t = t.replace("<html>\n" +
+                              " <head></head>\n" +
+                              " <body>", "");
+        t = t.replace("</body>\n" +
+                              "</html>", "");
+        t = t.replace("\n", "");
+        t = t.replace("=\"\"", "");
+        return t;
+    }
+
+    /**
+     * Fetches the following page of a search or profile listing.
+     *
+     * @param doc the current page (not inspected)
+     * @return the next page, or {@code null} when there is no cursor or the next page is empty
+     * @throws IOException if the request fails
+     */
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        Document next;
+        if (isSearch().matches()) {
+            searchStart++;
+            next = Http.url(new URL(searchApiUrl(searchCount * searchStart)))
+                       .ignoreContentType().get();
+        } else {
+            if (cursor.isEmpty()) {
+                return null;
+            }
+            next = Http.url(new URL(profileApiUrl(cursor))).ignoreContentType().get();
+        }
+        return hasURLs(next).isEmpty() ? null : next;
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        if (isProfile().matches() || isSearch().matches()) {
+            return hasURLs(doc);
+        }
+        List<String> result = new ArrayList<>();
+        for (Element script : doc.select("script")) {
+            String contentUrl = contentUrlOf(script);
+            if (contentUrl != null) {
+                result.add(contentUrl);
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Reads {@code video.contentUrl} from a script element whose body is a JSON object.
+     *
+     * @param script script element to inspect
+     * @return the content URL, or {@code null} if the script does not start with <code>{</code>
+     *         or carries no {@code video.contentUrl}
+     */
+    private static String contentUrlOf(Element script) {
+        String json = script.html();
+        if (!json.startsWith("{")) {
+            return null;
+        }
+        // Skip JSON scripts that describe something other than a video instead of failing.
+        JSONObject video = new JSONObject(json).optJSONObject("video");
+        if (video == null) {
+            return null;
+        }
+        String contentUrl = video.optString("contentUrl", "");
+        return contentUrl.isEmpty() ? null : contentUrl;
+    }
+
+    /**
+     * Helper method for retrieving URLs from an API response; also stores the response's
+     * cursor for the next profile page request.
+     * @param doc Document of the URL page to look through
+     * @return List of URLs to download
+     */
+    public List<String> hasURLs(Document doc) {
+        List<String> result = new ArrayList<>();
+        JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
+        JSONArray content = page.getJSONArray("gfycats");
+        for (int i = 0; i < content.length(); i++) {
+            result.add(content.getJSONObject(i).getString("mp4Url"));
+        }
+        // A missing or null cursor means there is no further page; treat it as empty
+        // rather than letting getString() throw.
+        cursor = page.optString("cursor", "");
+        return result;
+    }
+
+    /**
+     * Helper method for retrieving video URLs.
+     * @param url URL to gfycat page
+     * @return URL to video
+     * @throws IOException if the page cannot be fetched or contains no video URL
+     */
+    public static String getVideoURL(URL url) throws IOException {
+        LOGGER.info("Retrieving " + url.toExternalForm());
+
+        //Sanitize the URL first
+        url = new URL(url.toExternalForm().replace("/gifs/detail", ""));
+
+        Document doc = Http.url(url).get();
+        for (Element script : doc.select("script")) {
+            String contentUrl = contentUrlOf(script);
+            if (contentUrl != null) {
+                return contentUrl;
+            }
+        }
+        throw new IOException("Could not find a video URL on " + url.toExternalForm());
+    }
+
+}
--- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java
+++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java
@ -11,6 +11,7 @@ import com.rarchives.ripme.ripper.AbstractRipper;
 import com.rarchives.ripme.ripper.rippers.EroShareRipper;
 import com.rarchives.ripme.ripper.rippers.EromeRipper;
 import com.rarchives.ripme.ripper.rippers.ImgurRipper;
+import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
 import com.rarchives.ripme.ripper.rippers.VidbleRipper;
 import com.rarchives.ripme.ripper.rippers.GfycatRipper;
 import org.apache.commons.lang.math.NumberUtils;
@ -76,6 +77,18 @@ public class RipUtils {
            }
            return result;
        }
+        else if (url.getHost().endsWith("redgifs.com") || url.getHost().endsWith("gifdeliverynetwork.com")) {
+            try {
+                logger.debug("Fetching redgifs page " + url);
+                String videoURL = RedgifsRipper.getVideoURL(url);
+                logger.debug("Got redgifs URL: " + videoURL);
+                result.add(new URL(videoURL));
+            } catch (IOException e) {
+                // Do nothing
+                logger.warn("Exception while retrieving redgifs page:", e);
+            }
+            return result;
+        }
        else if (url.toExternalForm().contains("vidble.com/album/") || url.toExternalForm().contains("vidble.com/show/")) {
            try {
                logger.info("Getting vidble album " + url);
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedgifsRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/RedgifsRipperTest.java
@ -0,0 +1,56 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
+import org.jsoup.nodes.Document;
+import org.junit.jupiter.api.*;
+
+import java.io.IOException;
+import java.net.URL;
+
+/**
+ * Tests for {@link RedgifsRipper}: single watch URLs, gifdeliverynetwork links,
+ * profiles and search paging.
+ */
+public class RedgifsRipperTest extends RippersTest {
+
+    /**
+     * Rips a correctly formatted URL directly from Redgifs.
+     * @throws IOException
+     */
+    @Test
+    public void testRedgifsGoodURL() throws IOException {
+        RedgifsRipper ripper = new RedgifsRipper(new URL("https://www.redgifs.com/watch/talkativewarpeddragon-petite"));
+        testRipper(ripper);
+    }
+
+    /**
+     * Rips gifdeliverynetwork URLs by redirecting them to the proper redgifs URL.
+     * @throws IOException
+     */
+    @Test
+    public void testRedgifsBadRL() throws IOException {
+        RedgifsRipper ripper = new RedgifsRipper(new URL("https://www.gifdeliverynetwork.com/foolishelasticchimpanzee"));
+        testRipper(ripper);
+    }
+
+    /**
+     * Rips a Redgifs profile.
+     * @throws IOException
+     */
+    @Test
+    public void testRedgifsProfile() throws IOException {
+        RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/users/margo_monty"));
+        testRipper(ripper);
+    }
+
+    /**
+     * Checks that paging through a Redgifs category/search requests the expected offsets.
+     * @throws IOException
+     */
+    @Test
+    public void testRedgifsSearch() throws IOException {
+        RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/gifs/browse/little-caprice"));
+        Document doc = ripper.getFirstPage();
+
+        // getNextPage returns null when a page has no results; fail with a clear message
+        // instead of a NullPointerException on doc.location().
+        doc = ripper.getNextPage(doc);
+        Assertions.assertNotNull(doc, "Expected a second page of search results");
+        Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=150".equalsIgnoreCase(doc.location()));
+        doc = ripper.getNextPage(doc);
+        Assertions.assertNotNull(doc, "Expected a third page of search results");
+        Assertions.assertTrue("https://napi.redgifs.com/v1/gfycats/search?search_text=little%20caprice&count=150&start=300".equalsIgnoreCase(doc.location()));
+    }
+}