From e9e770c97435e067c333ec1cf4a530dd9a21f3b9 Mon Sep 17 00:00:00 2001 From: Tushar Date: Sun, 27 Jan 2019 22:38:08 +0530 Subject: [PATCH 1/5] Fixed GfycatRipper not ripping gifs. --- .../ripme/ripper/rippers/GfycatRipper.java | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java index 9c2db859..2061db45 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java @@ -1,6 +1,5 @@ package com.rarchives.ripme.ripper.rippers; - import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; @@ -11,10 +10,11 @@ import java.util.regex.Pattern; import com.rarchives.ripme.ripper.AbstractSingleFileRipper; import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.rarchives.ripme.utils.Http; - +import com.rarchives.ripme.utils.Utils; public class GfycatRipper extends AbstractSingleFileRipper { @@ -42,7 +42,7 @@ public class GfycatRipper extends AbstractSingleFileRipper { @Override public URL sanitizeURL(URL url) throws MalformedURLException { url = new URL(url.toExternalForm().replace("/gifs/detail", "")); - + return url; } @@ -64,17 +64,23 @@ public class GfycatRipper extends AbstractSingleFileRipper { return m.group(1); } - throw new MalformedURLException( - "Expected gfycat.com format:" - + "gfycat.com/id" - + " Got: " + url); + throw new MalformedURLException("Expected gfycat.com format:" + "gfycat.com/id" + " Got: " + url); } @Override public List getURLsFromPage(Document doc) { List result = new ArrayList<>(); - Elements videos = doc.select("source"); + Elements videos = doc.select("video source"); String vidUrl = videos.first().attr("src"); + // Check preference for mp4 over webm/gif.
+ if (Utils.getConfigBoolean("prefer.mp4", false)) { + for (Element e : videos) { + if (e.hasAttr("src") && e.attr("src").endsWith(".mp4")) { + vidUrl = e.attr("src"); + break; + } + } + } if (vidUrl.startsWith("//")) { vidUrl = "http:" + vidUrl; } @@ -84,22 +90,33 @@ public class GfycatRipper extends AbstractSingleFileRipper { /** * Helper method for retrieving video URLs. - * @param url URL to gfycat page + * + * @param url + * URL to gfycat page * @return URL to video * @throws IOException */ public static String getVideoURL(URL url) throws IOException { LOGGER.info("Retrieving " + url.toExternalForm()); - //Sanitize the URL first + // Sanitize the URL first url = new URL(url.toExternalForm().replace("/gifs/detail", "")); Document doc = Http.url(url).get(); - Elements videos = doc.select("source"); + Elements videos = doc.select("video source"); if (videos.isEmpty()) { throw new IOException("Could not find source at " + url); } String vidUrl = videos.first().attr("src"); + // Check preference for mp4 over webm/gif. 
+ if (Utils.getConfigBoolean("prefer.mp4", false)) { + for (Element e : videos) { + if (e.hasAttr("src") && e.attr("src").endsWith(".mp4")) { + vidUrl = e.attr("src"); + break; + } + } + } if (vidUrl.startsWith("//")) { vidUrl = "http:" + vidUrl; } From bea0dffd6836779513103270609d33ba82084fdc Mon Sep 17 00:00:00 2001 From: Tushar Date: Wed, 6 Feb 2019 13:41:58 +0530 Subject: [PATCH 2/5] Added ripper for xlecx.com --- .../ripme/ripper/rippers/XlecxRipper.java | 36 +++++++++++++++++++ .../tst/ripper/rippers/XlecxRipperTest.java | 13 +++++++ 2 files changed, 49 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/XlecxRipper.java create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/XlecxRipperTest.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XlecxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XlecxRipper.java new file mode 100644 index 00000000..15aee9c9 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XlecxRipper.java @@ -0,0 +1,36 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class XlecxRipper extends XcartxRipper { + + private Pattern p = Pattern.compile("^https?://xlecx.com/([a-zA-Z0-9_\\-]+).html"); + + public XlecxRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "xlecx"; + } + + @Override + public String getDomain() { + return "xlecx.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected URL format: http://xlecx.com/comic, got: " + url); + + } +} diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XlecxRipperTest.java 
b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XlecxRipperTest.java new file mode 100644 index 00000000..4ae2e1ed --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XlecxRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.XlecxRipper; + +public class XlecxRipperTest extends RippersTest { + public void testAlbum() throws IOException { + XlecxRipper ripper = new XlecxRipper(new URL("http://xlecx.com/4937-tokimeki-nioi.html")); + testRipper(ripper); + } +} From 994dafb2175b113c145ce8317654ff8f935f11fe Mon Sep 17 00:00:00 2001 From: Tushar Date: Wed, 6 Feb 2019 13:48:27 +0530 Subject: [PATCH 3/5] Undo gfycat changes. --- .../java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java index 2061db45..c8c7bf04 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java @@ -1,5 +1,6 @@ package com.rarchives.ripme.ripper.rippers; + import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; From 661514ea9834cfc3d3f81ac648831a58f796d6b8 Mon Sep 17 00:00:00 2001 From: Tushar Date: Wed, 6 Feb 2019 13:51:02 +0530 Subject: [PATCH 4/5] Undo gfycat changes, again.
--- .../ripme/ripper/rippers/GfycatRipper.java | 60 ++++++++----------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java index c8c7bf04..49544df8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GfycatRipper.java @@ -10,12 +10,15 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import com.rarchives.ripme.ripper.AbstractSingleFileRipper; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; + public class GfycatRipper extends AbstractSingleFileRipper { @@ -43,7 +46,7 @@ public class GfycatRipper extends AbstractSingleFileRipper { @Override public URL sanitizeURL(URL url) throws MalformedURLException { url = new URL(url.toExternalForm().replace("/gifs/detail", "")); - + return url; } @@ -65,62 +68,47 @@ public class GfycatRipper extends AbstractSingleFileRipper { return m.group(1); } - throw new MalformedURLException("Expected gfycat.com format:" + "gfycat.com/id" + " Got: " + url); + throw new MalformedURLException( + "Expected gfycat.com format:" + + "gfycat.com/id" + + " Got: " + url); } @Override public List getURLsFromPage(Document doc) { List result = new ArrayList<>(); - Elements videos = doc.select("video source"); - String vidUrl = videos.first().attr("src"); - // Check preference for mp4 over webm/gif. 
- if (Utils.getConfigBoolean("prefer.mp4", false)) { - for (Element e : videos) { - if (e.hasAttr("src") && e.attr("src").endsWith(".mp4")) { - vidUrl = e.attr("src"); - break; - } + Elements videos = doc.select("script"); + for (Element el : videos) { + String json = el.html(); + if (json.startsWith("{")) { + JSONObject page = new JSONObject(json); + result.add(page.getJSONObject("video").getString("contentUrl")); } } - if (vidUrl.startsWith("//")) { - vidUrl = "http:" + vidUrl; - } - result.add(vidUrl); return result; } /** * Helper method for retrieving video URLs. - * - * @param url - * URL to gfycat page + * @param url URL to gfycat page * @return URL to video * @throws IOException */ public static String getVideoURL(URL url) throws IOException { LOGGER.info("Retrieving " + url.toExternalForm()); - // Sanitize the URL first + //Sanitize the URL first url = new URL(url.toExternalForm().replace("/gifs/detail", "")); Document doc = Http.url(url).get(); - Elements videos = doc.select("video source"); - if (videos.isEmpty()) { - throw new IOException("Could not find source at " + url); - } - String vidUrl = videos.first().attr("src"); - // Check preference for mp4 over webm/gif. - if (Utils.getConfigBoolean("prefer.mp4", false)) { - for (Element e : videos) { - if (e.hasAttr("src") && e.attr("src").endsWith(".mp4")) { - vidUrl = e.attr("src"); - break; - } + Elements videos = doc.select("script"); + for (Element el : videos) { + String json = el.html(); + if (json.startsWith("{")) { + JSONObject page = new JSONObject(json); + return page.getJSONObject("video").getString("contentUrl"); } } - if (vidUrl.startsWith("//")) { - vidUrl = "http:" + vidUrl; - } - return vidUrl; + throw new IOException(); } } \ No newline at end of file From d9620c38262e59d22fd017145ff88d78bb9f7d6b Mon Sep 17 00:00:00 2001 From: Tushar Date: Wed, 6 Feb 2019 14:22:14 +0530 Subject: [PATCH 5/5] Fixed xcartx ripper to grab all images. 
--- .../com/rarchives/ripme/ripper/rippers/XcartxRipper.java | 6 +++--- .../rarchives/ripme/tst/ripper/rippers/XlecxRipperTest.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XcartxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XcartxRipper.java index 3e34b239..94149612 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/XcartxRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XcartxRipper.java @@ -52,9 +52,9 @@ public class XcartxRipper extends AbstractHTMLRipper { @Override public List getURLsFromPage(Document page) { List imageURLs = new ArrayList<>(); - Elements albumElements = page.select("a.highslide"); - for (Element imageBox : albumElements) { - String imageUrl = imageBox.attr("href"); + Elements imageElements = page.select("div.f-desc img"); + for (Element image : imageElements) { + String imageUrl = image.attr("abs:src"); imageURLs.add(imageUrl); } diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XlecxRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XlecxRipperTest.java index 4ae2e1ed..b0c68b93 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XlecxRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XlecxRipperTest.java @@ -7,7 +7,7 @@ import com.rarchives.ripme.ripper.rippers.XlecxRipper; public class XlecxRipperTest extends RippersTest { public void testAlbum() throws IOException { - XlecxRipper ripper = new XlecxRipper(new URL("http://xlecx.com/4937-tokimeki-nioi.html")); + XlecxRipper ripper = new XlecxRipper(new URL("http://xlecx.com/4274-black-canary-ravished-prey.html")); testRipper(ripper); } }