From e8b1c0ec84b69b16b93a5baa15c964df4fd46084 Mon Sep 17 00:00:00 2001 From: Tushar Date: Fri, 25 Jan 2019 12:24:51 +0530 Subject: [PATCH] Fixed E621 ripper not ripping. --- .../ripme/ripper/rippers/E621Ripper.java | 230 ++++++++++-------- .../tst/ripper/rippers/E621RipperTest.java | 22 ++ 2 files changed, 145 insertions(+), 107 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java index 864a730a..534a1d0d 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java @@ -6,8 +6,6 @@ import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; import java.io.IOException; import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; import java.net.URL; import java.util.ArrayList; import java.util.List; @@ -18,136 +16,154 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -public class E621Ripper extends AbstractHTMLRipper{ - private static final Logger logger = Logger.getLogger(E621Ripper.class); +public class E621Ripper extends AbstractHTMLRipper { + private static final Logger logger = Logger.getLogger(E621Ripper.class); - private static Pattern gidPattern=null; - private static Pattern gidPattern2=null; - private static Pattern gidPatternPool=null; + private static Pattern gidPattern = null; + private static Pattern gidPattern2 = null; + private static Pattern gidPatternPool = null; - private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621"); + private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621"); - public E621Ripper(URL url) throws IOException { - super(url); - } - - @Override - public DownloadThreadPool getThreadPool() { - return e621ThreadPool; - } - - @Override - public String getDomain() { - return "e621.net"; - } - - @Override - public String getHost() { - return "e621"; - } - - @Override - public Document getFirstPage() throws IOException { - if(url.getPath().startsWith("/pool/show/")) - return Http.url("https://e621.net/pool/show/"+getTerm(url)).get(); - else - return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get(); - } - - private String getFullSizedImage(String url) { - try { - Document page = Http.url("https://e621.net" + url).get(); - Elements video = page.select("video > source"); - Elements flash = page.select("embed"); - Elements image = page.select("a#highres"); - if (video.size() > 0) { - return video.attr("src"); - } else if (flash.size() > 0) { - return flash.attr("src"); - } else if (image.size() > 0) { - return image.attr("href"); - } else { - throw new IOException(); - } - } catch (IOException e) { - logger.error("Unable to get full sized image from " + url); - return null; - } + public E621Ripper(URL url) throws IOException { + super(url); } - @Override - public List getURLsFromPage(Document page) { - Elements elements = page.select("div > span.thumb > a"); - List res = new ArrayList<>(); + @Override + public DownloadThreadPool getThreadPool() { + return e621ThreadPool; + } - for(Element e:elements) { - if (!e.attr("href").isEmpty()) { - String fullSizedImage = getFullSizedImage(e.attr("href")); - if (fullSizedImage != null && !fullSizedImage.equals("")) { - res.add(getFullSizedImage(e.attr("href"))); - } + @Override + public String getDomain() { + return "e621.net"; + } + + @Override + public String getHost() { + return "e621"; + } + + @Override + public Document getFirstPage() throws IOException { + if (url.getPath().startsWith("/pool/show/")) + return Http.url("https://e621.net/pool/show/" + getTerm(url)).get(); + else + return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get(); + } + + @Override + public List getURLsFromPage(Document page) { + Elements elements = page.select("div > span.thumb > a"); + List res = new ArrayList<>(); + + for (Element e : elements) { + if (!e.attr("href").isEmpty()) { + res.add(e.attr("abs:href")); } - } + } - return res; - } + return res; + } - @Override - public Document getNextPage(Document page) throws IOException { - if (page.select("a.next_page") != null) { - return Http.url("https://e621.net" + page.select("a.next_page").attr("href")).get(); + @Override + public Document getNextPage(Document page) throws IOException { + if (!page.select("a.next_page").isEmpty()) { + return Http.url(page.select("a.next_page").attr("abs:href")).get(); } else { - throw new IOException("No more pages"); + throw new IOException("No more pages."); } } - @Override - public void downloadURL(final URL url, int index) { - addURLToDownload(url, getPrefix(index)); - } + @Override + public void downloadURL(final URL url, int index) { + // addURLToDownload(url, getPrefix(index)); + e621ThreadPool.addThread(new E621FileThread(url, getPrefix(index))); + } - private String getTerm(URL url) throws MalformedURLException{ - if(gidPattern==null) - gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$"); - if(gidPatternPool==null) - gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$"); + private String getTerm(URL url) throws MalformedURLException { + if (gidPattern == null) + gidPattern = Pattern.compile( + "^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$"); + if (gidPatternPool == null) + gidPatternPool = Pattern.compile( + "^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$"); - Matcher m = gidPattern.matcher(url.toExternalForm()); - if(m.matches()) { + Matcher m = gidPattern.matcher(url.toExternalForm()); + if (m.matches()) { LOGGER.info(m.group(2)); return m.group(2); } - m = gidPatternPool.matcher(url.toExternalForm()); - if(m.matches()) { + m = gidPatternPool.matcher(url.toExternalForm()); + if (m.matches()) { return m.group(2); } - throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead"); - } + throw new MalformedURLException( + "Expected e621.net URL format: e621.net/post/index/1/searchterm - got " + url + " instead"); + } - @Override - public String getGID(URL url) throws MalformedURLException { + @Override + public String getGID(URL url) throws MalformedURLException { + String prefix = ""; + if (url.getPath().startsWith("/pool/show/")) { + prefix = "pool_"; + } + return Utils.filesystemSafe(prefix + getTerm(url)); + } - String prefix=""; - if (url.getPath().startsWith("/pool/show/")) { - prefix = "pool_"; + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + if (gidPattern2 == null) + gidPattern2 = Pattern.compile( + "^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$"); + + Matcher m = gidPattern2.matcher(url.toExternalForm()); + if (m.matches()) + return new URL("https://e621.net/post/index/1/" + m.group(2).replace("+", "%20")); + + return url; + } + + public class E621FileThread extends Thread { + + private URL url; + private String index; + + public E621FileThread(URL url, String index) { + this.url = url; + this.index = index; + } + + @Override + public void run() { + try { + String fullSizedImage = getFullSizedImage(url); + if (fullSizedImage != null && !fullSizedImage.equals("")) { + addURLToDownload(new URL(fullSizedImage), index); + } + } catch (IOException e) { + logger.error("Unable to get full sized image from " + url); + } + } + + private String getFullSizedImage(URL imageURL) throws IOException { + Document page = Http.url(imageURL).retries(3).get(); + Elements video = page.select("video > source"); + Elements flash = page.select("embed"); + Elements image = page.select("a#highres"); + if (video.size() > 0) { + return video.attr("src"); + } else if (flash.size() > 0) { + return flash.attr("src"); + } else if (image.size() > 0) { + return image.attr("href"); + } else { + throw new IOException(); } - return Utils.filesystemSafe(prefix+getTerm(url)); - - } - - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - if(gidPattern2==null) - gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$"); - - Matcher m = gidPattern2.matcher(url.toExternalForm()); - if(m.matches()) - return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20")); - - return url; - } + } + } } diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/E621RipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/E621RipperTest.java index 0b57b603..01cb1532 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/E621RipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/E621RipperTest.java @@ -10,4 +10,26 @@ public class E621RipperTest extends RippersTest { E621Ripper ripper = new E621Ripper(new URL("https://e621.net/post/index/1/beach")); testRipper(ripper); } + + public void testFlashOrWebm() throws IOException { + E621Ripper ripper = new E621Ripper(new URL("https://e621.net/post/index/1/gif")); + testRipper(ripper); + } + + public void testGetNextPage() throws IOException { + E621Ripper nextPageRipper = new E621Ripper(new URL("https://e621.net/post/index/1/cosmicminerals")); + try { + nextPageRipper.getNextPage(nextPageRipper.getFirstPage()); + assert (true); + } catch (IOException e) { + throw e; + } + + E621Ripper noNextPageRipper = new E621Ripper(new URL("https://e621.net/post/index/1/cosmicminerals")); + try { + noNextPageRipper.getNextPage(noNextPageRipper.getFirstPage()); + } catch (IOException e) { + assertEquals(e.getMessage(), "No more pages."); + } + } }