diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java index 166bce88..4411adfe 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java @@ -15,29 +15,91 @@ import org.jsoup.nodes.Element; public class SinnercomicsRipper extends AbstractHTMLRipper { + private static final String HOST = "sinnercomics", + DOMAIN = "sinnercomics.com"; + + private static final int SLEEP_TIME = 500; + + enum RIP_TYPE { + HOMEPAGE, + PINUP, + COMIC + } + + private RIP_TYPE ripType; + private Integer pageNum; + public SinnercomicsRipper(URL url) throws IOException { - super(url); + super(url); } @Override public String getHost() { - return "sinnercomics"; + return HOST; } @Override public String getDomain() { - return "sinnercomics.com"; + return DOMAIN; + } + + @Override + public String normalizeUrl(String url) { + // Remove the comments hashtag + return url.replaceAll("/#(comments|disqus_thread)", "/"); } @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://sinnercomics.com/comic/([a-zA-Z0-9-]*)/?$"); - Matcher m = p.matcher(url.toExternalForm()); + String cleanUrl = normalizeUrl(url.toExternalForm()); + Pattern p; + Matcher m; + + p = Pattern.compile("^https?://sinnercomics\\.com/comic/([a-zA-Z0-9-]*)/?$"); + m = p.matcher(cleanUrl); if (m.matches()) { + // Comic + this.ripType = RIP_TYPE.COMIC; return m.group(1).replaceAll("-page-\\d+", ""); } + + p = Pattern.compile("^https?://sinnercomics\\.com(?:/page/([0-9]+))?/?$"); + m = p.matcher(cleanUrl); + if (m.matches()) { + // Homepage + this.ripType = RIP_TYPE.HOMEPAGE; + if (m.group(1) != null) { + this.pageNum = Integer.valueOf(m.group(1)); + } else { + this.pageNum = 1; + } + return "homepage"; + } + + p = 
Pattern.compile("^https?://sinnercomics\\.com/([a-zA-Z0-9-]+)(?:/#comments)?/?$"); + m = p.matcher(cleanUrl); + if (m.matches()) { + // Pinup image + this.ripType = RIP_TYPE.PINUP; + return m.group(1); + } + throw new MalformedURLException("Expected sinnercomics.com URL format: " + - "sinnercomics.com/comic/albumName - got " + url + " instead"); + "/pinupName or /comic/albumName or /page/number - got " + cleanUrl + " instead"); + } + + @Override + public boolean canRip(URL url) { + if (!url.getHost().endsWith(DOMAIN)) { + return false; + } + try { + getGID(url); + } catch (MalformedURLException e) { + // Can't get GID, can't rip it. + return false; + } + return true; } @Override @@ -48,27 +110,53 @@ public class SinnercomicsRipper extends AbstractHTMLRipper { @Override public Document getNextPage(Document doc) throws IOException { - // Find next page - String nextUrl = ""; - // We use comic-nav-next to the find the next page - Element elem = doc.select("a.comic-nav-next").first(); - if (elem == null) { - throw new IOException("No more pages"); - } - String nextPage = elem.attr("href"); - // Wait half a sec to avoid IP bans - sleep(500); - return Http.url(nextPage).get(); + String nextUrl = null; + + switch (this.ripType) { + case PINUP: + throw new IOException("No next page on a pinup"); + + case COMIC: + // We use comic-nav-next to find the next page + Element elem = doc.select("a.comic-nav-next").first(); + if (elem == null) { + throw new IOException("No more pages"); + } + nextUrl = elem.attr("href"); + break; + + default: // case HOMEPAGE: + this.pageNum++; + nextUrl = "https://sinnercomics.com/page/" + String.valueOf(this.pageNum); + break; } + // Wait to avoid IP bans + sleep(SLEEP_TIME); + return Http.url(nextUrl).get(); + } + @Override public List getURLsFromPage(Document doc) { List result = new ArrayList<>(); - for (Element el : doc.select("meta[property=og:image]")) { - String imageSource = el.attr("content"); - imageSource = imageSource.replace(" 
alt=", ""); - result.add(imageSource); + + switch (this.ripType) { + case COMIC: + // comic pages only contain one image, determined by a meta tag + for (Element el : doc.select("meta[property=og:image]")) { + String imageSource = el.attr("content"); + imageSource = imageSource.replace(" alt=", ""); + result.add(imageSource); + } + break; + default: + for (Element el : doc.select(".entry p img")) { + // These filters match the full size images but might match ads too... + result.add(el.attr("src")); + } + break; } + return result; } @@ -77,5 +165,4 @@ public class SinnercomicsRipper extends AbstractHTMLRipper { addURLToDownload(url, getPrefix(index)); } - } diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinnercomicsRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinnercomicsRipperTest.java index 3866b6ba..90bb3e85 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinnercomicsRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinnercomicsRipperTest.java @@ -7,13 +7,39 @@ import com.rarchives.ripme.ripper.rippers.SinnercomicsRipper; public class SinnercomicsRipperTest extends RippersTest { public void testSinnercomicsAlbum() throws IOException { - SinnercomicsRipper ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/")); + SinnercomicsRipper ripper; + + // Comic test + ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/")); testRipper(ripper); + + // Homepage test + ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/page/2/")); + testRipper(ripper); + + // Pinup test + ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/elsa-frozen-2/#comments")); + testRipper(ripper); + } public void testGetGID() throws IOException { - URL url = new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/"); - SinnercomicsRipper ripper = new SinnercomicsRipper(url); + URL url; + 
SinnercomicsRipper ripper; + + // Comic test + url = new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/"); + ripper = new SinnercomicsRipper(url); assertEquals("beyond-the-hotel", ripper.getGID(url)); + + // Homepage test + url = new URL("https://sinnercomics.com/page/2/"); + ripper = new SinnercomicsRipper(url); + assertEquals("homepage", ripper.getGID(url)); + + // Pinup test + url = new URL("https://sinnercomics.com/elza-frozen-2/#comments"); + ripper = new SinnercomicsRipper(url); + assertEquals("elza-frozen-2", ripper.getGID(url)); } } \ No newline at end of file