1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-27 15:49:50 +02:00

Merge pull request #1028 from buzzlightmonth/master

Added pinups and homepage to Sinner Comics ripper
This commit is contained in:
cyian-1756
2018-10-28 17:34:01 -05:00
committed by GitHub
2 changed files with 138 additions and 25 deletions

View File

@@ -15,29 +15,91 @@ import org.jsoup.nodes.Element;
public class SinnercomicsRipper extends AbstractHTMLRipper { public class SinnercomicsRipper extends AbstractHTMLRipper {
// Values returned by getHost() and getDomain(); DOMAIN is also used by canRip() to filter hosts.
private static final String HOST = "sinnercomics",
DOMAIN = "sinnercomics.com";
// Delay handed to sleep() before each next-page request (presumably milliseconds - confirm against sleep()).
private static final int SLEEP_TIME = 500;
// The kinds of sinnercomics.com URL that getGID() tells apart.
enum RIP_TYPE {
HOMEPAGE,
PINUP,
COMIC
}
// Assigned by getGID() when a URL matches; stays null until then.
private RIP_TYPE ripType;
// Assigned by getGID() for HOMEPAGE URLs only (1 when the URL has no /page/N part);
// incremented by getNextPage() while walking homepage pages.
private Integer pageNum;
/**
 * Creates a ripper for the given URL by delegating to the superclass constructor.
 *
 * @param url the URL to rip
 * @throws IOException if the superclass constructor fails
 */
public SinnercomicsRipper(URL url) throws IOException {
    super(url);
}
@Override @Override
public String getHost() { public String getHost() {
return "sinnercomics"; return HOST;
} }
@Override @Override
public String getDomain() { public String getDomain() {
return "sinnercomics.com"; return DOMAIN;
}
@Override
public String normalizeUrl(String url) {
// Remove the comments hashtag
return url.replaceAll("/#(comments|disqus_thread)", "/");
} }
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://sinnercomics.com/comic/([a-zA-Z0-9-]*)/?$"); String cleanUrl = normalizeUrl(url.toExternalForm());
Matcher m = p.matcher(url.toExternalForm()); Pattern p;
Matcher m;
p = Pattern.compile("^https?://sinnercomics\\.com/comic/([a-zA-Z0-9-]*)/?$");
m = p.matcher(cleanUrl);
if (m.matches()) { if (m.matches()) {
// Comic
this.ripType = RIP_TYPE.COMIC;
return m.group(1).replaceAll("-page-\\d+", ""); return m.group(1).replaceAll("-page-\\d+", "");
} }
p = Pattern.compile("^https?://sinnercomics\\.com(?:/page/([0-9]+))?/?$");
m = p.matcher(cleanUrl);
if (m.matches()) {
// Homepage
this.ripType = RIP_TYPE.HOMEPAGE;
if (m.group(1) != null) {
this.pageNum = Integer.valueOf(m.group(1));
} else {
this.pageNum = 1;
}
return "homepage";
}
p = Pattern.compile("^https?://sinnercomics\\.com/([a-zA-Z0-9-]+)(?:/#comments)?/?$");
m = p.matcher(cleanUrl);
if (m.matches()) {
// Pinup image
this.ripType = RIP_TYPE.PINUP;
return m.group(1);
}
throw new MalformedURLException("Expected sinnercomics.com URL format: " + throw new MalformedURLException("Expected sinnercomics.com URL format: " +
"sinnercomics.com/comic/albumName - got " + url + " instead"); "/pinupName or /comic/albumName or /page/number - got " + cleanUrl + " instead");
}
@Override
public boolean canRip(URL url) {
if (!url.getHost().endsWith(DOMAIN)) {
return false;
}
try {
getGID(url);
} catch (MalformedURLException e) {
// Can't get GID, can't rip it.
return false;
}
return true;
} }
@Override @Override
@@ -48,27 +110,53 @@ public class SinnercomicsRipper extends AbstractHTMLRipper {
@Override @Override
public Document getNextPage(Document doc) throws IOException { public Document getNextPage(Document doc) throws IOException {
// Find next page String nextUrl = null;
String nextUrl = "";
// We use comic-nav-next to the find the next page switch (this.ripType) {
Element elem = doc.select("a.comic-nav-next").first(); case PINUP:
if (elem == null) { throw new IOException("No next page on a pinup");
throw new IOException("No more pages");
} case COMIC:
String nextPage = elem.attr("href"); // We use comic-nav-next to the find the next page
// Wait half a sec to avoid IP bans Element elem = doc.select("a.comic-nav-next").first();
sleep(500); if (elem == null) {
return Http.url(nextPage).get(); throw new IOException("No more pages");
}
nextUrl = elem.attr("href");
break;
default: // case HOMEPAGE:
this.pageNum++;
nextUrl = "https://sinnercomics.com/page/" + String.valueOf(this.pageNum);
break;
} }
// Wait to avoid IP bans
sleep(SLEEP_TIME);
return Http.url(nextUrl).get();
}
@Override @Override
public List<String> getURLsFromPage(Document doc) { public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>(); List<String> result = new ArrayList<>();
for (Element el : doc.select("meta[property=og:image]")) {
String imageSource = el.attr("content"); switch (this.ripType) {
imageSource = imageSource.replace(" alt=", ""); case COMIC:
result.add(imageSource); // comic pages only contain one image, determined by a meta tag
for (Element el : doc.select("meta[property=og:image]")) {
String imageSource = el.attr("content");
imageSource = imageSource.replace(" alt=", "");
result.add(imageSource);
}
break;
default:
for (Element el : doc.select(".entry p img")) {
// These filters match the full size images but might match ads too...
result.add(el.attr("src"));
}
break;
} }
return result; return result;
} }
@@ -77,5 +165,4 @@ public class SinnercomicsRipper extends AbstractHTMLRipper {
addURLToDownload(url, getPrefix(index)); addURLToDownload(url, getPrefix(index));
} }
} }

View File

@@ -7,13 +7,39 @@ import com.rarchives.ripme.ripper.rippers.SinnercomicsRipper;
public class SinnercomicsRipperTest extends RippersTest { public class SinnercomicsRipperTest extends RippersTest {
public void testSinnercomicsAlbum() throws IOException { public void testSinnercomicsAlbum() throws IOException {
SinnercomicsRipper ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/")); SinnercomicsRipper ripper;
// Comic test
ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/"));
testRipper(ripper); testRipper(ripper);
// Homepage test
ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/page/2/"));
testRipper(ripper);
// Pinup test
ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/elsa-frozen-2/#comments"));
testRipper(ripper);
} }
public void testGetGID() throws IOException { public void testGetGID() throws IOException {
URL url = new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/"); URL url;
SinnercomicsRipper ripper = new SinnercomicsRipper(url); SinnercomicsRipper ripper;
// Comic test
url = new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/");
ripper = new SinnercomicsRipper(url);
assertEquals("beyond-the-hotel", ripper.getGID(url)); assertEquals("beyond-the-hotel", ripper.getGID(url));
// Homepage test
url = new URL("https://sinnercomics.com/page/2/");
ripper = new SinnercomicsRipper(url);
assertEquals("homepage", ripper.getGID(url));
// Comic test
url = new URL("https://sinnercomics.com/elza-frozen-2/#comments");
ripper = new SinnercomicsRipper(url);
assertEquals("elza-frozen-2", ripper.getGID(url));
} }
} }