mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-08-26 23:34:53 +02:00
Merge pull request #1028 from buzzlightmonth/master
Added pinups and homepage to Sinner Comics ripper
This commit is contained in:
@@ -15,29 +15,91 @@ import org.jsoup.nodes.Element;
|
||||
|
||||
public class SinnercomicsRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final String HOST = "sinnercomics",
|
||||
DOMAIN = "sinnercomics.com";
|
||||
|
||||
private static final int SLEEP_TIME = 500;
|
||||
|
||||
/**
 * The kinds of sinnercomics.com URL this ripper tells apart.
 * getGID() selects one of these according to which URL pattern matches.
 */
enum RIP_TYPE {
|
||||
// Site root, or a paginated listing addressed as /page/<number>
HOMEPAGE,
|
||||
// A single post addressed directly as /<slug>
PINUP,
|
||||
// A comic page addressed as /comic/<slug>
COMIC
|
||||
}
|
||||
|
||||
private RIP_TYPE ripType;
|
||||
private Integer pageNum;
|
||||
|
||||
/**
 * Creates a ripper for the given URL by delegating to the superclass constructor.
 *
 * @param url the URL to rip
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 */
public SinnercomicsRipper(URL url) throws IOException {
|
||||
// NOTE(review): super(url) is listed twice. This looks like the removed and the
// added version of the same line in the diff (probably a whitespace-only change),
// not two real calls - confirm against the actual file.
super(url);
|
||||
super(url);
|
||||
}
|
||||
|
||||
/**
 * Returns the short host name used for this ripper, "sinnercomics".
 */
@Override
|
||||
public String getHost() {
|
||||
// NOTE(review): two return statements are listed. The literal appears to be the
// pre-change line and HOST (declared above as "sinnercomics") its replacement;
// both yield the same string - confirm against the actual file.
return "sinnercomics";
|
||||
return HOST;
|
||||
}
|
||||
|
||||
/**
 * Returns the domain this ripper handles, "sinnercomics.com".
 */
@Override
|
||||
public String getDomain() {
|
||||
// NOTE(review): two return statements are listed. The literal appears to be the
// pre-change line and DOMAIN (declared above as "sinnercomics.com") its
// replacement; both yield the same string - confirm against the actual file.
return "sinnercomics.com";
|
||||
return DOMAIN;
|
||||
}
|
||||
|
||||
/**
 * Strips the comment-section anchor from a URL.
 * Every occurrence of "/#comments" or "/#disqus_thread" is replaced by a single "/";
 * any other URL is returned unchanged.
 *
 * @param url the URL text to clean
 * @return the URL with those anchors replaced by "/"
 */
@Override
|
||||
public String normalizeUrl(String url) {
|
||||
// Remove the comments hashtag
|
||||
return url.replaceAll("/#(comments|disqus_thread)", "/");
|
||||
}
|
||||
|
||||
/**
 * Derives the album identifier from a sinnercomics.com URL.
 *
 * The URL is first passed through normalizeUrl() and then tried against three
 * patterns in order:
 *   1. /comic/<slug>     - returns the slug with any "-page-<digits>" removed
 *   2. root or /page/<n> - returns the fixed string "homepage"
 *   3. /<slug>           - returns the slug
 *
 * Side effect: on a match this method also stores the detected kind in ripType
 * and, for homepage URLs, the starting page number in pageNum (1 when the URL
 * carries no /page/<n> part).
 *
 * @param url the URL to inspect
 * @return the identifier described above
 * @throws MalformedURLException if the URL matches none of the three patterns
 */
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
// NOTE(review): the next two declarations of p and m duplicate the ones further
// down; they look like the removed pre-change lines of the diff - confirm.
Pattern p = Pattern.compile("^https?://sinnercomics.com/comic/([a-zA-Z0-9-]*)/?$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
String cleanUrl = normalizeUrl(url.toExternalForm());
|
||||
Pattern p;
|
||||
Matcher m;
|
||||
|
||||
// The slug group uses '*', so an empty slug (".../comic/") also matches here.
p = Pattern.compile("^https?://sinnercomics\\.com/comic/([a-zA-Z0-9-]*)/?$");
|
||||
m = p.matcher(cleanUrl);
|
||||
if (m.matches()) {
|
||||
// Comic
|
||||
this.ripType = RIP_TYPE.COMIC;
|
||||
// Drop the per-page suffix so every page of one comic yields the same identifier
return m.group(1).replaceAll("-page-\\d+", "");
|
||||
}
|
||||
|
||||
// Site root, optionally followed by /page/<number>
p = Pattern.compile("^https?://sinnercomics\\.com(?:/page/([0-9]+))?/?$");
|
||||
m = p.matcher(cleanUrl);
|
||||
if (m.matches()) {
|
||||
// Homepage
|
||||
this.ripType = RIP_TYPE.HOMEPAGE;
|
||||
if (m.group(1) != null) {
|
||||
// NOTE(review): the digit run is unbounded, so a very long number would make
// Integer.valueOf throw NumberFormatException, which is not caught here.
this.pageNum = Integer.valueOf(m.group(1));
|
||||
} else {
|
||||
// No explicit page number: start from the first page
this.pageNum = 1;
|
||||
}
|
||||
return "homepage";
|
||||
}
|
||||
|
||||
// NOTE(review): cleanUrl has already had "/#comments" replaced by "/" in
// normalizeUrl(), so the optional "(?:/#comments)?" group below is normally
// never exercised - confirm whether it can be dropped.
p = Pattern.compile("^https?://sinnercomics\\.com/([a-zA-Z0-9-]+)(?:/#comments)?/?$");
|
||||
m = p.matcher(cleanUrl);
|
||||
if (m.matches()) {
|
||||
// Pinup image
|
||||
this.ripType = RIP_TYPE.PINUP;
|
||||
return m.group(1);
|
||||
}
|
||||
|
||||
// NOTE(review): two message tails are listed after the '+'. The first (using url)
// looks like the removed pre-change line, the second (using cleanUrl) like its
// replacement - confirm against the actual file.
throw new MalformedURLException("Expected sinnercomics.com URL format: " +
|
||||
"sinnercomics.com/comic/albumName - got " + url + " instead");
|
||||
"/pinupName or /comic/albumName or /page/number - got " + cleanUrl + " instead");
|
||||
}
|
||||
|
||||
/**
 * Reports whether this ripper accepts the given URL.
 * The URL is accepted when its host ends with DOMAIN and getGID() can derive an
 * identifier from it.
 *
 * Note that calling getGID() here also sets ripType (and pageNum for homepage
 * URLs) on this instance as a side effect.
 *
 * @param url the URL to check
 * @return true if the URL can be ripped, false otherwise
 */
@Override
|
||||
public boolean canRip(URL url) {
|
||||
// NOTE(review): endsWith() also lets through hosts that merely end in the domain
// text; the stricter patterns in getGID() are what reject those.
if (!url.getHost().endsWith(DOMAIN)) {
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
// Only the success or failure matters; the returned identifier is discarded
getGID(url);
|
||||
} catch (MalformedURLException e) {
|
||||
// Can't get GID, can't rip it.
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -48,27 +110,53 @@ public class SinnercomicsRipper extends AbstractHTMLRipper {
|
||||
|
||||
/**
 * Fetches the page that follows the given one, depending on ripType:
 *   PINUP    - there is no next page; always throws.
 *   COMIC    - follows the href of the first "a.comic-nav-next" link.
 *   default  - (homepage) increments pageNum and requests /page/<pageNum>.
 * Before fetching, the method sleeps for SLEEP_TIME to avoid IP bans.
 *
 * NOTE(review): the switch reads ripType, which is only assigned inside getGID();
 * this presumably relies on getGID() having run first - a null ripType would
 * throw NullPointerException at the switch. Confirm against the caller.
 *
 * @param doc the page currently being processed
 * @return the next page's document
 * @throws IOException if there is no next page, or presumably if the fetch fails
 */
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
// NOTE(review): the lines from here down to the first "return Http.url(nextPage).get();"
// look like the removed pre-change body of this method; the current body appears
// to start at "String nextUrl = null;" - confirm against the actual file.
// Find next page
|
||||
String nextUrl = "";
|
||||
// We use comic-nav-next to find the next page
|
||||
Element elem = doc.select("a.comic-nav-next").first();
|
||||
if (elem == null) {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
String nextPage = elem.attr("href");
|
||||
// Wait half a sec to avoid IP bans
|
||||
sleep(500);
|
||||
return Http.url(nextPage).get();
|
||||
String nextUrl = null;
|
||||
|
||||
switch (this.ripType) {
|
||||
case PINUP:
|
||||
throw new IOException("No next page on a pinup");
|
||||
|
||||
case COMIC:
|
||||
// We use comic-nav-next to find the next page
|
||||
Element elem = doc.select("a.comic-nav-next").first();
|
||||
if (elem == null) {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
nextUrl = elem.attr("href");
|
||||
break;
|
||||
|
||||
default: // case HOMEPAGE:
|
||||
// NOTE(review): no end-of-listing check is visible here; presumably the fetch
// below fails once the page number runs past the last page - confirm.
this.pageNum++;
|
||||
nextUrl = "https://sinnercomics.com/page/" + String.valueOf(this.pageNum);
|
||||
break;
|
||||
}
|
||||
|
||||
// Wait to avoid IP bans
|
||||
sleep(SLEEP_TIME);
|
||||
return Http.url(nextUrl).get();
|
||||
}
|
||||
|
||||
/**
 * Collects the image URLs found on one page, depending on ripType:
 *   COMIC    - the content attribute of every "meta[property=og:image]" element,
 *              with the text " alt=" removed.
 *   default  - (homepage and pinup) the src attribute of every ".entry p img" element.
 *
 * @param doc the page to scan
 * @return the image URLs found, in document order; empty if none match
 */
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> result = new ArrayList<>();
|
||||
// NOTE(review): this first loop has no closing brace in the listing and is repeated
// inside the COMIC case below; it looks like the removed pre-change lines of the
// diff - confirm against the actual file.
for (Element el : doc.select("meta[property=og:image]")) {
|
||||
String imageSource = el.attr("content");
|
||||
imageSource = imageSource.replace(" alt=", "");
|
||||
result.add(imageSource);
|
||||
|
||||
switch (this.ripType) {
|
||||
case COMIC:
|
||||
// comic pages only contain one image, determined by a meta tag
|
||||
for (Element el : doc.select("meta[property=og:image]")) {
|
||||
String imageSource = el.attr("content");
|
||||
// Strip a stray " alt=" fragment from the attribute value
imageSource = imageSource.replace(" alt=", "");
|
||||
result.add(imageSource);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
for (Element el : doc.select(".entry p img")) {
|
||||
// These filters match the full size images but might match ads too...
|
||||
result.add(el.attr("src"));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -77,5 +165,4 @@ public class SinnercomicsRipper extends AbstractHTMLRipper {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@@ -7,13 +7,39 @@ import com.rarchives.ripme.ripper.rippers.SinnercomicsRipper;
|
||||
|
||||
public class SinnercomicsRipperTest extends RippersTest {
|
||||
/**
 * Runs testRipper() against one URL of each kind the ripper recognises:
 * a comic page, a paginated homepage listing, and a pinup post.
 */
public void testSinnercomicsAlbum() throws IOException {
|
||||
// NOTE(review): ripper is declared twice; the first, initialised declaration looks
// like the removed pre-change line of the diff - confirm against the actual file.
SinnercomicsRipper ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/"));
|
||||
SinnercomicsRipper ripper;
|
||||
|
||||
// Comic test
|
||||
ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/"));
|
||||
testRipper(ripper);
|
||||
|
||||
// Homepage test
|
||||
ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/page/2/"));
|
||||
testRipper(ripper);
|
||||
|
||||
// Pinup test
|
||||
ripper = new SinnercomicsRipper(new URL("https://sinnercomics.com/elsa-frozen-2/#comments"));
|
||||
testRipper(ripper);
|
||||
|
||||
}
|
||||
|
||||
/**
 * Checks the identifier getGID() returns for one URL of each kind:
 * a comic page, a paginated homepage listing, and a pinup post.
 */
public void testGetGID() throws IOException {
|
||||
// NOTE(review): url and ripper are each declared twice; the initialised declarations
// look like the removed pre-change lines of the diff - confirm against the actual file.
URL url = new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/");
|
||||
SinnercomicsRipper ripper = new SinnercomicsRipper(url);
|
||||
URL url;
|
||||
SinnercomicsRipper ripper;
|
||||
|
||||
// Comic test
|
||||
url = new URL("https://sinnercomics.com/comic/beyond-the-hotel-page-01/");
|
||||
ripper = new SinnercomicsRipper(url);
|
||||
// The "-page-01" suffix is expected to be stripped from the identifier
assertEquals("beyond-the-hotel", ripper.getGID(url));
|
||||
|
||||
// Homepage test
|
||||
url = new URL("https://sinnercomics.com/page/2/");
|
||||
ripper = new SinnercomicsRipper(url);
|
||||
assertEquals("homepage", ripper.getGID(url));
|
||||
|
||||
// Pinup test
// NOTE(review): this slug is spelled "elza-frozen-2" while testSinnercomicsAlbum uses
// "elsa-frozen-2"; getGID() only parses the text, so the check is unaffected,
// but confirm which spelling is intended.
|
||||
url = new URL("https://sinnercomics.com/elza-frozen-2/#comments");
|
||||
ripper = new SinnercomicsRipper(url);
|
||||
assertEquals("elza-frozen-2", ripper.getGID(url));
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user