1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-04-21 12:12:38 +02:00

Remove ComicextraRipper - site is permanently offline

This commit is contained in:
MetaPrime 2025-01-06 12:39:50 -08:00
parent 0fccb69473
commit 6ede556c6f
2 changed files with 0 additions and 202 deletions

View File

@@ -1,174 +0,0 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
/**
 * Ripper for comicextra.com.
 *
 * <p>Two kinds of URL are accepted:
 * <ul>
 *   <li>a comic URL ({@code https://www.comicextra.com/comic/some-comic-name}); every chapter
 *       linked from the comic page is ripped, one chapter per page of the rip;</li>
 *   <li>a chapter URL ({@code https://www.comicextra.com/some-comic-name/chapter-001}); only
 *       that chapter is ripped.</li>
 * </ul>
 *
 * <p>The URL kind is detected in {@link #getGID(URL)}, which therefore has to run before
 * {@link #getFirstPage()}.
 *
 * @author Tushar
 */
public class ComicextraRipper extends AbstractHTMLRipper {

    /** Base file name given to every downloaded image; the numeric prefix makes it unique. */
    private static final String FILE_NAME = "page";

    /** Matches a comic URL; group 1 is the comic name. Dots are escaped so they match literally. */
    private static final Pattern COMIC_URL_PATTERN =
            Pattern.compile("https://www\\.comicextra\\.com/comic/([A-Za-z0-9_-]+)");

    /**
     * Matches a chapter URL, with or without the trailing {@code /full}; group 1 is the comic
     * name and group 2 is the chapter name.
     */
    private static final Pattern CHAPTER_URL_PATTERN = Pattern.compile(
            "https://www\\.comicextra\\.com/([A-Za-z0-9_-]+)/([A-Za-z0-9_-]+)(?:/full)?");

    /** Kind of URL being ripped; set as a side effect of {@link #getGID(URL)}. */
    private UrlType urlType = UrlType.UNKNOWN;

    /** Chapter URLs (each ending in {@code /full}) collected from the comic page; COMIC only. */
    private List<String> chaptersList = null;

    /** Index into {@link #chaptersList} of the chapter currently being ripped. */
    private int chapterIndex = -1;

    /** Running image counter within the current chapter, used to build the file prefix. */
    private int imageIndex = 0;

    public ComicextraRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    protected String getDomain() {
        return "comicextra.com";
    }

    @Override
    public String getHost() {
        return "comicextra";
    }

    /**
     * Classifies the URL as a comic or a chapter URL and returns the comic name.
     *
     * <p>Side effect: records the detected kind in {@link #urlType}.
     *
     * @param url the URL to classify
     * @return the comic name taken from the URL
     * @throws MalformedURLException if the URL matches neither supported form
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        String externalForm = url.toExternalForm();

        // Comic URL, e.g. https://www.comicextra.com/comic/the-punisher-frank-castle-max
        Matcher comicMatcher = COMIC_URL_PATTERN.matcher(externalForm);
        if (comicMatcher.matches()) {
            urlType = UrlType.COMIC;
            return comicMatcher.group(1);
        }

        // Chapter URL, e.g. https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75
        Matcher chapterMatcher = CHAPTER_URL_PATTERN.matcher(externalForm);
        if (chapterMatcher.matches()) {
            urlType = UrlType.CHAPTER;
            return chapterMatcher.group(1);
        }

        throw new MalformedURLException(
                "Expected comicextra.com url of type: https://www.comicextra.com/comic/some-comic-name\n"
                        + " or https://www.comicextra.com/some-comic-name/chapter-001 got " + url
                        + " instead");
    }

    /**
     * Fetches the first document to rip.
     *
     * <p>For a comic URL the comic page is fetched, the chapter links are collected into
     * {@link #chaptersList}, and the first chapter's document is returned. For a chapter URL the
     * chapter document itself is returned.
     *
     * @return the first chapter document
     * @throws IOException if a page cannot be fetched, the comic page lists no chapters, or the
     *         URL kind has not been determined
     */
    @Override
    protected Document getFirstPage() throws IOException {
        switch (urlType) {
            case COMIC:
                chaptersList = new ArrayList<>();
                Document comicPage = Http.url(url).get();
                for (Element link : comicPage.select("div.episode-list a")) {
                    String chapterUrl = link.attr("abs:href");
                    // An anchor without a resolvable href yields ""; appending "/full" to that
                    // would produce a bogus URL, so skip it.
                    if (!chapterUrl.isEmpty()) {
                        chaptersList.add(getCompleteChapterUrl(chapterUrl));
                    }
                }
                if (chaptersList.isEmpty()) {
                    // Report through the declared exception type instead of letting
                    // chaptersList.get(0) fail with an unchecked IndexOutOfBoundsException.
                    throw new IOException("No chapters found for comic: " + url);
                }
                chapterIndex = 0;
                return Http.url(chaptersList.get(chapterIndex)).get();
            case CHAPTER:
                return Http.url(url).get();
            case UNKNOWN:
            default:
                throw new IOException("Unknown url type encountered.");
        }
    }

    /**
     * Advances to the next chapter of a comic.
     *
     * <p>For a comic URL with chapters remaining, moves {@link #chapterIndex} forward, resets the
     * per-chapter image counter and returns the next chapter's document. In every other case the
     * decision is delegated to the superclass implementation.
     *
     * @param doc the document that was just processed
     * @return the next chapter's document, or whatever the superclass returns
     */
    @Override
    public Document getNextPage(Document doc) throws IOException, URISyntaxException {
        if (urlType == UrlType.COMIC) {
            int nextChapter = chapterIndex + 1;
            // Only advance when a next chapter exists so chapterIndex always stays a valid index.
            if (nextChapter < chaptersList.size()) {
                chapterIndex = nextChapter;
                imageIndex = 0; // image prefixes restart from the beginning in each chapter
                return Http.url(chaptersList.get(chapterIndex)).get();
            }
        }
        return super.getNextPage(doc);
    }

    /**
     * Collects the {@code src} attribute of every {@code img.chapter_img} element on the page.
     *
     * @param page a chapter document
     * @return the image URLs in document order; empty if the URL kind is unknown
     */
    @Override
    protected List<String> getURLsFromPage(Document page) {
        List<String> urls = new ArrayList<>();
        if (urlType == UrlType.COMIC || urlType == UrlType.CHAPTER) {
            Elements images = page.select("img.chapter_img");
            for (Element img : images) {
                urls.add(img.attr("src"));
            }
        }
        return urls;
    }

    /**
     * Queues one image for download into the current chapter's sub-directory.
     *
     * <p>The supplied {@code index} is ignored; the prefix is built from the per-chapter
     * {@link #imageIndex} counter instead, which is reset whenever a new chapter starts.
     */
    @Override
    protected void downloadURL(URL url, int index) {
        String subdirectory = getSubDirectoryName();
        String prefix = getPrefix(++imageIndex);
        addURLToDownload(url, subdirectory, null, null, prefix, FILE_NAME, null, Boolean.TRUE);
    }

    /**
     * Appends {@code /full} to a chapter URL, unless it is already there, so that all images of
     * the chapter are served in a single document.
     */
    private String getCompleteChapterUrl(String chapterUrl) {
        if (!chapterUrl.endsWith("/full")) {
            chapterUrl = chapterUrl + "/full";
        }
        return chapterUrl;
    }

    /**
     * Returns the sub-directory name for the current chapter: the chapter segment of the current
     * chapter URL, or an empty string if it cannot be determined.
     */
    private String getSubDirectoryName() {
        String chapterUrl;
        switch (urlType) {
            case COMIC:
                chapterUrl = chaptersList.get(chapterIndex);
                break;
            case CHAPTER:
                chapterUrl = url.toExternalForm();
                break;
            default:
                return "";
        }
        Matcher m = CHAPTER_URL_PATTERN.matcher(chapterUrl);
        return m.matches() ? m.group(2) : "";
    }

    /** Kinds of URL this ripper distinguishes. */
    private enum UrlType {
        COMIC, CHAPTER, UNKNOWN
    }
}

View File

@@ -1,28 +0,0 @@
package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import com.rarchives.ripme.ripper.rippers.ComicextraRipper;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
public class ComicextraRipperTest extends RippersTest {
@Test
@Tag("flaky")
public void testComicUrl() throws IOException, URISyntaxException {
URL url = new URI("https://www.comicextra.com/comic/karma-police").toURL();
ComicextraRipper ripper = new ComicextraRipper(url);
testRipper(ripper);
}
@Test
@Disabled("no images found error, broken ripper?")
public void testChapterUrl() throws IOException, URISyntaxException {
URL url = new URI("https://www.comicextra.com/v-for-vendetta/chapter-1").toURL();
ComicextraRipper ripper = new ComicextraRipper(url);
testRipper(ripper);
}
}