From 822ff72a58ea674bbb09241f0da29185b6b61e3c Mon Sep 17 00:00:00 2001
From: Tushar <tush_r67@hotmail.com>
Date: Mon, 15 Apr 2019 18:25:06 +0530
Subject: [PATCH 1/2] Added support for comicextra.

---
 .../ripper/rippers/ComicextraRipper.java      | 171 ++++++++++++++++++
 .../ripper/rippers/ComicextraRipperTest.java  |  21 +++
 2 files changed, 192 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/ComicextraRipperTest.java
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java
new file mode 100644
index 00000000..c8cca3a9
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java
@@ -0,0 +1,171 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+/**
+ * Ripper for comicextra.com; accepts either a comic overview url or a single chapter url.
+ * @author Tushar
+ */
+public class ComicextraRipper extends AbstractHTMLRipper {
+
+    private static final String FILE_NAME = "page";
+    // p1: comic overview url (group 1 = comic); p2: chapter url (group 1 = comic, group 2 = chapter).
+    private static final Pattern p1 =
+            Pattern.compile("https:\\/\\/www\\.comicextra\\.com\\/comic\\/([A-Za-z0-9_-]+)");
+    private static final Pattern p2 = Pattern.compile(
+            "https:\\/\\/www\\.comicextra\\.com\\/([A-Za-z0-9_-]+)\\/([A-Za-z0-9_-]+)(?:\\/full)?");
+    private UrlType urlType = UrlType.UNKNOWN; // assigned as a side effect of getGID().
+    private List<String> chaptersList = null; // chapter urls; populated only for COMIC urls.
+    private int chapterIndex = -1; // index into chaptersList of the chapter being ripped.
+    private int imageIndex = 0; // running image counter within the current chapter.
+
+    public ComicextraRipper(URL url) throws IOException {
+        super(url); // all construction work is delegated to AbstractHTMLRipper
+    }
+
+    @Override
+    protected String getDomain() {
+        return "comicextra.com"; // bare domain: no scheme, no "www." prefix
+    }
+
+    @Override
+    public String getHost() {
+        return "comicextra"; // the domain name without its ".com" suffix
+    }
+
+    /** Classifies url as COMIC or CHAPTER (recorded in urlType) and returns its comic-name group. */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        final String address = url.toExternalForm();
+        // Comic overview, e.g. https://www.comicextra.com/comic/the-punisher-frank-castle-max
+        Matcher comicMatcher = p1.matcher(address);
+        if (comicMatcher.matches()) {
+            urlType = UrlType.COMIC;
+            return comicMatcher.group(1);
+        }
+        // Single chapter, e.g. https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75
+        Matcher chapterMatcher = p2.matcher(address);
+        if (chapterMatcher.matches()) {
+            urlType = UrlType.CHAPTER;
+            return chapterMatcher.group(1);
+        }
+
+        throw new MalformedURLException(
+                "Expected comicextra.com url of type: https://www.comicextra.com/comic/some-comic-name\n"
+                        + " or https://www.comicextra.com/some-comic-name/chapter-001 got " + url + " instead");
+    }
+
+    /** Loads the first chapter document; for COMIC urls it first collects every chapter url. */
+    @Override
+    protected Document getFirstPage() throws IOException {
+        switch (urlType) {
+            case COMIC:
+                // Collect the "/full" url of every chapter linked from the comic's overview page.
+                chaptersList = new ArrayList<>();
+                Document comicPage = Http.url(url).get();
+                for (Element e : comicPage.select("div.episode-list a")) {
+                    chaptersList.add(getCompleteChapterUrl(e.attr("abs:href")));
+                }
+
+                // An empty list would make get(0) throw an unchecked IndexOutOfBoundsException.
+                if (chaptersList.isEmpty()) {
+                    throw new IOException("No chapters found at " + url);
+                }
+
+                // Start ripping from the first entry of the chapters list.
+                chapterIndex = 0;
+                return Http.url(chaptersList.get(chapterIndex)).get();
+            case CHAPTER:
+                // Request the "/full" variant, as the COMIC branch does for each chapter url.
+                return Http.url(getCompleteChapterUrl(url.toExternalForm())).get();
+            case UNKNOWN:
+            default:
+                throw new IOException("Unknown url type encountered.");
+        }
+    }
+
+    @Override // COMIC: step to the next chapter; any other url type: defer to the superclass.
+    public Document getNextPage(Document doc) throws IOException {
+        if (urlType == UrlType.COMIC) {
+            ++chapterIndex;
+            imageIndex = 0; // Resetting the imagesIndex so that images prefix within each chapter starts from '001_'.
+            return Http.url(chaptersList.get(chapterIndex)).get();
+        }
+
+        return super.getNextPage(doc);
+    }
+
+    /** Returns the src attribute of every img.chapter_img element (nothing for UNKNOWN urls). */
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        List<String> imageUrls = new ArrayList<>();
+        // Guard clause: only COMIC and CHAPTER pages are scraped.
+        if (urlType != UrlType.COMIC && urlType != UrlType.CHAPTER) {
+            return imageUrls;
+        }
+        for (Element image : page.select("img.chapter_img")) {
+            imageUrls.add(image.attr("src"));
+        }
+        return imageUrls;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        // The supplied index is ignored; files are numbered per chapter through imageIndex.
+        final String chapterFolder = getSubDirectoryName();
+        final String numberedPrefix = getPrefix(++imageIndex);
+        addURLToDownload(url, numberedPrefix, chapterFolder, null, null, FILE_NAME, null, Boolean.TRUE);
+    }
+
+    /*
+     * Returns chapterUrl in its "/full" form (one Document holding all the chapter's images).
+     */
+    private String getCompleteChapterUrl(String chapterUrl) {
+        final String fullSuffix = "/full";
+        if (chapterUrl.endsWith(fullSuffix)) {
+            return chapterUrl;
+        }
+        return chapterUrl + fullSuffix;
+    }
+
+    /*
+     * Returns the chapter segment (group 2 of p2) of the chapter being ripped, used as the sub
+     * folder name; returns "" when the url type is UNKNOWN or the chapter url does not match p2.
+     */
+    private String getSubDirectoryName() {
+        final String chapterUrl;
+        switch (urlType) {
+            case COMIC:
+                chapterUrl = chaptersList.get(chapterIndex);
+                break;
+            case CHAPTER:
+                chapterUrl = url.toExternalForm();
+                break;
+            default:
+                return "";
+        }
+        Matcher chapterMatcher = p2.matcher(chapterUrl);
+        if (chapterMatcher.matches()) {
+            return chapterMatcher.group(2);
+        }
+        return "";
+    }
+
+    /*
+     * Url classification assigned by getGID: COMIC (overview url), CHAPTER (single chapter url).
+     */
+    private enum UrlType {
+        COMIC, CHAPTER, UNKNOWN
+    }
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ComicextraRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ComicextraRipperTest.java
new file mode 100644
index 00000000..a3d66d50
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ComicextraRipperTest.java
@@ -0,0 +1,21 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+import com.rarchives.ripme.ripper.rippers.ComicextraRipper;
+
+public class ComicextraRipperTest extends RippersTest {
+
+    /** Runs the ripper against a comic overview url. */
+    public void testComicUrl() throws IOException {
+        testRipper(new ComicextraRipper(new URL("https://www.comicextra.com/comic/karma-police")));
+    }
+
+    /** Runs the ripper against a url that addresses one chapter directly. */
+    public void testChapterUrl() throws IOException {
+        ComicextraRipper chapterRipper =
+                new ComicextraRipper(new URL("https://www.comicextra.com/v-for-vendetta/chapter-1"));
+        testRipper(chapterRipper);
+    }
+
+}

From b36fd1aa6aa4a28e9367137d3be0d0818fa84b85 Mon Sep 17 00:00:00 2001
From: Tushar <tush_r67@hotmail.com>
Date: Mon, 15 Apr 2019 18:53:42 +0530
Subject: [PATCH 2/2] Fixed an IndexOutOfBoundsException while fetching the next
 page.

---
 .../com/rarchives/ripme/ripper/rippers/ComicextraRipper.java  | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java
index c8cca3a9..08b27a76 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ComicextraRipper.java
@@ -100,7 +100,9 @@ public class ComicextraRipper extends AbstractHTMLRipper {
         if (urlType == UrlType.COMIC) {
             ++chapterIndex;
             imageIndex = 0; // Resetting the imagesIndex so that images prefix within each chapter starts from '001_'.
-            return Http.url(chaptersList.get(chapterIndex)).get();
+            if (chapterIndex < chaptersList.size()) {
+                return Http.url(chaptersList.get(chapterIndex)).get();
+            }
         }
 
         return super.getNextPage(doc);