From 7fe7f7a284e571c3fb82b62f248b59effe77cd85 Mon Sep 17 00:00:00 2001 From: darshan-poudel Date: Fri, 1 May 2020 11:08:49 -0500 Subject: [PATCH 1/3] #38 comment-589679044: Added a ripper for freecomiconline.me supports chapter download and continues to next chapter until caught up --- .../ripper/rippers/FreeComicOnlineRipper.java | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java new file mode 100644 index 00000000..c640e6ec --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java @@ -0,0 +1,89 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class FreeComicOnlineRipper extends AbstractHTMLRipper { + + public FreeComicOnlineRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "freecomiconline"; + } + + @Override + public String getDomain() { + return "freecomiconline.me"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1) + "_" + m.group(2); + } + p = Pattern.compile("^https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/?$"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected freecomiconline URL format: " + + "freecomiconline.me/TITLE/CHAPTER - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + String nextPage = doc.select("div.select-pagination a").get(1).attr("href"); + String nextUrl = ""; + // "https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$" + System.out.println("\n\nPagination.(0).href: "+ nextPage); + + Pattern p = Pattern.compile("https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$"); + Matcher m = p.matcher(nextPage); + + if(m.matches()){ + nextUrl = m.group(0); + //System.out.println("\n\nMatched and recreatedUrl: "+ nextUrl+"\n\n"); + + } + if(nextUrl.equals("")) throw new IOException("No more pages"); + + sleep(500); + return Http.url(nextUrl).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + for (Element el : doc.select(".wp-manga-chapter-img")) { + result.add(el.attr("src")); + } + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} From 1ea679575c82323e3f014453f45a603cfeaf1e45 Mon Sep 17 00:00:00 2001 From: darshan-poudel Date: Fri, 1 May 2020 11:15:42 -0500 Subject: [PATCH 2/3] cleaned up comments --- .../ripme/ripper/rippers/FreeComicOnlineRipper.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java index c640e6ec..daba6a41 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java @@ -7,10 +7,8 @@ import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; - import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; - import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; @@ -56,19 +54,12 @@ public class FreeComicOnlineRipper extends AbstractHTMLRipper { public Document getNextPage(Document doc) throws IOException { String nextPage = doc.select("div.select-pagination a").get(1).attr("href"); String nextUrl = ""; - // "https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$" - System.out.println("\n\nPagination.(0).href: "+ nextPage); - Pattern p = Pattern.compile("https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$"); Matcher m = p.matcher(nextPage); - if(m.matches()){ nextUrl = m.group(0); - //System.out.println("\n\nMatched and recreatedUrl: "+ nextUrl+"\n\n"); - } if(nextUrl.equals("")) throw new IOException("No more pages"); - sleep(500); return Http.url(nextUrl).get(); } From 606f3665ef7bcdb7eeda8b9064164d0dc21a3e28 Mon Sep 17 00:00:00 2001 From: darshan-poudel Date: Fri, 1 May 2020 12:32:55 -0500 Subject: [PATCH 3/3] Fixing indent --- .../rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java index daba6a41..e08d77fd 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FreeComicOnlineRipper.java @@ -57,7 +57,7 @@ public class FreeComicOnlineRipper extends AbstractHTMLRipper { Pattern p = Pattern.compile("https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$"); Matcher m = p.matcher(nextPage); if(m.matches()){ - nextUrl = m.group(0); + nextUrl = m.group(0); } if(nextUrl.equals("")) throw new IOException("No more pages"); sleep(500);