diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java
index 642c6417..729c1e05 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java
@@ -1,7 +1,5 @@
 package com.rarchives.ripme.ripper.rippers;
 
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URI;
@@ -11,12 +9,14 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
 
 public class MrCongRipper extends AbstractHTMLRipper {
-
     private Document currDoc;
     private int lastPageNum;
     private int currPageNum;
@@ -29,43 +29,48 @@ public class MrCongRipper extends AbstractHTMLRipper {
 
     @Override
     public String getHost() {
-        return "mrcong";
+        return "misskon";
     }
 
     @Override
     public String getDomain() {
-        return "mrcong.com";
+        // NOTE: This was previously mrcong.com, which now redirects to
+        // misskon.com. Some resources still refer to mrcong.com
+        // but all the top level URLs are now misskon.com
+        return "misskon.com";
     }
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
         System.out.println(url.toExternalForm());
-        Pattern p = Pattern.compile("^https?://mrcong\\.com/(\\S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$");
-        Pattern p2 = Pattern.compile("^https?://mrcong\\.com/tag/(\\S*)/$"); //Added 6-10-21
+        Pattern p = Pattern.compile(
+                "^https?://(?:[a-z]+\\.)?misskon\\.com/([-0-9a-zA-Z]+)(?:/?|/[0-9]+/?)?$");
+        Pattern p2 = Pattern.compile("^https?://misskon\\.com/tag/(\\S*)/$");
         Matcher m = p.matcher(url.toExternalForm());
-        Matcher m2 = p2.matcher(url.toExternalForm()); //6-10-21
+        Matcher m2 = p2.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
-        }
-        else if(m2.matches()) { //Added 6-10-21
+        } else if (m2.matches()) {
             tagPage = true;
             System.out.println("tagPage = TRUE");
             return m2.group(1);
         }
 
-        throw new MalformedURLException("Expected mrcong.com URL format: "
-                + "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead");
+        throw new MalformedURLException("Expected misskon.com URL format: "
+                + "misskon.com/GALLERY_NAME (or /PAGE_NUMBER/) - got " + url
+                + " instead");
     }
 
     @Override
-    public Document getFirstPage() throws IOException { //returns the root gallery page regardless of actual page number
+    public Document getFirstPage() throws IOException {
+        // returns the root gallery page regardless of actual page number
         // "url" is an instance field of the superclass
         String rootUrlStr;
         URL rootUrl;
 
-        if(!tagPage) {
+        if (!tagPage) {
             rootUrlStr = url.toExternalForm().replaceAll("(|/|/[0-9]+/?)$", "/");
-        } else { //6-10-21
+        } else { // 6-10-21
             rootUrlStr = url.toExternalForm().replaceAll("(page/[0-9]+/)$", "page/1/");
         }
 
@@ -81,7 +86,7 @@ public class MrCongRipper extends AbstractHTMLRipper {
     public Document getNextPage(Document doc) throws IOException {
         int pageNum = currPageNum;
         String urlStr;
-        if(!tagPage) {
+        if (!tagPage) {
             if (pageNum == 1 && lastPageNum > 1) {
                 urlStr = url.toExternalForm().concat((pageNum + 1) + "");
                 System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
@@ -89,43 +94,48 @@ public class MrCongRipper extends AbstractHTMLRipper {
                 urlStr = url.toExternalForm().replaceAll("(/([0-9]*)/?)$", ("/" + (pageNum + 1) + "/"));
                 System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
             } else {
-                //System.out.printf("Error: Page number provided goes past last valid page number\n");
+                // System.out.printf("Error: Page number provided goes past last valid page
+                // number\n");
                 throw (new IOException("Error: Page number provided goes past last valid page number\n"));
             }
-        } else { //6-10-21
-            //if (pageNum == 1 && lastPageNum >= 1) {
-            if (pageNum == 1 && lastPageNum > 1) { //6-10-21
+        } else { // 6-10-21
+            // if (pageNum == 1 && lastPageNum >= 1) {
+            if (pageNum == 1 && lastPageNum > 1) { // 6-10-21
                 urlStr = url.toExternalForm().concat("page/" + (pageNum + 1) + "");
                 System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
             } else if (pageNum < lastPageNum) {
                 urlStr = url.toExternalForm().replaceAll("(page/([0-9]*)/?)$", ("page/" + (pageNum + 1) + "/"));
                 System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
             } else {
-                //System.out.printf("Error: Page number provided goes past last valid page number\n");
+                // System.out.printf("Error: Page number provided goes past last valid page
+                // number\n");
                 System.out.print("Error: There is no next page!\n");
                 return null;
-                //throw (new IOException("Error: Page number provided goes past last valid page number\n"));
+                // throw (new IOException("Error: Page number provided goes past last valid page
+                // number\n"));
             }
         }
 
         url = URI.create(urlStr).toURL();
         currDoc = Http.url(url).get();
-        currPageNum ++;//hi
+        currPageNum++;// hi
         return currDoc;
     }
 
     private int getMaxPageNumber(Document doc) {
-        if(!tagPage) {
+        if (!tagPage) {
             try {
-                lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); //gets the last possible page for the gallery
-            } catch(Exception e) {
+                // gets the last possible page for the gallery
+                lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text());
+            } catch (Exception e) {
                 return 1;
             }
         } else {
             try {
-                lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); //gets the last possible page for the gallery
+                // gets the last possible page for the gallery
+                lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text());
                 System.out.println("The last page found for " + url + " was " + lastPageNum);
-            } catch(Exception e) {
+            } catch (Exception e) {
                 return 1;
             }
         }
@@ -134,9 +144,9 @@ public class MrCongRipper extends AbstractHTMLRipper {
     }
 
     private int getCurrentPageNum(Document doc) {
-        int currPage; //6-10-21
+        int currPage; // 6-10-21
 
-        if(!tagPage) {
+        if (!tagPage) {
             currPage = Integer.parseInt(doc.select("div.page-link > span").first().text());
         } else {
             currPage = Integer.parseInt(doc.select("div.pagination > span").first().text());
@@ -148,45 +158,29 @@ public class MrCongRipper extends AbstractHTMLRipper {
     }
 
     @Override
-    public List<String> getURLsFromPage(Document doc) { //gets the urls of the images
+    public List<String> getURLsFromPage(Document doc) { // gets the urls of the images
         List<String> result = new ArrayList<>();
 
-        if(!tagPage) {
+        if (!tagPage) {
             for (Element el : doc.select("p > img")) {
-                String imageSource = el.attr("src");
-                result.add(imageSource);
+                String imageSource = el.attr("data-src");
+                if (imageSource == null || imageSource.isEmpty()) {
+                    imageSource = el.attr("src");
+                }
+                // Keep the resolved URL; without this add the gallery branch returns an empty list.
+                result.add(imageSource);
             }
 
             System.out.println("\n1.)Printing List: " + result + "\n");
-        } else { //6-10-21
-            //List<String> gallery_set_list = new ArrayList<>();
-
+        } else {
             for (Element el : doc.select("h2 > a")) {
                 String pageSource = el.attr("href");
-                if(!pageSource.equals("https://mrcong.com/")) {
+                if (!pageSource.equals("https://misskon.com/")) {
                     result.add(pageSource);
                     System.out.println("\n" + pageSource + " has been added to the list.");
                 }
             }
 
-            /*for (String el2 : gallery_set_list) {
-                try {
-                    URL temp_urL = URI.create(el2).toURL();
-                    MrCongRipper mcr = new MrCongRipper(temp_urL);
-                    System.out.println("URL being ripped: " + mcr.url.toString());
-                    result.addAll(mcr.getURLsFromPage(mcr.getFirstPage()));
-
-                    Document nextPg = mcr.getNextPage(mcr.currDoc);
-                    while(nextPg != null) {
-                        result.addAll(mcr.getURLsFromPage(nextPg));
-                        nextPg = mcr.getNextPage(mcr.currDoc);
-                    }
-                } catch (IOException e) {
-                    e.printStackTrace();
-                }
-
-            }*/
-
             System.out.println("\n2.)Printing List: " + result + "\n");
         }
 
@@ -195,21 +189,20 @@ public class MrCongRipper extends AbstractHTMLRipper {
 
     @Override
     public void downloadURL(URL url, int index) {
-        //addURLToDownload(url, getPrefix(index));
-
-        if(!tagPage) {
+        if (!tagPage) {
             addURLToDownload(url, getPrefix(index));
         } else {
             try {
                 List<String> ls = this.getURLsFromPage(this.currDoc);
                 Document np = this.getNextPage(this.currDoc);
 
-                while(np != null) { //Creates a list of all sets to download
+                // Creates a list of all sets to download
+                while (np != null) {
                     ls.addAll(this.getURLsFromPage(np));
                     np = this.getNextPage(np);
                 }
 
-                for(String urlStr : ls) {
+                for (String urlStr : ls) {
                     MrCongRipper mcr = new MrCongRipper(URI.create(urlStr).toURL());
                     mcr.setup();
                     mcr.rip();
@@ -220,4 +213,4 @@ public class MrCongRipper extends AbstractHTMLRipper {
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MrCongRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MrCongRipperTest.java
new file mode 100644
index 00000000..104b9700
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MrCongRipperTest.java
@@ -0,0 +1,42 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.junit.jupiter.api.Test;
+
+import com.rarchives.ripme.ripper.rippers.MrCongRipper;
+
+public class MrCongRipperTest extends RippersTest {
+    @Test
+    public void testMrCongAlbumRip1() throws IOException, URISyntaxException {
+        MrCongRipper ripper = new MrCongRipper(new URI(
+                "https://misskon.com/87161-xr-uncensored-lin-xing-lan-r18-xiu-ren-jue-mi-3wan-yuan-zi-liao-chao-shi-zhang-16k-qing-te-xie-1174-photos-1-video/")
+                .toURL());
+        testRipper(ripper);
+    }
+
+    @Test
+    public void testMrCongAlbumRip2() throws IOException, URISyntaxException {
+        MrCongRipper ripper = new MrCongRipper(
+                new URI("https://misskon.com/xiaoyu-vol-799-lin-xing-lan-87-anh/").toURL());
+
+        testRipper(ripper);
+    }
+
+    @Test
+    public void testMrCongAlbumRip3() throws IOException, URISyntaxException {
+        MrCongRipper ripper = new MrCongRipper(
+                new URI("https://misskon.com/87163-le-ledb-201b-dayoung-50-photos/").toURL());
+        testRipper(ripper);
+    }
+
+    // Ripping from tags is not yet implemented. Uncomment the @Test line when
+    // implemented.
+    // @Test
+    public void testMrCongTagRip() throws IOException, URISyntaxException {
+        MrCongRipper ripper = new MrCongRipper(new URI("https://misskon.com/tag/xr-uncensored/").toURL());
+        testRipper(ripper);
+    }
+}