From 5a3aea7850c3ae44adc17a29cbb19fd543b74c12 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sun, 20 Apr 2025 01:17:48 -0700 Subject: [PATCH] Try to fix mrcong for redirected site to misskon --- .../ripme/ripper/rippers/MrCongRipper.java | 108 ++++++++---------- 1 file changed, 47 insertions(+), 61 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java index 642c6417..ec2703d8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java @@ -1,7 +1,5 @@ package com.rarchives.ripme.ripper.rippers; -import com.rarchives.ripme.ripper.AbstractHTMLRipper; -import com.rarchives.ripme.utils.Http; import java.io.IOException; import java.net.MalformedURLException; import java.net.URI; @@ -11,12 +9,14 @@ import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; + import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; public class MrCongRipper extends AbstractHTMLRipper { - private Document currDoc; private int lastPageNum; private int currPageNum; @@ -29,43 +29,45 @@ public class MrCongRipper extends AbstractHTMLRipper { @Override public String getHost() { - return "mrcong"; + return "misskon"; } @Override public String getDomain() { - return "mrcong.com"; + return "misskon.com"; } @Override public String getGID(URL url) throws MalformedURLException { System.out.println(url.toExternalForm()); - Pattern p = Pattern.compile("^https?://mrcong\\.com/(\\S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$"); - Pattern p2 = Pattern.compile("^https?://mrcong\\.com/tag/(\\S*)/$"); //Added 6-10-21 + Pattern p = Pattern.compile( + "^https?://misskon\\.com/(\\S*)[0-9]+[-0-9a-zA-Z](-[0-9]+-(?:photos?|ahn)?(-[0-9]+-videos?)?(|/|/[0-9]+)$"); + Pattern p2 = Pattern.compile("^https?://misskon\\.com/tag/(\\S*)/$"); // Added 6-10-21 Matcher m = p.matcher(url.toExternalForm()); - Matcher m2 = p2.matcher(url.toExternalForm()); //6-10-21 + Matcher m2 = p2.matcher(url.toExternalForm()); // 6-10-21 if (m.matches()) { return m.group(1); - } - else if(m2.matches()) { //Added 6-10-21 + } else if (m2.matches()) { // Added 6-10-21 tagPage = true; System.out.println("tagPage = TRUE"); return m2.group(1); } - throw new MalformedURLException("Expected mrcong.com URL format: " - + "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead"); + throw new MalformedURLException("Expected misskon.com URL format: " + + "misskon.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + + " instead"); } @Override - public Document getFirstPage() throws IOException { //returns the root gallery page regardless of actual page number + public Document getFirstPage() throws IOException { // returns the root gallery page regardless of actual page + // number // "url" is an instance field of the superclass String rootUrlStr; URL rootUrl; - if(!tagPage) { + if (!tagPage) { rootUrlStr = url.toExternalForm().replaceAll("(|/|/[0-9]+/?)$", "/"); - } else { //6-10-21 + } else { // 6-10-21 rootUrlStr = url.toExternalForm().replaceAll("(page/[0-9]+/)$", "page/1/"); } @@ -81,7 +83,7 @@ public class MrCongRipper extends AbstractHTMLRipper { public Document getNextPage(Document doc) throws IOException { int pageNum = currPageNum; String urlStr; - if(!tagPage) { + if (!tagPage) { if (pageNum == 1 && lastPageNum > 1) { urlStr = url.toExternalForm().concat((pageNum + 1) + ""); System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr); @@ -89,43 +91,48 @@ public class MrCongRipper extends AbstractHTMLRipper { urlStr = url.toExternalForm().replaceAll("(/([0-9]*)/?)$", ("/" + (pageNum + 1) + "/")); System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr); } else { - //System.out.printf("Error: Page number provided goes past last valid page number\n"); + // System.out.printf("Error: Page number provided goes past last valid page + // number\n"); throw (new IOException("Error: Page number provided goes past last valid page number\n")); } - } else { //6-10-21 - //if (pageNum == 1 && lastPageNum >= 1) { - if (pageNum == 1 && lastPageNum > 1) { //6-10-21 + } else { // 6-10-21 + // if (pageNum == 1 && lastPageNum >= 1) { + if (pageNum == 1 && lastPageNum > 1) { // 6-10-21 urlStr = url.toExternalForm().concat("page/" + (pageNum + 1) + ""); System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr); } else if (pageNum < lastPageNum) { urlStr = url.toExternalForm().replaceAll("(page/([0-9]*)/?)$", ("page/" + (pageNum + 1) + "/")); System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr); } else { - //System.out.printf("Error: Page number provided goes past last valid page number\n"); + // System.out.printf("Error: Page number provided goes past last valid page + // number\n"); System.out.print("Error: There is no next page!\n"); return null; - //throw (new IOException("Error: Page number provided goes past last valid page number\n")); + // throw (new IOException("Error: Page number provided goes past last valid page + // number\n")); } } url = URI.create(urlStr).toURL(); currDoc = Http.url(url).get(); - currPageNum ++;//hi + currPageNum++;// hi return currDoc; } private int getMaxPageNumber(Document doc) { - if(!tagPage) { + if (!tagPage) { try { - lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); //gets the last possible page for the gallery - } catch(Exception e) { + // gets the last possible page for the gallery + lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); + } catch (Exception e) { return 1; } } else { try { - lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); //gets the last possible page for the gallery + // gets the last possible page for the gallery + lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); System.out.println("The last page found for " + url + " was " + lastPageNum); - } catch(Exception e) { + } catch (Exception e) { return 1; } } @@ -134,9 +141,9 @@ public class MrCongRipper extends AbstractHTMLRipper { } private int getCurrentPageNum(Document doc) { - int currPage; //6-10-21 + int currPage; // 6-10-21 - if(!tagPage) { + if (!tagPage) { currPage = Integer.parseInt(doc.select("div.page-link > span").first().text()); } else { currPage = Integer.parseInt(doc.select("div.pagination > span").first().text()); @@ -148,45 +155,25 @@ public class MrCongRipper extends AbstractHTMLRipper { } @Override - public List getURLsFromPage(Document doc) { //gets the urls of the images + public List getURLsFromPage(Document doc) { // gets the urls of the images List result = new ArrayList<>(); - if(!tagPage) { + if (!tagPage) { for (Element el : doc.select("p > img")) { - String imageSource = el.attr("src"); + String imageSource = el.attr("data-src"); result.add(imageSource); } System.out.println("\n1.)Printing List: " + result + "\n"); - } else { //6-10-21 - //List gallery_set_list = new ArrayList<>(); - + } else { for (Element el : doc.select("h2 > a")) { String pageSource = el.attr("href"); - if(!pageSource.equals("https://mrcong.com/")) { + if (!pageSource.equals("https://misskon.com/")) { result.add(pageSource); System.out.println("\n" + pageSource + " has been added to the list."); } } - /*for (String el2 : gallery_set_list) { - try { - URL temp_urL = URI.create(el2).toURL(); - MrCongRipper mcr = new MrCongRipper(temp_urL); - System.out.println("URL being ripped: " + mcr.url.toString()); - result.addAll(mcr.getURLsFromPage(mcr.getFirstPage())); - - Document nextPg = mcr.getNextPage(mcr.currDoc); - while(nextPg != null) { - result.addAll(mcr.getURLsFromPage(nextPg)); - nextPg = mcr.getNextPage(mcr.currDoc); - } - } catch (IOException e) { - e.printStackTrace(); - } - - }*/ - System.out.println("\n2.)Printing List: " + result + "\n"); } @@ -195,21 +182,20 @@ public class MrCongRipper extends AbstractHTMLRipper { @Override public void downloadURL(URL url, int index) { - //addURLToDownload(url, getPrefix(index)); - - if(!tagPage) { + if (!tagPage) { addURLToDownload(url, getPrefix(index)); } else { try { List ls = this.getURLsFromPage(this.currDoc); Document np = this.getNextPage(this.currDoc); - while(np != null) { //Creates a list of all sets to download + // Creates a list of all sets to download + while (np != null) { ls.addAll(this.getURLsFromPage(np)); np = this.getNextPage(np); } - for(String urlStr : ls) { + for (String urlStr : ls) { MrCongRipper mcr = new MrCongRipper(URI.create(urlStr).toURL()); mcr.setup(); mcr.rip(); @@ -220,4 +206,4 @@ public class MrCongRipper extends AbstractHTMLRipper { } } } -} \ No newline at end of file +}