From 5a3aea7850c3ae44adc17a29cbb19fd543b74c12 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sun, 20 Apr 2025 01:17:48 -0700 Subject: [PATCH 1/6] Try to fix mrcong for redirected site to misskon --- .../ripme/ripper/rippers/MrCongRipper.java | 108 ++++++++---------- 1 file changed, 47 insertions(+), 61 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java index 642c6417..ec2703d8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java @@ -1,7 +1,5 @@ package com.rarchives.ripme.ripper.rippers; -import com.rarchives.ripme.ripper.AbstractHTMLRipper; -import com.rarchives.ripme.utils.Http; import java.io.IOException; import java.net.MalformedURLException; import java.net.URI; @@ -11,12 +9,14 @@ import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; + import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; public class MrCongRipper extends AbstractHTMLRipper { - private Document currDoc; private int lastPageNum; private int currPageNum; @@ -29,43 +29,45 @@ public class MrCongRipper extends AbstractHTMLRipper { @Override public String getHost() { - return "mrcong"; + return "misskon"; } @Override public String getDomain() { - return "mrcong.com"; + return "misskon.com"; } @Override public String getGID(URL url) throws MalformedURLException { System.out.println(url.toExternalForm()); - Pattern p = Pattern.compile("^https?://mrcong\\.com/(\\S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$"); - Pattern p2 = Pattern.compile("^https?://mrcong\\.com/tag/(\\S*)/$"); //Added 6-10-21 + Pattern p = Pattern.compile( + "^https?://misskon\\.com/(\\S*)[0-9]+[-0-9a-zA-Z](-[0-9]+-(?:photos?|ahn)?(-[0-9]+-videos?)?(|/|/[0-9]+)$"); + Pattern p2 = Pattern.compile("^https?://misskon\\.com/tag/(\\S*)/$"); // Added 6-10-21 Matcher m = p.matcher(url.toExternalForm()); - Matcher m2 = p2.matcher(url.toExternalForm()); //6-10-21 + Matcher m2 = p2.matcher(url.toExternalForm()); // 6-10-21 if (m.matches()) { return m.group(1); - } - else if(m2.matches()) { //Added 6-10-21 + } else if (m2.matches()) { // Added 6-10-21 tagPage = true; System.out.println("tagPage = TRUE"); return m2.group(1); } - throw new MalformedURLException("Expected mrcong.com URL format: " - + "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead"); + throw new MalformedURLException("Expected misskon.com URL format: " + + "misskon.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + + " instead"); } @Override - public Document getFirstPage() throws IOException { //returns the root gallery page regardless of actual page number + public Document getFirstPage() throws IOException { // returns the root gallery page regardless of actual page + // number // "url" is an instance field of the superclass String rootUrlStr; URL rootUrl; - if(!tagPage) { + if (!tagPage) { rootUrlStr = url.toExternalForm().replaceAll("(|/|/[0-9]+/?)$", "/"); - } else { //6-10-21 + } else { // 6-10-21 rootUrlStr = url.toExternalForm().replaceAll("(page/[0-9]+/)$", "page/1/"); } @@ -81,7 +83,7 @@ public class MrCongRipper extends AbstractHTMLRipper { public Document getNextPage(Document doc) throws IOException { int pageNum = currPageNum; String urlStr; - if(!tagPage) { + if (!tagPage) { if (pageNum == 1 && lastPageNum > 1) { urlStr = url.toExternalForm().concat((pageNum + 1) + ""); System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr); @@ -89,43 +91,48 @@ public class MrCongRipper extends AbstractHTMLRipper { urlStr = url.toExternalForm().replaceAll("(/([0-9]*)/?)$", ("/" + (pageNum + 1) + "/")); System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr); } else { - //System.out.printf("Error: Page number provided goes past last valid page number\n"); + // System.out.printf("Error: Page number provided goes past last valid page + // number\n"); throw (new IOException("Error: Page number provided goes past last valid page number\n")); } - } else { //6-10-21 - //if (pageNum == 1 && lastPageNum >= 1) { - if (pageNum == 1 && lastPageNum > 1) { //6-10-21 + } else { // 6-10-21 + // if (pageNum == 1 && lastPageNum >= 1) { + if (pageNum == 1 && lastPageNum > 1) { // 6-10-21 urlStr = url.toExternalForm().concat("page/" + (pageNum + 1) + ""); System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr); } else if (pageNum < lastPageNum) { urlStr = url.toExternalForm().replaceAll("(page/([0-9]*)/?)$", ("page/" + (pageNum + 1) + "/")); System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr); } else { - //System.out.printf("Error: Page number provided goes past last valid page number\n"); + // System.out.printf("Error: Page number provided goes past last valid page + // number\n"); System.out.print("Error: There is no next page!\n"); return null; - //throw (new IOException("Error: Page number provided goes past last valid page number\n")); + // throw (new IOException("Error: Page number provided goes past last valid page + // number\n")); } } url = URI.create(urlStr).toURL(); currDoc = Http.url(url).get(); - currPageNum ++;//hi + currPageNum++;// hi return currDoc; } private int getMaxPageNumber(Document doc) { - if(!tagPage) { + if (!tagPage) { try { - lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); //gets the last possible page for the gallery - } catch(Exception e) { + // gets the last possible page for the gallery + lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); + } catch (Exception e) { return 1; } } else { try { - lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); //gets the last possible page for the gallery + // gets the last possible page for the gallery + lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); System.out.println("The last page found for " + url + " was " + lastPageNum); - } catch(Exception e) { + } catch (Exception e) { return 1; } } @@ -134,9 +141,9 @@ public class MrCongRipper extends AbstractHTMLRipper { } private int getCurrentPageNum(Document doc) { - int currPage; //6-10-21 + int currPage; // 6-10-21 - if(!tagPage) { + if (!tagPage) { currPage = Integer.parseInt(doc.select("div.page-link > span").first().text()); } else { currPage = Integer.parseInt(doc.select("div.pagination > span").first().text()); @@ -148,45 +155,25 @@ public class MrCongRipper extends AbstractHTMLRipper { } @Override - public List getURLsFromPage(Document doc) { //gets the urls of the images + public List getURLsFromPage(Document doc) { // gets the urls of the images List result = new ArrayList<>(); - if(!tagPage) { + if (!tagPage) { for (Element el : doc.select("p > img")) { - String imageSource = el.attr("src"); + String imageSource = el.attr("data-src"); result.add(imageSource); } System.out.println("\n1.)Printing List: " + result + "\n"); - } else { //6-10-21 - //List gallery_set_list = new ArrayList<>(); - + } else { for (Element el : doc.select("h2 > a")) { String pageSource = el.attr("href"); - if(!pageSource.equals("https://mrcong.com/")) { + if (!pageSource.equals("https://misskon.com/")) { result.add(pageSource); System.out.println("\n" + pageSource + " has been added to the list."); } } - /*for (String el2 : gallery_set_list) { - try { - URL temp_urL = URI.create(el2).toURL(); - MrCongRipper mcr = new MrCongRipper(temp_urL); - System.out.println("URL being ripped: " + mcr.url.toString()); - result.addAll(mcr.getURLsFromPage(mcr.getFirstPage())); - - Document nextPg = mcr.getNextPage(mcr.currDoc); - while(nextPg != null) { - result.addAll(mcr.getURLsFromPage(nextPg)); - nextPg = mcr.getNextPage(mcr.currDoc); - } - } catch (IOException e) { - e.printStackTrace(); - } - - }*/ - System.out.println("\n2.)Printing List: " + result + "\n"); } @@ -195,21 +182,20 @@ public class MrCongRipper extends AbstractHTMLRipper { @Override public void downloadURL(URL url, int index) { - //addURLToDownload(url, getPrefix(index)); - - if(!tagPage) { + if (!tagPage) { addURLToDownload(url, getPrefix(index)); } else { try { List ls = this.getURLsFromPage(this.currDoc); Document np = this.getNextPage(this.currDoc); - while(np != null) { //Creates a list of all sets to download + // Creates a list of all sets to download + while (np != null) { ls.addAll(this.getURLsFromPage(np)); np = this.getNextPage(np); } - for(String urlStr : ls) { + for (String urlStr : ls) { MrCongRipper mcr = new MrCongRipper(URI.create(urlStr).toURL()); mcr.setup(); mcr.rip(); @@ -220,4 +206,4 @@ public class MrCongRipper extends AbstractHTMLRipper { } } } -} \ No newline at end of file +} From 30f905f0294ebf6011a6198e5c51c638bf9c8b33 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sun, 20 Apr 2025 01:20:14 -0700 Subject: [PATCH 2/6] fixes --- .../rarchives/ripme/ripper/rippers/MrCongRipper.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java index ec2703d8..c7fb9c96 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java @@ -41,13 +41,13 @@ public class MrCongRipper extends AbstractHTMLRipper { public String getGID(URL url) throws MalformedURLException { System.out.println(url.toExternalForm()); Pattern p = Pattern.compile( - "^https?://misskon\\.com/(\\S*)[0-9]+[-0-9a-zA-Z](-[0-9]+-(?:photos?|ahn)?(-[0-9]+-videos?)?(|/|/[0-9]+)$"); - Pattern p2 = Pattern.compile("^https?://misskon\\.com/tag/(\\S*)/$"); // Added 6-10-21 + "^https?://misskon\\.com/(\\S*)[0-9]+[-0-9a-zA-Z](-[0-9]+-(?:photos?|ahn))?(-[0-9]+-videos?)?(|/|/[0-9]+)$"); + Pattern p2 = Pattern.compile("^https?://misskon\\.com/tag/(\\S*)/$"); Matcher m = p.matcher(url.toExternalForm()); - Matcher m2 = p2.matcher(url.toExternalForm()); // 6-10-21 + Matcher m2 = p2.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1); - } else if (m2.matches()) { // Added 6-10-21 + } else if (m2.matches()) { tagPage = true; System.out.println("tagPage = TRUE"); return m2.group(1); @@ -59,8 +59,8 @@ public class MrCongRipper extends AbstractHTMLRipper { } @Override - public Document getFirstPage() throws IOException { // returns the root gallery page regardless of actual page - // number + public Document getFirstPage() throws IOException { + // returns the root gallery page regardless of actual page number // "url" is an instance field of the superclass String rootUrlStr; URL rootUrl; From 26f4c53459ae0359939fb012f0e07a51cffd2cde Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sun, 20 Apr 2025 01:36:11 -0700 Subject: [PATCH 3/6] fixes --- .../java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java index c7fb9c96..a3e70d43 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java @@ -41,7 +41,7 @@ public class MrCongRipper extends AbstractHTMLRipper { public String getGID(URL url) throws MalformedURLException { System.out.println(url.toExternalForm()); Pattern p = Pattern.compile( - "^https?://misskon\\.com/(\\S*)[0-9]+[-0-9a-zA-Z](-[0-9]+-(?:photos?|ahn))?(-[0-9]+-videos?)?(|/|/[0-9]+)$"); + "^https?://(?:[a-z]+\\.)?misskon\\.com/([-0-9a-zA-Z]+)(?:/?|/[0-9]+/?)?$"); Pattern p2 = Pattern.compile("^https?://misskon\\.com/tag/(\\S*)/$"); Matcher m = p.matcher(url.toExternalForm()); Matcher m2 = p2.matcher(url.toExternalForm()); From b87dc820853ed9ba8b91cd4122cbe31e45445d74 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sun, 20 Apr 2025 01:38:21 -0700 Subject: [PATCH 4/6] cleanup --- .../java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java index a3e70d43..f61da405 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java @@ -34,6 +34,8 @@ public class MrCongRipper extends AbstractHTMLRipper { @Override public String getDomain() { + // NOTE: This was previous mrcong.com, and some resources still + // refer to that domain, but all the top level URLs are now misskon.com return "misskon.com"; } @@ -54,7 +56,7 @@ public class MrCongRipper extends AbstractHTMLRipper { } throw new MalformedURLException("Expected misskon.com URL format: " - + "misskon.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + + "misskon.com/GALLERY_NAME (or /PAGE_NUMBER/) - got " + url + " instead"); } From 09fd94c63e1b991b1c51afa76b7e73ea6be4ad31 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sun, 20 Apr 2025 01:44:41 -0700 Subject: [PATCH 5/6] comments --- .../com/rarchives/ripme/ripper/rippers/MrCongRipper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java index f61da405..83df8e5f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MrCongRipper.java @@ -34,8 +34,9 @@ public class MrCongRipper extends AbstractHTMLRipper { @Override public String getDomain() { - // NOTE: This was previous mrcong.com, and some resources still - // refer to that domain, but all the top level URLs are now misskon.com + // NOTE: This was previously mrcong.com, which now redirects to + // misskon.com. Some resources still refer to mrcong.com + // but all the top level URLs are now misskon.com return "misskon.com"; } From 49055d98f6158753c27e40f3a79c1df5c536b9f2 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sun, 20 Apr 2025 01:48:36 -0700 Subject: [PATCH 6/6] add MrCongRipperTest.java --- .../tst/ripper/rippers/MrCongRipperTest.java | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/MrCongRipperTest.java diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MrCongRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MrCongRipperTest.java new file mode 100644 index 00000000..104b9700 --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MrCongRipperTest.java @@ -0,0 +1,42 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import org.junit.jupiter.api.Test; + +import com.rarchives.ripme.ripper.rippers.MrCongRipper; + +public class MrCongRipperTest extends RippersTest { + @Test + public void testMrCongAlbumRip1() throws IOException, URISyntaxException { + MrCongRipper ripper = new MrCongRipper(new URI( + "https://misskon.com/87161-xr-uncensored-lin-xing-lan-r18-xiu-ren-jue-mi-3wan-yuan-zi-liao-chao-shi-zhang-16k-qing-te-xie-1174-photos-1-video/") + .toURL()); + testRipper(ripper); + } + + @Test + public void testMrCongAlbumRip2() throws IOException, URISyntaxException { + MrCongRipper ripper = new MrCongRipper( + new URI("https://misskon.com/xiaoyu-vol-799-lin-xing-lan-87-anh/").toURL()); + + testRipper(ripper); + } + + @Test + public void testMrCongAlbumRip3() throws IOException, URISyntaxException { + MrCongRipper ripper = new MrCongRipper( + new URI("https://misskon.com/87163-le-ledb-201b-dayoung-50-photos/").toURL()); + testRipper(ripper); + } + + // Ripping from tags is not yet implemented. Uncomment the @Test line when + // implemented. + // @Test + public void testMrCongTagRip() throws IOException, URISyntaxException { + MrCongRipper ripper = new MrCongRipper(new URI("https://misskon.com/tag/xr-uncensored/").toURL()); + testRipper(ripper); + } +}