1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-07-31 19:10:15 +02:00

Fix MrCongRipper to use new URL misskon.com (Fixes #2127) (#2128)

This ripper was previously for mrcong.com, which now redirects to
misskon.com. Some resources still refer to mrcong.com,
but all the top-level URLs are now misskon.com.

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
metaprime
2025-04-20 01:50:48 -07:00
committed by GitHub
parent 78ff73fb35
commit 531f235154
2 changed files with 95 additions and 62 deletions

View File

@@ -1,7 +1,5 @@
package com.rarchives.ripme.ripper.rippers; package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URI; import java.net.URI;
@@ -11,12 +9,14 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
public class MrCongRipper extends AbstractHTMLRipper { public class MrCongRipper extends AbstractHTMLRipper {
private Document currDoc; private Document currDoc;
private int lastPageNum; private int lastPageNum;
private int currPageNum; private int currPageNum;
@@ -29,43 +29,48 @@ public class MrCongRipper extends AbstractHTMLRipper {
@Override @Override
public String getHost() { public String getHost() {
return "mrcong"; return "misskon";
} }
@Override @Override
public String getDomain() { public String getDomain() {
return "mrcong.com"; // NOTE: This was previously mrcong.com, which now redirects to
// misskon.com. Some resources still refer to mrcong.com
// but all the top level URLs are now misskon.com
return "misskon.com";
} }
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
System.out.println(url.toExternalForm()); System.out.println(url.toExternalForm());
Pattern p = Pattern.compile("^https?://mrcong\\.com/(\\S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$"); Pattern p = Pattern.compile(
Pattern p2 = Pattern.compile("^https?://mrcong\\.com/tag/(\\S*)/$"); //Added 6-10-21 "^https?://(?:[a-z]+\\.)?misskon\\.com/([-0-9a-zA-Z]+)(?:/?|/[0-9]+/?)?$");
Pattern p2 = Pattern.compile("^https?://misskon\\.com/tag/(\\S*)/$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
Matcher m2 = p2.matcher(url.toExternalForm()); //6-10-21 Matcher m2 = p2.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} } else if (m2.matches()) {
else if(m2.matches()) { //Added 6-10-21
tagPage = true; tagPage = true;
System.out.println("tagPage = TRUE"); System.out.println("tagPage = TRUE");
return m2.group(1); return m2.group(1);
} }
throw new MalformedURLException("Expected mrcong.com URL format: " throw new MalformedURLException("Expected misskon.com URL format: "
+ "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead"); + "misskon.com/GALLERY_NAME (or /PAGE_NUMBER/) - got " + url
+ " instead");
} }
@Override @Override
public Document getFirstPage() throws IOException { //returns the root gallery page regardless of actual page number public Document getFirstPage() throws IOException {
// returns the root gallery page regardless of actual page number
// "url" is an instance field of the superclass // "url" is an instance field of the superclass
String rootUrlStr; String rootUrlStr;
URL rootUrl; URL rootUrl;
if(!tagPage) { if (!tagPage) {
rootUrlStr = url.toExternalForm().replaceAll("(|/|/[0-9]+/?)$", "/"); rootUrlStr = url.toExternalForm().replaceAll("(|/|/[0-9]+/?)$", "/");
} else { //6-10-21 } else { // 6-10-21
rootUrlStr = url.toExternalForm().replaceAll("(page/[0-9]+/)$", "page/1/"); rootUrlStr = url.toExternalForm().replaceAll("(page/[0-9]+/)$", "page/1/");
} }
@@ -81,7 +86,7 @@ public class MrCongRipper extends AbstractHTMLRipper {
public Document getNextPage(Document doc) throws IOException { public Document getNextPage(Document doc) throws IOException {
int pageNum = currPageNum; int pageNum = currPageNum;
String urlStr; String urlStr;
if(!tagPage) { if (!tagPage) {
if (pageNum == 1 && lastPageNum > 1) { if (pageNum == 1 && lastPageNum > 1) {
urlStr = url.toExternalForm().concat((pageNum + 1) + ""); urlStr = url.toExternalForm().concat((pageNum + 1) + "");
System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr); System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
@@ -89,43 +94,48 @@ public class MrCongRipper extends AbstractHTMLRipper {
urlStr = url.toExternalForm().replaceAll("(/([0-9]*)/?)$", ("/" + (pageNum + 1) + "/")); urlStr = url.toExternalForm().replaceAll("(/([0-9]*)/?)$", ("/" + (pageNum + 1) + "/"));
System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr); System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
} else { } else {
//System.out.printf("Error: Page number provided goes past last valid page number\n"); // System.out.printf("Error: Page number provided goes past last valid page
// number\n");
throw (new IOException("Error: Page number provided goes past last valid page number\n")); throw (new IOException("Error: Page number provided goes past last valid page number\n"));
} }
} else { //6-10-21 } else { // 6-10-21
//if (pageNum == 1 && lastPageNum >= 1) { // if (pageNum == 1 && lastPageNum >= 1) {
if (pageNum == 1 && lastPageNum > 1) { //6-10-21 if (pageNum == 1 && lastPageNum > 1) { // 6-10-21
urlStr = url.toExternalForm().concat("page/" + (pageNum + 1) + ""); urlStr = url.toExternalForm().concat("page/" + (pageNum + 1) + "");
System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr); System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
} else if (pageNum < lastPageNum) { } else if (pageNum < lastPageNum) {
urlStr = url.toExternalForm().replaceAll("(page/([0-9]*)/?)$", ("page/" + (pageNum + 1) + "/")); urlStr = url.toExternalForm().replaceAll("(page/([0-9]*)/?)$", ("page/" + (pageNum + 1) + "/"));
System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr); System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
} else { } else {
//System.out.printf("Error: Page number provided goes past last valid page number\n"); // System.out.printf("Error: Page number provided goes past last valid page
// number\n");
System.out.print("Error: There is no next page!\n"); System.out.print("Error: There is no next page!\n");
return null; return null;
//throw (new IOException("Error: Page number provided goes past last valid page number\n")); // throw (new IOException("Error: Page number provided goes past last valid page
// number\n"));
} }
} }
url = URI.create(urlStr).toURL(); url = URI.create(urlStr).toURL();
currDoc = Http.url(url).get(); currDoc = Http.url(url).get();
currPageNum ++;//hi currPageNum++;// hi
return currDoc; return currDoc;
} }
private int getMaxPageNumber(Document doc) { private int getMaxPageNumber(Document doc) {
if(!tagPage) { if (!tagPage) {
try { try {
lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); //gets the last possible page for the gallery // gets the last possible page for the gallery
} catch(Exception e) { lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text());
} catch (Exception e) {
return 1; return 1;
} }
} else { } else {
try { try {
lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); //gets the last possible page for the gallery // gets the last possible page for the gallery
lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text());
System.out.println("The last page found for " + url + " was " + lastPageNum); System.out.println("The last page found for " + url + " was " + lastPageNum);
} catch(Exception e) { } catch (Exception e) {
return 1; return 1;
} }
} }
@@ -134,9 +144,9 @@ public class MrCongRipper extends AbstractHTMLRipper {
} }
private int getCurrentPageNum(Document doc) { private int getCurrentPageNum(Document doc) {
int currPage; //6-10-21 int currPage; // 6-10-21
if(!tagPage) { if (!tagPage) {
currPage = Integer.parseInt(doc.select("div.page-link > span").first().text()); currPage = Integer.parseInt(doc.select("div.page-link > span").first().text());
} else { } else {
currPage = Integer.parseInt(doc.select("div.pagination > span").first().text()); currPage = Integer.parseInt(doc.select("div.pagination > span").first().text());
@@ -148,45 +158,27 @@ public class MrCongRipper extends AbstractHTMLRipper {
} }
@Override @Override
public List<String> getURLsFromPage(Document doc) { //gets the urls of the images public List<String> getURLsFromPage(Document doc) { // gets the urls of the images
List<String> result = new ArrayList<>(); List<String> result = new ArrayList<>();
if(!tagPage) { if (!tagPage) {
for (Element el : doc.select("p > img")) { for (Element el : doc.select("p > img")) {
String imageSource = el.attr("src"); String imageSource = el.attr("data-src");
result.add(imageSource); if (imageSource == null || imageSource.isEmpty()) {
imageSource = el.attr("src");
}
} }
System.out.println("\n1.)Printing List: " + result + "\n"); System.out.println("\n1.)Printing List: " + result + "\n");
} else { //6-10-21 } else {
//List<String> gallery_set_list = new ArrayList<>();
for (Element el : doc.select("h2 > a")) { for (Element el : doc.select("h2 > a")) {
String pageSource = el.attr("href"); String pageSource = el.attr("href");
if(!pageSource.equals("https://mrcong.com/")) { if (!pageSource.equals("https://misskon.com/")) {
result.add(pageSource); result.add(pageSource);
System.out.println("\n" + pageSource + " has been added to the list."); System.out.println("\n" + pageSource + " has been added to the list.");
} }
} }
/*for (String el2 : gallery_set_list) {
try {
URL temp_urL = URI.create(el2).toURL();
MrCongRipper mcr = new MrCongRipper(temp_urL);
System.out.println("URL being ripped: " + mcr.url.toString());
result.addAll(mcr.getURLsFromPage(mcr.getFirstPage()));
Document nextPg = mcr.getNextPage(mcr.currDoc);
while(nextPg != null) {
result.addAll(mcr.getURLsFromPage(nextPg));
nextPg = mcr.getNextPage(mcr.currDoc);
}
} catch (IOException e) {
e.printStackTrace();
}
}*/
System.out.println("\n2.)Printing List: " + result + "\n"); System.out.println("\n2.)Printing List: " + result + "\n");
} }
@@ -195,21 +187,20 @@ public class MrCongRipper extends AbstractHTMLRipper {
@Override @Override
public void downloadURL(URL url, int index) { public void downloadURL(URL url, int index) {
//addURLToDownload(url, getPrefix(index)); if (!tagPage) {
if(!tagPage) {
addURLToDownload(url, getPrefix(index)); addURLToDownload(url, getPrefix(index));
} else { } else {
try { try {
List<String> ls = this.getURLsFromPage(this.currDoc); List<String> ls = this.getURLsFromPage(this.currDoc);
Document np = this.getNextPage(this.currDoc); Document np = this.getNextPage(this.currDoc);
while(np != null) { //Creates a list of all sets to download // Creates a list of all sets to download
while (np != null) {
ls.addAll(this.getURLsFromPage(np)); ls.addAll(this.getURLsFromPage(np));
np = this.getNextPage(np); np = this.getNextPage(np);
} }
for(String urlStr : ls) { for (String urlStr : ls) {
MrCongRipper mcr = new MrCongRipper(URI.create(urlStr).toURL()); MrCongRipper mcr = new MrCongRipper(URI.create(urlStr).toURL());
mcr.setup(); mcr.setup();
mcr.rip(); mcr.rip();
@@ -220,4 +211,4 @@ public class MrCongRipper extends AbstractHTMLRipper {
} }
} }
} }
} }

View File

@@ -0,0 +1,42 @@
package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.junit.jupiter.api.Test;
import com.rarchives.ripme.ripper.rippers.MrCongRipper;
/**
 * Rip tests for {@link MrCongRipper} against misskon.com gallery URLs.
 *
 * <p>Each test builds a ripper for one URL and passes it to
 * {@code testRipper} (not defined in this class; presumably inherited from
 * {@code RippersTest}).
 */
public class MrCongRipperTest extends RippersTest {

    /**
     * Builds a ripper for the given page address.
     *
     * @param address absolute URL of the page to rip
     * @return a new {@link MrCongRipper} constructed from that URL
     * @throws IOException if the URL is malformed or the ripper constructor fails
     * @throws URISyntaxException if {@code address} is not a valid URI
     */
    private static MrCongRipper ripperFor(String address) throws IOException, URISyntaxException {
        URI pageUri = new URI(address);
        return new MrCongRipper(pageUri.toURL());
    }

    /** Album whose slug starts with a numeric id and is very long. */
    @Test
    public void testMrCongAlbumRip1() throws IOException, URISyntaxException {
        testRipper(ripperFor(
                "https://misskon.com/87161-xr-uncensored-lin-xing-lan-r18-xiu-ren-jue-mi-3wan-yuan-zi-liao-chao-shi-zhang-16k-qing-te-xie-1174-photos-1-video/"));
    }

    /** Album whose slug still uses the older "-anh" suffix. */
    @Test
    public void testMrCongAlbumRip2() throws IOException, URISyntaxException {
        testRipper(ripperFor("https://misskon.com/xiaoyu-vol-799-lin-xing-lan-87-anh/"));
    }

    /** Album whose slug starts with a numeric id and ends in "-photos". */
    @Test
    public void testMrCongAlbumRip3() throws IOException, URISyntaxException {
        testRipper(ripperFor("https://misskon.com/87163-le-ledb-201b-dayoung-50-photos/"));
    }

    // Ripping from tags is not yet implemented. Uncomment the @Test line when
    // implemented.
    // @Test
    public void testMrCongTagRip() throws IOException, URISyntaxException {
        testRipper(ripperFor("https://misskon.com/tag/xr-uncensored/"));
    }
}