From 1f1f8ea53a46d3fc9f0afb763b9a372ad5f62a35 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sat, 19 Apr 2025 18:37:33 -0700 Subject: [PATCH 1/5] Update CfakeRipper to new site format (Fixes #2116) --- .../ripme/ripper/rippers/CfakeRipper.java | 120 +++++++++--------- .../tst/ripper/rippers/CfakeRipperTest.java | 2 +- 2 files changed, 60 insertions(+), 62 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java index 4372883e..a66d07d0 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java @@ -15,75 +15,73 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; public class CfakeRipper extends AbstractHTMLRipper { - public CfakeRipper(URL url) throws IOException { - super(url); + super(url); } - @Override - public String getHost() { - return "cfake"; + @Override + public String getHost() { + return "cfake"; + } + + @Override + public String getDomain() { + return "cfake.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://cfake\\.com/(?:picture|images/celebrity)/([a-zA-Z1-9_-]*)/\\d+/?$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); } + throw new MalformedURLException("Expected cfake URL format: " + + "cfake.com/images/celebrity/MODEL/ID - got " + url + " instead"); + } - @Override - public String getDomain() { - return "cfake.com"; + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + // We use comic-nav-next to the find the next page + Element elem = doc.select("td > div.next > a").first(); + if (elem == null) { + throw new IOException("No more pages"); } - - @Override - public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("https?://cfake\\.com/picture/([a-zA-Z1-9_-]*)/\\d+/?$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); - } - throw new MalformedURLException("Expected cfake URL format: " + - "cfake.com/picture/MODEL/ID - got " + url + " instead"); + String nextPage = elem.attr("href"); + // Some times this returns a empty string + // This for stops that + if (nextPage.equals("")) { + return null; + } else { + return Http.url("http://cfake.com" + nextPage).get(); } + } - @Override - public Document getFirstPage() throws IOException { - // "url" is an instance field of the superclass - return Http.url(url).get(); - } - - @Override - public Document getNextPage(Document doc) throws IOException { - // We use comic-nav-next to the find the next page - Element elem = doc.select("td > div.next > a").first(); - if (elem == null) { - throw new IOException("No more pages"); - } - String nextPage = elem.attr("href"); - // Some times this returns a empty string - // This for stops that - if (nextPage.equals("")) { - return null; - } - else { - return Http.url("http://cfake.com" + nextPage).get(); - } - } - - @Override - public List getURLsFromPage(Document doc) { - List result = new ArrayList<>(); - for (Element el : doc.select("table.display > tbody > tr > td > table > tbody > tr > td > a")) { - if (el.attr("href").contains("upload")) { - return result; - } else { - String imageSource = el.select("img").attr("src"); - // We remove the .md from images so we download the full size image - // not the thumbnail ones - imageSource = imageSource.replace("thumbs", "photos"); - result.add("http://cfake.com" + imageSource); - } - } + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + for (Element el : doc.select("table.display > tbody > tr > td > table > tbody > tr > td > a")) { + if (el.attr("href").contains("upload")) { return result; + } else { + String imageSource = el.select("img").attr("src"); + // We remove the .md from images so we download the full size image + // not the thumbnail ones + imageSource = imageSource.replace("thumbs", "photos"); + result.add("http://cfake.com" + imageSource); + } } - - @Override - public void downloadURL(URL url, int index) { - addURLToDownload(url, getPrefix(index)); - } + return result; } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CfakeRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CfakeRipperTest.java index 95f7ec2e..6a5f5abb 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CfakeRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CfakeRipperTest.java @@ -8,7 +8,7 @@ import com.rarchives.ripme.ripper.rippers.CfakeRipper; public class CfakeRipperTest extends RippersTest { public void testRip() throws IOException, URISyntaxException { - CfakeRipper ripper = new CfakeRipper(new URI("http://cfake.com/picture/Zooey_Deschanel/1264").toURL()); + CfakeRipper ripper = new CfakeRipper(new URI("https://cfake.com/images/celebrity/Zooey_Deschanel/1264").toURL()); testRipper(ripper); } } From 7ca2165b12c39526596f9cf045a10c7f4f95f28f Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sat, 19 Apr 2025 19:18:29 -0700 Subject: [PATCH 2/5] reimplement --- .../ripme/ripper/rippers/CfakeRipper.java | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java index a66d07d0..16c90d8c 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java @@ -10,6 +10,7 @@ import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; @@ -31,7 +32,7 @@ public class CfakeRipper extends AbstractHTMLRipper { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("https?://cfake\\.com/(?:picture|images/celebrity)/([a-zA-Z1-9_-]*)/\\d+/?$"); + Pattern p = Pattern.compile("https?://cfake\\.com/images/celebrity/([a-zA-Z1-9_-]*)/\\d+/?$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1); @@ -49,13 +50,26 @@ public class CfakeRipper extends AbstractHTMLRipper { @Override public Document getNextPage(Document doc) throws IOException { // We use comic-nav-next to the find the next page - Element elem = doc.select("td > div.next > a").first(); + Element elem = doc.select("div#wrapper_path div#content_path div#num_page").last(); if (elem == null) { - throw new IOException("No more pages"); + throw new IOException("No more pages (cannot find nav)"); } - String nextPage = elem.attr("href"); - // Some times this returns a empty string - // This for stops that + + Element nextAnchor = elem.select("a").first(); + if (nextAnchor == null) { + throw new IOException("No more pages (cannot find anchor)"); + } + + Elements nextSpans = nextAnchor.select("span"); + if (nextSpans.isEmpty()) { + // This is the expected case that we're done iterating. + throw new IOException("No more pages (last page)"); + } + + // Use the nextAnchor (parent of the span) for the URL + String nextPage = nextAnchor.attr("href"); + + // Sometimes this returns an empty string; this stops that if (nextPage.equals("")) { return null; } else { @@ -66,17 +80,15 @@ public class CfakeRipper extends AbstractHTMLRipper { @Override public List getURLsFromPage(Document doc) { List result = new ArrayList<>(); - for (Element el : doc.select("table.display > tbody > tr > td > table > tbody > tr > td > a")) { - if (el.attr("href").contains("upload")) { - return result; - } else { - String imageSource = el.select("img").attr("src"); - // We remove the .md from images so we download the full size image - // not the thumbnail ones - imageSource = imageSource.replace("thumbs", "photos"); - result.add("http://cfake.com" + imageSource); - } + for (Element el : doc.select("div#media_content .responsive .gallery > a img")) { + // Convert found src value e.g. /medias/thumbs/2025/17358722979850276d_cfake.jpg + // to photo src value e.g. + // https://cfake.com/medias/photos/2025/17358722979850276d_cfake.jpg + String imageSource = el.attr("src"); + imageSource = imageSource.replace("thumbs", "photos"); + result.add("http://cfake.com" + imageSource); } + return result; } From 5f5e346ee48d06ea3e8b982548f518032f969251 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sat, 19 Apr 2025 19:21:11 -0700 Subject: [PATCH 3/5] use https --- .../java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java index 16c90d8c..c7df28d3 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java @@ -73,7 +73,7 @@ public class CfakeRipper extends AbstractHTMLRipper { if (nextPage.equals("")) { return null; } else { - return Http.url("http://cfake.com" + nextPage).get(); + return Http.url("https://cfake.com" + nextPage).get(); } } @@ -86,7 +86,7 @@ public class CfakeRipper extends AbstractHTMLRipper { // https://cfake.com/medias/photos/2025/17358722979850276d_cfake.jpg String imageSource = el.attr("src"); imageSource = imageSource.replace("thumbs", "photos"); - result.add("http://cfake.com" + imageSource); + result.add("https://cfake.com" + imageSource); } return result; From 3062485d58635ea9750441e911289d098d958678 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sat, 19 Apr 2025 19:37:17 -0700 Subject: [PATCH 4/5] Fix test --- .../rarchives/ripme/tst/ripper/rippers/CfakeRipperTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CfakeRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CfakeRipperTest.java index 6a5f5abb..b36401b4 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CfakeRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CfakeRipperTest.java @@ -4,11 +4,15 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import org.junit.jupiter.api.Test; + import com.rarchives.ripme.ripper.rippers.CfakeRipper; public class CfakeRipperTest extends RippersTest { + @Test public void testRip() throws IOException, URISyntaxException { - CfakeRipper ripper = new CfakeRipper(new URI("https://cfake.com/images/celebrity/Zooey_Deschanel/1264").toURL()); + CfakeRipper ripper = new CfakeRipper( + new URI("https://cfake.com/images/celebrity/Zooey_Deschanel/1264").toURL()); testRipper(ripper); } } From ee0b1a1d6b3e50a4a4f5c74446be9e8dacbc364e Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Sat, 19 Apr 2025 19:40:48 -0700 Subject: [PATCH 5/5] cleanup --- .../java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java index c7df28d3..f1d8c8a2 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/CfakeRipper.java @@ -49,7 +49,6 @@ public class CfakeRipper extends AbstractHTMLRipper { @Override public Document getNextPage(Document doc) throws IOException { - // We use comic-nav-next to the find the next page Element elem = doc.select("div#wrapper_path div#content_path div#num_page").last(); if (elem == null) { throw new IOException("No more pages (cannot find nav)");