1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-17 03:14:03 +02:00

* fixed imagefap ripper (switching from img/src to img/data-src)

This commit is contained in:
brantspar
2023-07-15 15:14:23 +10:00
committed by soloturn
parent ecf427cdee
commit 836a74940e
3 changed files with 33 additions and 11 deletions

View File

@@ -160,7 +160,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
for (String imageURL : imageURLs) {
index += 1;
LOGGER.debug("Found image url #" + index + ": " + imageURL);
LOGGER.debug("Found image url #" + index + ": '" + imageURL + "'");
downloadURL(new URL(imageURL), index);
if (isStopped() || isThisATest()) {
break;

View File

@@ -1,8 +1,11 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@@ -10,6 +13,7 @@ import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
@@ -141,6 +145,8 @@ public class ImagefapRipper extends AbstractHTMLRipper {
if(image == null)
throw new RuntimeException("Unable to extract image URL from single image page! Unable to continue");
LOGGER.debug("Adding imageURL: '" + image + "'");
imageURLs.add(image);
if (isThisATest()) {
break;
@@ -177,9 +183,29 @@ public class ImagefapRipper extends AbstractHTMLRipper {
sleep(IMAGE_SLEEP_TIME);
Document doc = getPageWithRetries(new URL(pageURL));
return doc.select("img#mainPhoto").attr("src");
String framedPhotoUrl = doc.select("img#mainPhoto").attr("data-src");
// we use the URL without its query string to reduce the failure rate, because some query params differ between the li elements and the mainPhoto URL
String noQueryPhotoUrl = framedPhotoUrl.split("\\?")[0];
LOGGER.debug("noQueryPhotoUrl: " + noQueryPhotoUrl);
// we look for a li > a element whose framed attribute starts with the noQueryPhotoUrl (only reference in the page to the full URL)
Elements selectedItem = doc.select("ul.thumbs > li > a[framed^='"+noQueryPhotoUrl+"']");
// the fullsize URL is in the href attribute
String fullSizedUrl = selectedItem.attr("href");
if("".equals(fullSizedUrl))
throw new IOException("JSoup full URL extraction failed from '" + selectedItem.html() + "'");
LOGGER.debug("fullSizedUrl: " + fullSizedUrl);
return fullSizedUrl;
} catch (IOException e) {
LOGGER.debug("Unable to get full size image URL from page URL " + pageURL + " because: " + e.getMessage());
LOGGER.debug("Unable to get full size image URL from page: " + pageURL + " because: " + e.getMessage());
return null;
}
}

View File

@@ -19,12 +19,8 @@ public class ImagefapRipperTest extends RippersTest {
Map<URL, String> testURLs = new HashMap<>();
// Album with specific title
testURLs.put(new URI("http://www.imagefap.com/pictures/4649440/Frozen-%28Elsa-and-Anna%29?view=2").toURL(),
"Frozen (Elsa and Anna)");
// New URL format
testURLs.put(new URI("http://www.imagefap.com/gallery.php?pgid=fffd68f659befa5535cf78f014e348f1").toURL(),
"imagefap_fffd68f659befa5535cf78f014e348f1");
testURLs.put(new URI("https://www.imagefap.com/pictures/11365460/Cartoons").toURL(),
"Cartoons");
for (URL url : testURLs.keySet()) {
ImagefapRipper ripper = new ImagefapRipper(url);
@@ -34,8 +30,8 @@ public class ImagefapRipperTest extends RippersTest {
@Test
@Tag("flaky")
public void testImagefapGetAlbumTitle() throws IOException, URISyntaxException {
URL url = new URI("https://www.imagefap.com/gallery.php?gid=7789753").toURL();
URL url = new URI("https://www.imagefap.com/pictures/11365460/Cartoons").toURL();
ImagefapRipper ripper = new ImagefapRipper(url);
Assertions.assertEquals("imagefap_Red.Heels.Lover.In.Love_7789753", ripper.getAlbumTitle(url));
Assertions.assertEquals("imagefap_Cartoons_11365460", ripper.getAlbumTitle(url));
}
}