diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
index 2ffddb70..e56f8dbc 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
@@ -13,12 +13,20 @@ import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ripper.DownloadThreadPool;
 import com.rarchives.ripme.utils.Http;
 
 public class LusciousRipper extends AbstractHTMLRipper {
+    /** Retry count for fetching an image page; kept high because of read timeout exceptions. */
+    private static final int RETRY_COUNT = 5;
+
+    /** Album URL pattern (luscious.net or members.luscious.net); group 1 is returned by getGID. */
+    private static final Pattern p = Pattern.compile("^https?://(?:members\\.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
+    /** Thread pool that downloadURL adds one LusciousDownloadThread per image page to. */
+    private final DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");
 
     public LusciousRipper(URL url) throws IOException {
-        super(url);
+        super(url);
     }
 
     @Override
@@ -35,57 +43,91 @@ public class LusciousRipper extends AbstractHTMLRipper {
     public Document getFirstPage() throws IOException {
         // "url" is an instance field of the superclass
         Document page = Http.url(url).get();
-        URL firstUrl = new URL("https://luscious.net" + page.select("div > div.item.thumbnail.ic_container > a").first().attr("href"));
-        LOGGER.info("First page is " + "https://luscious.net" + page.select("div > div.album_cover_item > a").first().attr("href"));
-        return Http.url(firstUrl).get();
+        LOGGER.info("First page is " + url);
+        return page;
     }
 
     @Override
     public List getURLsFromPage(Document page) {
         List urls = new ArrayList<>();
-        Elements urlElements = page.select(".icon-download");
+        Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
         for (Element e : urlElements) {
-            urls.add(e.attr("href"));
-        }
-
-        // This is here for pages with mp4s instead of images
-        String video_image = "";
-        video_image = page.select("div > video > source").attr("src");
-        if (!video_image.equals("")) {
-            urls.add(video_image);
+            urls.add(e.attr("abs:href"));
         }
+
         return urls;
     }
 
     @Override
     public Document getNextPage(Document doc) throws IOException {
-        // Find next page
-        String nextPageUrl = "https://luscious.net" + doc.select("a.image_link[rel=next]").attr("href");
-        // The more_like_this is here so we don't try to download the page that comes after the end of an album
-        if (nextPageUrl == "https://luscious.net" ||
-                nextPageUrl.contains("more_like_this")) {
-            throw new IOException("No more pages");
+        // luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
+        // Simply GET the nextPageUrl also works. Therefore, we do this...
+        Element nextPageElement = doc.select("div#next_page > div > a").first();
+        if (nextPageElement == null) {
+            throw new IOException("No next page found.");
         }
 
-        return Http.url(nextPageUrl).get();
+        return Http.url(nextPageElement.attr("abs:href")).get();
     }
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern
-                .compile("^https?://luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
         }
         throw new MalformedURLException("Expected luscious.net URL format: "
-                + "luscious.net/albums/albumname - got " + url
-                + " instead");
+                + "luscious.net/albums/albumname \n members.luscious.net/albums/albumname - got " + url + " instead.");
     }
 
     @Override
     public void downloadURL(URL url, int index) {
-        addURLToDownload(url, getPrefix(index));
+        lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
     }
 
+    /** Returns the pool that {@link #downloadURL} adds its page-resolving threads to. */
+    @Override
+    public DownloadThreadPool getThreadPool() {
+        return lusciousThreadPool;
+    }
+
+    /**
+     * Fetches a single image page and queues the file it links to for download.
+     */
+    public class LusciousDownloadThread extends Thread {
+        private final URL url;
+        private final int index;
+
+        /**
+         * @param url   image page to fetch
+         * @param index value passed to getPrefix when the resolved file is queued
+         */
+        public LusciousDownloadThread(URL url, int index) {
+            this.url = url;
+            this.index = index;
+        }
+
+        @Override
+        public void run() {
+            try {
+                Document page = Http.url(url).retries(RETRY_COUNT).get();
+
+                String downloadUrl = page.select(".icon-download").attr("abs:href");
+                if (downloadUrl.equals("")) {
+                    // This is here for pages with mp4s instead of images.
+                    downloadUrl = page.select("div > video > source").attr("abs:src");
+                    if (downloadUrl.equals("")) {
+                        throw new IOException("Could not find download url for image or video.");
+                    }
+                }
+
+                // A non-empty download url was found at this point.
+                addURLToDownload(new URL(downloadUrl), getPrefix(index));
+
+            } catch (IOException e) {
+                LOGGER.error("Error downloading url " + url, e);
+            }
+        }
+
+    }
 }
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/LusciousRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/LusciousRipperTest.java
index 30526659..f8da140c 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/LusciousRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/LusciousRipperTest.java
@@ -8,7 +8,29 @@ import com.rarchives.ripme.ripper.rippers.LusciousRipper;
 public class LusciousRipperTest extends RippersTest {
     public void testPahealRipper() throws IOException {
         // a photo set
-        LusciousRipper ripper = new LusciousRipper(new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/"));
+        LusciousRipper ripper = new LusciousRipper(
+                new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/"));
         testRipper(ripper);
     }
+
+    public void testGetGID() throws IOException {
+        URL url = new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/");
+        LusciousRipper ripper = new LusciousRipper(url);
+        assertEquals("h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609", ripper.getGID(url));
+    }
+
+    public void testGetNextPage() throws IOException {
+        URL multiPageAlbumUrl = new URL("https://luscious.net/albums/women-of-color_58/");
+        LusciousRipper multiPageRipper = new LusciousRipper(multiPageAlbumUrl);
+        assertNotNull(multiPageRipper.getNextPage(multiPageRipper.getFirstPage()));
+
+        URL singlePageAlbumUrl = new URL("https://members.luscious.net/albums/bakaneko-navidarks_332097/");
+        LusciousRipper singlePageRipper = new LusciousRipper(singlePageAlbumUrl);
+        try {
+            singlePageRipper.getNextPage(singlePageRipper.getFirstPage());
+            fail("Expected an IOException for a single-page album.");
+        } catch (IOException e) {
+            assertEquals("No next page found.", e.getMessage());
+        }
+    }
 }
\ No newline at end of file