Mirror of https://github.com/RipMeApp/ripme.git — synced 2025-08-22 21:43:06 +02:00

Merge pull request #1170 from Tush-r/master

Improved the Luscious ripper; fixed incomplete album ripping.
This commit is contained in:
cyian-1756
2019-01-21 02:33:50 -05:00
committed by GitHub
2 changed files with 78 additions and 26 deletions

View File

@@ -13,12 +13,17 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
public class LusciousRipper extends AbstractHTMLRipper {
private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
private Pattern p = Pattern.compile("^https?://(?:members.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
private DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");
public LusciousRipper(URL url) throws IOException {
super(url);
super(url);
}
@Override
@@ -35,57 +40,83 @@ public class LusciousRipper extends AbstractHTMLRipper {
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
Document page = Http.url(url).get();
URL firstUrl = new URL("https://luscious.net" + page.select("div > div.item.thumbnail.ic_container > a").first().attr("href"));
LOGGER.info("First page is " + "https://luscious.net" + page.select("div > div.album_cover_item > a").first().attr("href"));
return Http.url(firstUrl).get();
LOGGER.info("First page is " + url);
return page;
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<>();
Elements urlElements = page.select(".icon-download");
Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
for (Element e : urlElements) {
urls.add(e.attr("href"));
}
// This is here for pages with mp4s instead of images
String video_image = "";
video_image = page.select("div > video > source").attr("src");
if (!video_image.equals("")) {
urls.add(video_image);
urls.add(e.attr("abs:href"));
}
return urls;
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
String nextPageUrl = "https://luscious.net" + doc.select("a.image_link[rel=next]").attr("href");
// The more_like_this is here so we don't try to download the page that comes after the end of an album
if (nextPageUrl == "https://luscious.net" ||
nextPageUrl.contains("more_like_this")) {
throw new IOException("No more pages");
// luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
// Simply GET the nextPageUrl also works. Therefore, we do this...
Element nextPageElement = doc.select("div#next_page > div > a").first();
if (nextPageElement == null) {
throw new IOException("No next page found.");
}
return Http.url(nextPageUrl).get();
return Http.url(nextPageElement.attr("abs:href")).get();
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern
.compile("^https?://luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected luscious.net URL format: "
+ "luscious.net/albums/albumname - got " + url
+ " instead");
+ "luscious.net/albums/albumname \n members.luscious.net/albums/albumname - got " + url + " instead.");
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
}
@Override
public DownloadThreadPool getThreadPool() {
return lusciousThreadPool;
}
public class LusciousDownloadThread extends Thread {
private URL url;
private int index;
public LusciousDownloadThread(URL url, int index) {
this.url = url;
this.index = index;
}
@Override
public void run() {
try {
Document page = Http.url(url).retries(RETRY_COUNT).get();
String downloadUrl = page.select(".icon-download").attr("abs:href");
if (downloadUrl.equals("")) {
// This is here for pages with mp4s instead of images.
downloadUrl = page.select("div > video > source").attr("src");
if (!downloadUrl.equals("")) {
throw new IOException("Could not find download url for image or video.");
}
}
//If a valid download url was found.
addURLToDownload(new URL(downloadUrl), getPrefix(index));
} catch (IOException e) {
LOGGER.error("Error downloadiong url " + url, e);
}
}
}
}

View File

@@ -8,7 +8,28 @@ import com.rarchives.ripme.ripper.rippers.LusciousRipper;
/** Tests for {@link LusciousRipper}: full rip, GID extraction, and pagination. */
public class LusciousRipperTest extends RippersTest {
    public void testPahealRipper() throws IOException {
        // a photo set
        LusciousRipper ripper = new LusciousRipper(
                new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/"));
        testRipper(ripper);
    }

    public void testGetGID() throws IOException {
        URL url = new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/");
        LusciousRipper ripper = new LusciousRipper(url);
        assertEquals("h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609", ripper.getGID(url));
    }

    public void testGetNextPage() throws IOException {
        // Multi-page album: a next page must exist.
        URL multiPageAlbumUrl = new URL("https://luscious.net/albums/women-of-color_58/");
        LusciousRipper multiPageRipper = new LusciousRipper(multiPageAlbumUrl);
        assert (multiPageRipper.getNextPage(multiPageRipper.getFirstPage()) != null);

        // Single-page album (on the members subdomain): getNextPage must throw.
        URL singlePageAlbumUrl = new URL("https://members.luscious.net/albums/bakaneko-navidarks_332097/");
        LusciousRipper singlePageRipper = new LusciousRipper(singlePageAlbumUrl);
        try {
            singlePageRipper.getNextPage(singlePageRipper.getFirstPage());
            // Without this, the test silently passes when no exception is thrown.
            fail("Expected IOException for a single-page album.");
        } catch (IOException e) {
            assertEquals("No next page found.", e.getMessage());
        }
    }
}