1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-23 05:53:02 +02:00

Merge pull request #1170 from Tush-r/master

Improved the Luscious ripper; fixed incomplete album ripping.
This commit is contained in:
cyian-1756
2019-01-21 02:33:50 -05:00
committed by GitHub
2 changed files with 78 additions and 26 deletions

View File

@@ -13,9 +13,14 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
public class LusciousRipper extends AbstractHTMLRipper { public class LusciousRipper extends AbstractHTMLRipper {
private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
private Pattern p = Pattern.compile("^https?://(?:members.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
private DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");
public LusciousRipper(URL url) throws IOException { public LusciousRipper(URL url) throws IOException {
super(url); super(url);
@@ -35,57 +40,83 @@ public class LusciousRipper extends AbstractHTMLRipper {
public Document getFirstPage() throws IOException { public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass // "url" is an instance field of the superclass
Document page = Http.url(url).get(); Document page = Http.url(url).get();
URL firstUrl = new URL("https://luscious.net" + page.select("div > div.item.thumbnail.ic_container > a").first().attr("href")); LOGGER.info("First page is " + url);
LOGGER.info("First page is " + "https://luscious.net" + page.select("div > div.album_cover_item > a").first().attr("href")); return page;
return Http.url(firstUrl).get();
} }
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<>(); List<String> urls = new ArrayList<>();
Elements urlElements = page.select(".icon-download"); Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
for (Element e : urlElements) { for (Element e : urlElements) {
urls.add(e.attr("href")); urls.add(e.attr("abs:href"));
} }
// This is here for pages with mp4s instead of images
String video_image = "";
video_image = page.select("div > video > source").attr("src");
if (!video_image.equals("")) {
urls.add(video_image);
}
return urls; return urls;
} }
@Override @Override
public Document getNextPage(Document doc) throws IOException { public Document getNextPage(Document doc) throws IOException {
// Find next page // luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
String nextPageUrl = "https://luscious.net" + doc.select("a.image_link[rel=next]").attr("href"); // Simply GET the nextPageUrl also works. Therefore, we do this...
// The more_like_this is here so we don't try to download the page that comes after the end of an album Element nextPageElement = doc.select("div#next_page > div > a").first();
if (nextPageUrl == "https://luscious.net" || if (nextPageElement == null) {
nextPageUrl.contains("more_like_this")) { throw new IOException("No next page found.");
throw new IOException("No more pages");
} }
return Http.url(nextPageUrl).get(); return Http.url(nextPageElement.attr("abs:href")).get();
} }
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern
.compile("^https?://luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} }
throw new MalformedURLException("Expected luscious.net URL format: " throw new MalformedURLException("Expected luscious.net URL format: "
+ "luscious.net/albums/albumname - got " + url + "luscious.net/albums/albumname \n members.luscious.net/albums/albumname - got " + url + " instead.");
+ " instead");
} }
@Override @Override
public void downloadURL(URL url, int index) { public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index)); lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
}
/**
 * Exposes the thread pool owned by this ripper.
 *
 * @return the {@code lusciousThreadPool} instance held by this ripper
 */
@Override
public DownloadThreadPool getThreadPool() {
    return this.lusciousThreadPool;
}
public class LusciousDownloadThread extends Thread {
private URL url;
private int index;
public LusciousDownloadThread(URL url, int index) {
this.url = url;
this.index = index;
}
@Override
public void run() {
try {
Document page = Http.url(url).retries(RETRY_COUNT).get();
String downloadUrl = page.select(".icon-download").attr("abs:href");
if (downloadUrl.equals("")) {
// This is here for pages with mp4s instead of images.
downloadUrl = page.select("div > video > source").attr("src");
if (!downloadUrl.equals("")) {
throw new IOException("Could not find download url for image or video.");
}
}
//If a valid download url was found.
addURLToDownload(new URL(downloadUrl), getPrefix(index));
} catch (IOException e) {
LOGGER.error("Error downloadiong url " + url, e);
}
} }
} }
}

View File

@@ -8,7 +8,28 @@ import com.rarchives.ripme.ripper.rippers.LusciousRipper;
public class LusciousRipperTest extends RippersTest { public class LusciousRipperTest extends RippersTest {
public void testPahealRipper() throws IOException { public void testPahealRipper() throws IOException {
// a photo set // a photo set
LusciousRipper ripper = new LusciousRipper(new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/")); LusciousRipper ripper = new LusciousRipper(
new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/"));
testRipper(ripper); testRipper(ripper);
} }
/**
 * Checks that {@code getGID} returns the album name segment that follows
 * {@code /albums/} in a luscious.net album URL.
 */
public void testGetGID() throws IOException {
    final String expectedGid = "h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609";
    final URL albumUrl = new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/");
    final LusciousRipper lusciousRipper = new LusciousRipper(albumUrl);

    final String actualGid = lusciousRipper.getGID(albumUrl);
    assertEquals(expectedGid, actualGid);
}
/**
 * Verifies {@code getNextPage} for both album shapes: a multi-page album must
 * yield a next page, and a single-page album must raise an {@link IOException}
 * with the message "No next page found.".
 */
public void testGetNextPage() throws IOException {
    URL multiPageAlbumUrl = new URL("https://luscious.net/albums/women-of-color_58/");
    LusciousRipper multiPageRipper = new LusciousRipper(multiPageAlbumUrl);
    // An explicit check instead of the `assert` keyword, which is skipped unless the JVM runs with -ea.
    if (multiPageRipper.getNextPage(multiPageRipper.getFirstPage()) == null) {
        throw new AssertionError("Expected a next page for multi-page album " + multiPageAlbumUrl);
    }

    URL singlePageAlbumUrl = new URL("https://members.luscious.net/albums/bakaneko-navidarks_332097/");
    LusciousRipper singlePageRipper = new LusciousRipper(singlePageAlbumUrl);
    try {
        singlePageRipper.getNextPage(singlePageRipper.getFirstPage());
    } catch (IOException e) {
        assertEquals("No next page found.", e.getMessage());
        return;
    }
    // Reaching this point means no exception was raised, which the test must not accept silently.
    throw new AssertionError("Expected an IOException for single-page album " + singlePageAlbumUrl);
}
} }