1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-18 11:41:21 +02:00

LusciousRipper fixed

This commit is contained in:
Georgi Marinov
2023-11-04 17:29:48 +02:00
committed by soloturn
parent 6d7503facb
commit ce0e60c501

View File

@@ -1,27 +1,26 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class LusciousRipper extends AbstractHTMLRipper {
private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
private static String albumid;
private static final Pattern P = Pattern.compile("^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net/albums/([-_.0-9a-zA-Z]+)/?");
private final DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");
public LusciousRipper(URL url) throws IOException {
super(url);
@@ -38,37 +37,48 @@ public class LusciousRipper extends AbstractHTMLRipper {
}
@Override
// No luscious-specific first-page handling; defer to the base ripper's
// fetch of this.url.
public Document getFirstPage() throws IOException {
return super.getFirstPage();
}
@Override
public List<String> getURLsFromPage(Document page) {
public List<String> getURLsFromPage(Document page) { // gets urls for all pages through the api
List<String> urls = new ArrayList<>();
Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
for (Element e : urlElements) {
urls.add(e.attr("abs:href"));
int totalPages = 1;
for (int i = 1; i <= totalPages; i++) {
String APIStringWOVariables = "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=%2520query%2520PictureListInsideAlbum%28%2524input%253A%2520PictureListInput%21%29%2520%257B%2520picture%2520%257B%2520list%28input%253A%2520%2524input%29%2520%257B%2520info%2520%257B%2520...FacetCollectionInfo%2520%257D%2520items%2520%257B%2520__typename%2520id%2520title%2520description%2520created%2520like_status%2520number_of_comments%2520number_of_favorites%2520moderation_status%2520width%2520height%2520resolution%2520aspect_ratio%2520url_to_original%2520url_to_video%2520is_animated%2520position%2520permissions%2520url%2520tags%2520%257B%2520category%2520text%2520url%2520%257D%2520thumbnails%2520%257B%2520width%2520height%2520size%2520url%2520%257D%2520%257D%2520%257D%2520%257D%2520%257D%2520fragment%2520FacetCollectionInfo%2520on%2520FacetCollectionInfo%2520%257B%2520page%2520has_next_page%2520has_previous_page%2520total_items%2520total_pages%2520items_per_page%2520url_complete%2520%257D%2520&variables=";
Connection con = Http.url(APIStringWOVariables + encodeVariablesPartOfURL(i, albumid)).method(Connection.Method.GET).retries(5).connection();
con.ignoreHttpErrors(true);
con.ignoreContentType(true);
con.userAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0");
Connection.Response res;
try {
res = con.execute();
} catch (IOException e) {
throw new RuntimeException(e);
}
String body = res.body();
JSONObject jsonObject = new JSONObject(body);
JSONObject data = jsonObject.getJSONObject("data");
JSONObject picture = data.getJSONObject("picture");
JSONObject list = picture.getJSONObject("list");
JSONArray items = list.getJSONArray("items");
JSONObject info = list.getJSONObject("info");
totalPages = info.getInt("total_pages");
for (int j = 0; j < items.length(); j++) {
JSONObject item = items.getJSONObject(j);
String urlToOriginal = item.getString("url_to_original");
urls.add(urlToOriginal);
}
}
return urls;
}
@Override
public Document getNextPage(Document doc) throws IOException {
    // In the browser, luscious xhr-loads nextPageUrl and appends the images
    // to the current page; a plain GET of that url returns the same content,
    // so we simply fetch it directly.
    Element pager = doc.select("div#next_page > div > a").first();
    if (pager != null) {
        return Http.url(pager.attr("abs:href")).get();
    }
    throw new IOException("No next page found.");
}
@Override
public String getGID(URL url) throws MalformedURLException {
Matcher m = P.matcher(url.toExternalForm());
if (m.matches()) {
albumid = m.group(1).split("_")[m.group(1).split("_").length - 1];
return m.group(1);
}
throw new MalformedURLException("Expected luscious.net URL format: "
@@ -76,79 +86,17 @@ public class LusciousRipper extends AbstractHTMLRipper {
}
@Override
public void downloadURL(URL url, int index) {
lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
protected void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
}
@Override
// Expose the ripper-specific pool so downloads are scheduled on
// lusciousThreadPool rather than the global default.
public DownloadThreadPool getThreadPool() {
return lusciousThreadPool;
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
// Sanitizes the url removing GET parameters and convert to legacy api url.
// "https://legacy.luscious.net/albums/albumname"
public static String encodeVariablesPartOfURL(int page, String albumId) {
try {
Matcher m = P.matcher(url.toString());
if (m.matches()) {
String sanitizedUrl = m.group();
sanitizedUrl = sanitizedUrl.replaceFirst(
"^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net",
"https://legacy.luscious.net");
return new URI(sanitizedUrl).toURL();
}
String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}";
throw new Exception("ERROR: Unable to sanitize url.");
} catch (Exception e) {
LOGGER.info("Error sanitizing the url.");
LOGGER.error(e);
return super.sanitizeURL(url);
return URLEncoder.encode(json, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new IllegalStateException("Could not encode variables");
}
}
@Override
public String normalizeUrl(String url) {
    // Canonicalise the members./legacy. hosts onto https://www.luscious.net.
    final String hostPattern = "^https?://(?:members\\.|legacy\\.)?luscious.net";
    try {
        return url.replaceFirst(hostPattern, "https://www.luscious.net");
    } catch (Exception e) {
        // Best-effort: on any failure, log and fall back to the default
        // normalization rather than aborting the rip.
        LOGGER.info("Error normalizing the url.");
        LOGGER.error(e);
        return super.normalizeUrl(url);
    }
}
public class LusciousDownloadThread implements Runnable {
private final URL url;
private final int index;
public LusciousDownloadThread(URL url, int index) {
this.url = url;
this.index = index;
}
@Override
public void run() {
try {
Document page = Http.url(url).retries(RETRY_COUNT).get();
String downloadUrl = page.select(".icon-download").attr("abs:href");
if (downloadUrl.equals("")) {
// This is here for pages with mp4s instead of images.
downloadUrl = page.select("div > video > source").attr("src");
if (!downloadUrl.equals("")) {
throw new IOException("Could not find download url for image or video.");
}
}
//If a valid download url was found.
addURLToDownload(new URI(downloadUrl).toURL(), getPrefix(index));
} catch (IOException | URISyntaxException e) {
LOGGER.error("Error downloadiong url " + url, e);
}
}
}
}