LusciousRipper fixed

2025-08-18 19:51:35 +02:00 · 2023-11-04 17:29:48 +02:00
parent 6d7503facb
commit ce0e60c501
1 changed files with 45 additions and 97 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
@@ -1,27 +1,26 @@
 package com.rarchives.ripme.ripper.rippers;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ripper.DownloadThreadPool;
 import com.rarchives.ripme.utils.Http;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Connection;
 import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;

 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.URL;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 public class LusciousRipper extends AbstractHTMLRipper {
-    private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
+    private static String albumid; // album id: the part after the last '_' of the album URL segment, assigned in getGID. NOTE(review): the field is static, so one value is shared by every LusciousRipper instance — confirm two rips can never overlap.

    private static final Pattern P = Pattern.compile("^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net/albums/([-_.0-9a-zA-Z]+)/?");
-    private final DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");

    public LusciousRipper(URL url) throws IOException {
        super(url);
@@ -38,37 +37,48 @@ public class LusciousRipper extends AbstractHTMLRipper {
    }

    @Override
-    public Document getFirstPage() throws IOException {
-        return super.getFirstPage();
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document page) {
+    public List<String> getURLsFromPage(Document page) { // Collects the url_to_original of every picture in the album by paging through the GraphQL API; the page argument itself is never read.
        List<String> urls = new ArrayList<>();
-        Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
-        for (Element e : urlElements) {
-            urls.add(e.attr("abs:href"));
+        int totalPages = 1; // placeholder so the loop runs once; replaced by the total_pages value of each response
+
+        for (int i = 1; i <= totalPages; i++) { // the bound is re-evaluated every pass, so it follows the total_pages assignment below
+            String APIStringWOVariables = "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=%2520query%2520PictureListInsideAlbum%28%2524input%253A%2520PictureListInput%21%29%2520%257B%2520picture%2520%257B%2520list%28input%253A%2520%2524input%29%2520%257B%2520info%2520%257B%2520...FacetCollectionInfo%2520%257D%2520items%2520%257B%2520__typename%2520id%2520title%2520description%2520created%2520like_status%2520number_of_comments%2520number_of_favorites%2520moderation_status%2520width%2520height%2520resolution%2520aspect_ratio%2520url_to_original%2520url_to_video%2520is_animated%2520position%2520permissions%2520url%2520tags%2520%257B%2520category%2520text%2520url%2520%257D%2520thumbnails%2520%257B%2520width%2520height%2520size%2520url%2520%257D%2520%257D%2520%257D%2520%257D%2520%257D%2520fragment%2520FacetCollectionInfo%2520on%2520FacetCollectionInfo%2520%257B%2520page%2520has_next_page%2520has_previous_page%2520total_items%2520total_pages%2520items_per_page%2520url_complete%2520%257D%2520&variables=";
+            Connection con = Http.url(APIStringWOVariables + encodeVariablesPartOfURL(i, albumid)).method(Connection.Method.GET).retries(5).connection(); // page number and album id are carried in the encoded suffix after "&variables="
+            con.ignoreHttpErrors(true); // NOTE(review): with this set an HTTP error status presumably no longer throws, so a non-JSON error body would only fail at the JSONObject parse below — confirm this is intended
+            con.ignoreContentType(true); // presumably needed because the response is JSON rather than HTML — confirm against jsoup's content-type handling
+            con.userAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0");
+            Connection.Response res;
+            try {
+                res = con.execute();
+            } catch (IOException e) {
+                throw new RuntimeException(e); // this method declares no checked exception, so the IOException is rethrown unchecked
+            }
+            String body = res.body();
+
+            JSONObject jsonObject = new JSONObject(body);
+
+            JSONObject data = jsonObject.getJSONObject("data"); // walk data -> picture -> list, which holds both "items" and "info"
+            JSONObject picture = data.getJSONObject("picture");
+            JSONObject list = picture.getJSONObject("list");
+            JSONArray items = list.getJSONArray("items");
+            JSONObject info = list.getJSONObject("info");
+            totalPages = info.getInt("total_pages"); // updates the outer loop bound from the response
+
+            for (int j = 0; j < items.length(); j++) { // keep only the url_to_original field of each item
+                JSONObject item = items.getJSONObject(j);
+                String urlToOriginal = item.getString("url_to_original");
+                urls.add(urlToOriginal);
+            }
        }

        return urls;
    }

-    @Override
-    public Document getNextPage(Document doc) throws IOException {
-        // luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
-        // Simply GET the nextPageUrl also works. Therefore, we do this...
-        Element nextPageElement = doc.select("div#next_page > div > a").first();
-        if (nextPageElement == null) {
-            throw new IOException("No next page found.");
-        }
-
-        return Http.url(nextPageElement.attr("abs:href")).get();
-    }
-
    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m = P.matcher(url.toExternalForm());
        if (m.matches()) {
+            albumid = m.group(1).split("_")[m.group(1).split("_").length - 1];
            return m.group(1);
        }
        throw new MalformedURLException("Expected luscious.net URL format: "
@@ -76,79 +86,17 @@ public class LusciousRipper extends AbstractHTMLRipper {
    }

    @Override
-    public void downloadURL(URL url, int index) {
-        lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
+    protected void downloadURL(URL url, int index) { // Hands url straight to addURLToDownload; the per-image page fetch done by the removed LusciousDownloadThread is gone.
+        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null); // args: file url, prefix from getPrefix(index), empty string, the album URL (presumably used as referrer — verify against addURLToDownload), null
    }

-    @Override
-    public DownloadThreadPool getThreadPool() {
-        return lusciousThreadPool;
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
-        // Sanitizes the url removing GET parameters and convert to legacy api url.
-        // "https://legacy.luscious.net/albums/albumname"
+    public static String encodeVariablesPartOfURL(int page, String albumId) { // Returns the URL-encoded JSON that getURLsFromPage appends after "&variables=": an album_id filter for albumId, display "rating_all_time", 50 items per page, and the requested page number.
        try {
-            Matcher m = P.matcher(url.toString());
-            if (m.matches()) {
-                String sanitizedUrl = m.group();
-                sanitizedUrl = sanitizedUrl.replaceFirst(
-                        "^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net",
-                        "https://legacy.luscious.net");
-                return new URI(sanitizedUrl).toURL();
-            }
+            String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}"; // albumId is inserted verbatim (no JSON escaping), so callers must pass a plain id

-            throw new Exception("ERROR: Unable to sanitize url.");
-        } catch (Exception e) {
-            LOGGER.info("Error sanitizing the url.");
-            LOGGER.error(e);
-            return super.sanitizeURL(url);
+            return URLEncoder.encode(json, "UTF-8");
+        } catch (UnsupportedEncodingException e) { // not expected: UTF-8 is a charset every Java platform must support
+            throw new IllegalStateException("Could not encode variables", e); // keep the original exception as the cause instead of dropping it
        }
    }
-
-    @Override
-    public String normalizeUrl(String url) {
-        try {
-            return url.replaceFirst(
-                    "^https?://(?:members\\.|legacy\\.)?luscious.net", "https://www.luscious.net");
-        } catch (Exception e) {
-            LOGGER.info("Error normalizing the url.");
-            LOGGER.error(e);
-            return super.normalizeUrl(url);
-        }
-    }
-
-    public class LusciousDownloadThread implements Runnable {
-        private final URL url;
-        private final int index;
-
-        public LusciousDownloadThread(URL url, int index) {
-            this.url = url;
-            this.index = index;
-        }
-
-        @Override
-        public void run() {
-            try {
-                Document page = Http.url(url).retries(RETRY_COUNT).get();
-
-                String downloadUrl = page.select(".icon-download").attr("abs:href");
-                if (downloadUrl.equals("")) {
-                    // This is here for pages with mp4s instead of images.
-                    downloadUrl = page.select("div > video > source").attr("src");
-                    if (!downloadUrl.equals("")) {
-                        throw new IOException("Could not find download url for image or video.");
-                    }
-                }
-
-                //If a valid download url was found.
-                addURLToDownload(new URI(downloadUrl).toURL(), getPrefix(index));
-
-            } catch (IOException | URISyntaxException e) {
-                LOGGER.error("Error downloadiong url " + url, e);
-            }
-        }
-
-    }
 }