From d1fa53dc8c04e2762bf9f864489772e6f5b0f260 Mon Sep 17 00:00:00 2001 From: PaaaulZ <46759927+PaaaulZ@users.noreply.github.com> Date: Thu, 17 Dec 2020 01:45:41 +0100 Subject: [PATCH 1/4] Added support for scrolller.com No support for the "sorting" parameter --- .../ripme/ripper/rippers/ScrolllerRipper.java | 213 ++++++++++++++++++ .../ripper/rippers/ScrolllerRipperTest.java | 55 +++++ 2 files changed, 268 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/ScrolllerRipperTest.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java new file mode 100644 index 00000000..afadcd1f --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java @@ -0,0 +1,213 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +import com.rarchives.ripme.ripper.AbstractJSONRipper; + +public class ScrolllerRipper extends AbstractJSONRipper { + + public ScrolllerRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "scrolller"; + } + @Override + public String getDomain() { + return "scrolller.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + // Typical URL is: https://scrolller.com/r/subreddit + // Parameters like "filter" and "sort" can be passed (ex: https://scrolller.com/r/subreddit?filter=xxx&sort=yyyy) + Pattern p = Pattern.compile("^https?://scrolller\\.com/r/([a-zA-Z0-9]+).*?$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected scrolller.com URL format: " + + "scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead"); + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + + + private JSONObject prepareQuery(String iterator, String gid) throws IOException { + + // Prepares the JSONObject we need to pass to the GraphQL query. + + String queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }"; + String filterString = convertFilterString(getParameter(this.url,"filter")); + + JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)); + + if (iterator != null) { + // Iterator is not present on the first page + variablesObject.put("iterator", iterator); + } + if (!filterString.equals("NOFILTER")) { + // We could also pass filter="" but not including it if not present is cleaner + variablesObject.put("filter", filterString); + } + + JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString); + + return getPosts(finalQueryObject); + + } + + + public String convertFilterString(String filterParameter) { + // Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query + // I could basically remove the last "s" and uppercase instead of this switch statement but this looks easier to read. + switch (filterParameter) { + case "pictures": + return "PICTURE"; + case "videos": + return "VIDEO"; + case "albums": + return "ALBUM"; + case "": + return "NOFILTER"; + default: + LOGGER.error(String.format("Invalid filter %s using no filter",filterParameter)); + return ""; + } + } + + public String getParameter(URL url, String parameter) throws MalformedURLException { + // Gets passed parameters from the URL + String toReplace = String.format("https://scrolller.com/r/%s?",getGID(url)); + List args= URLEncodedUtils.parse(url.toExternalForm(), Charset.defaultCharset()); + for (NameValuePair arg:args) { + // First parameter contains part of the url so we have to remove it + // Ex: for the url https://scrolller.com/r/CatsStandingUp?filter=xxxx&sort=yyyy + // 1) arg.getName() => https://scrolller.com/r/CatsStandingUp?filter + // 2) arg.getName() => sort + + if (arg.getName().replace(toReplace,"").equals((parameter))) { + return arg.getValue(); + } + } + return ""; + } + + private JSONObject getPosts(JSONObject data) { + // The actual GraphQL query call + + // JSoup wants POST data in key=value but I need to write a JSON body so I can't use it... + try { + + String url = "https://api.scrolller.com/api/v2/graphql"; + + URL obj = new URL(url); + HttpURLConnection conn = (HttpURLConnection) obj.openConnection(); + conn.setReadTimeout(5000); + conn.addRequestProperty("Accept-Language", "en-US,en;q=0.8"); + conn.addRequestProperty("User-Agent", "Mozilla"); + conn.addRequestProperty("Referer", "scrolller.com"); + + conn.setDoOutput(true); + + OutputStreamWriter w = new OutputStreamWriter(conn.getOutputStream(), "UTF-8"); + + w.write(data.toString()); + w.close(); + + BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream())); + String inputLine; + StringBuffer html = new StringBuffer(); + + while ((inputLine = in.readLine()) != null) { + html.append(inputLine); + } + + in.close(); + conn.disconnect(); + + return new JSONObject(html.toString()); + + } catch (Exception e) { + e.printStackTrace(); + } + + return new JSONObject("{}"); +} + + + @Override + protected List getURLsFromJSON(JSONObject json) throws JSONException { + JSONArray itemsList = json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items"); + int bestArea = 0; + String bestUrl = ""; + List list = new ArrayList<>(); + + + for (Object item : itemsList) { + JSONArray sourcesTMP = ((JSONObject) item).getJSONArray("mediaSources"); + for (Object sourceTMP : sourcesTMP) + { + int widthTMP = ((JSONObject) sourceTMP).getInt("width"); + int heightTMP = ((JSONObject) sourceTMP).getInt("height"); + int areaTMP = widthTMP * heightTMP; + + if (areaTMP > bestArea) { + // Better way to determine best image? + bestArea = widthTMP; + bestUrl = ((JSONObject) sourceTMP).getString("url"); + } + } + list.add(bestUrl); + bestUrl = ""; + bestArea = 0; + } + return list; + } + + @Override + protected JSONObject getFirstPage() throws IOException { + if (getParameter(url,"sort") != null) { + // I need support for the WebSocket protocol to implement sorting. + // A GraphQL query to the API with the "sortBy" variable can't come from a POST request or it will return error 500, it has to come from a WebSocket. + LOGGER.warn("Sorting is not currently implemented and it will be ignored"); + } + return prepareQuery(null, this.getGID(url)); + } + + @Override + public JSONObject getNextPage(JSONObject source) throws IOException { + // Every call the the API contains an "iterator" string that we need to pass to the API to get the next page + // Checking if iterator is null is not working for some reason, hence why the weird "iterator.toString().equals("null")" + Object iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator"); + if (!iterator.toString().equals("null")) { + return prepareQuery(iterator.toString(), this.getGID(url)); + } else { + return null; + } + + } + +} \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ScrolllerRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ScrolllerRipperTest.java new file mode 100644 index 00000000..feef282b --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ScrolllerRipperTest.java @@ -0,0 +1,55 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import com.rarchives.ripme.ripper.rippers.ScrolllerRipper; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; + +public class ScrolllerRipperTest extends RippersTest { + /*@Test + public void testScrolllerGID() throws IOException { + Map testURLs = new HashMap<>(); + + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp"), "CatsStandingUp"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=pictures"), "CatsStandingUp"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?sort=top&filter=pictures"), "CatsStandingUp"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=pictures&sort=top"), "CatsStandingUp"); + for (URL url : testURLs.keySet()) { + ScrolllerRipper ripper = new ScrolllerRipper(url); + ripper.setup(); + System.out.println(testURLs.get(url) + " |=>| " + ripper.getGID(ripper.getURL())); + Assertions.assertEquals(testURLs.get(url), ripper.getGID(ripper.getURL())); + deleteDir(ripper.getWorkingDir()); + } + }*/ + + @Test + public void testScrolllerFilterRegex() throws IOException { + Map testURLs = new HashMap<>(); + + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp"), ""); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=pictures"), "PICTURE"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=videos"), "VIDEO"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=albums"), "ALBUM"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?sort=top&filter=pictures"), "PICTURE"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?sort=top&filter=videos"), "VIDEO"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?sort=top&filter=albums"), "ALBUM"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=pictures&sort=top"), "PICTURE"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=videos&sort=top"), "VIDEO"); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=albums&sort=top"), "ALBUM"); + for (URL url : testURLs.keySet()) { + ScrolllerRipper ripper = new ScrolllerRipper(url); + ripper.setup(); + System.out.println(url + " ==> " + testURLs.get(url) + " => " + ripper.convertFilterString(ripper.getParameter(ripper.getURL(),"filter"))); + Assertions.assertEquals(testURLs.get(url), ripper.convertFilterString(ripper.getParameter(ripper.getURL(),"filter"))); + deleteDir(ripper.getWorkingDir()); + } + } + + + +} From d2ac05f8f515d08966a2412c88e35d6fa74025c4 Mon Sep 17 00:00:00 2001 From: PaaaulZ <46759927+PaaaulZ@users.noreply.github.com> Date: Sat, 2 Jan 2021 03:15:47 +0100 Subject: [PATCH 2/4] Fixed failing test --- .../ripme/tst/ripper/rippers/ScrolllerRipperTest.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ScrolllerRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ScrolllerRipperTest.java index feef282b..c7bf3d7d 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ScrolllerRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ScrolllerRipperTest.java @@ -10,7 +10,7 @@ import java.util.HashMap; import java.util.Map; public class ScrolllerRipperTest extends RippersTest { - /*@Test + @Test public void testScrolllerGID() throws IOException { Map testURLs = new HashMap<>(); @@ -21,17 +21,16 @@ public class ScrolllerRipperTest extends RippersTest { for (URL url : testURLs.keySet()) { ScrolllerRipper ripper = new ScrolllerRipper(url); ripper.setup(); - System.out.println(testURLs.get(url) + " |=>| " + ripper.getGID(ripper.getURL())); Assertions.assertEquals(testURLs.get(url), ripper.getGID(ripper.getURL())); deleteDir(ripper.getWorkingDir()); } - }*/ + } @Test public void testScrolllerFilterRegex() throws IOException { Map testURLs = new HashMap<>(); - testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp"), ""); + testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp"), "NOFILTER"); testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=pictures"), "PICTURE"); testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=videos"), "VIDEO"); testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=albums"), "ALBUM"); @@ -44,7 +43,6 @@ public class ScrolllerRipperTest extends RippersTest { for (URL url : testURLs.keySet()) { ScrolllerRipper ripper = new ScrolllerRipper(url); ripper.setup(); - System.out.println(url + " ==> " + testURLs.get(url) + " => " + ripper.convertFilterString(ripper.getParameter(ripper.getURL(),"filter"))); Assertions.assertEquals(testURLs.get(url), ripper.convertFilterString(ripper.getParameter(ripper.getURL(),"filter"))); deleteDir(ripper.getWorkingDir()); } From bfd0b395c7c4e7dad42aa35e4e4e6e626d1dae63 Mon Sep 17 00:00:00 2001 From: PaaaulZ <46759927+PaaaulZ@users.noreply.github.com> Date: Sat, 2 Jan 2021 08:03:00 +0100 Subject: [PATCH 3/4] Added support for sort parameter and WebSocket dependency --- java | 0 pom.xml | 5 + .../ripme/ripper/rippers/ScrolllerRipper.java | 236 ++++++++++++++---- 3 files changed, 188 insertions(+), 53 deletions(-) create mode 100644 java diff --git a/java b/java new file mode 100644 index 00000000..e69de29b diff --git a/pom.xml b/pom.xml index fb1bb42e..5432aa53 100644 --- a/pom.xml +++ b/pom.xml @@ -83,6 +83,11 @@ httpmime 4.3.3 + + org.java-websocket + Java-WebSocket + 1.5.1 + diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java index afadcd1f..a333eac6 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java @@ -4,17 +4,21 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; -import java.net.HttpURLConnection; -import java.net.MalformedURLException; -import java.net.URL; +import java.net.*; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.oracle.truffle.js.nodes.access.IteratorStepNode; +import com.oracle.truffle.js.runtime.builtins.JSON; +import org.java_websocket.client.WebSocketClient; + import org.apache.http.NameValuePair; import org.apache.http.client.utils.URLEncodedUtils; +import org.java_websocket.handshake.ServerHandshake; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -46,7 +50,7 @@ public class ScrolllerRipper extends AbstractJSONRipper { return m.group(1); } throw new MalformedURLException("Expected scrolller.com URL format: " + - "scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead"); + "scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead"); } @Override @@ -55,34 +59,56 @@ public class ScrolllerRipper extends AbstractJSONRipper { } - private JSONObject prepareQuery(String iterator, String gid) throws IOException { + private JSONObject prepareQuery(String iterator, String gid, String sortByString) throws IOException, URISyntaxException { // Prepares the JSONObject we need to pass to the GraphQL query. - String queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }"; - String filterString = convertFilterString(getParameter(this.url,"filter")); + if (sortByString.equals("")) { + // Sorting not selected + String queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }"; + String filterString = convertFilterString(getParameter(this.url,"filter")); - JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)); + JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)); - if (iterator != null) { - // Iterator is not present on the first page - variablesObject.put("iterator", iterator); + if (iterator != null) { + // Iterator is not present on the first page + variablesObject.put("iterator", iterator); + } + if (!filterString.equals("NOFILTER")) { + // We could also pass filter="" but not including it if not present is cleaner + variablesObject.put("filter", filterString); + } + + JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString); + + return getPosts(finalQueryObject); } - if (!filterString.equals("NOFILTER")) { - // We could also pass filter="" but not including it if not present is cleaner - variablesObject.put("filter", filterString); + else { + + String queryString = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }"; + String filterString = convertFilterString(getParameter(this.url,"filter")); + + JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)).put("sortBy", sortByString.toUpperCase()); + + if (iterator != null) { + // Iterator is not present on the first page + variablesObject.put("iterator", iterator); + } + if (!filterString.equals("NOFILTER")) { + // We could also pass filter="" but not including it if not present is cleaner + variablesObject.put("filter", filterString); + } + + JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString); + + return getPostsSorted(finalQueryObject); } - - JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString); - - return getPosts(finalQueryObject); - } public String convertFilterString(String filterParameter) { // Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query - // I could basically remove the last "s" and uppercase instead of this switch statement but this looks easier to read. + // I could basically remove the last "s" and call toUpperCase instead of this switch statement but this looks easier to read. switch (filterParameter) { case "pictures": return "PICTURE"; @@ -118,9 +144,7 @@ public class ScrolllerRipper extends AbstractJSONRipper { private JSONObject getPosts(JSONObject data) { // The actual GraphQL query call - // JSoup wants POST data in key=value but I need to write a JSON body so I can't use it... try { - String url = "https://api.scrolller.com/api/v2/graphql"; URL obj = new URL(url); @@ -139,75 +163,181 @@ public class ScrolllerRipper extends AbstractJSONRipper { BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream())); String inputLine; - StringBuffer html = new StringBuffer(); + StringBuffer jsonString = new StringBuffer(); while ((inputLine = in.readLine()) != null) { - html.append(inputLine); + jsonString.append(inputLine); } in.close(); conn.disconnect(); - return new JSONObject(html.toString()); + return new JSONObject(jsonString.toString()); } catch (Exception e) { e.printStackTrace(); } return new JSONObject("{}"); -} + } + + private JSONObject getPostsSorted(JSONObject data) throws MalformedURLException { + + // The actual GraphQL query call (if sort parameter is present) + try { + + ArrayList postsJsonStrings = new ArrayList<>(); + + WebSocketClient wsc = new WebSocketClient(new URI("wss://api.scrolller.com/api/v2/graphql")) { + @Override + public void onOpen(ServerHandshake serverHandshake) { + // As soon as the WebSocket connects send our query + this.send(data.toString()); + } + + @Override + public void onMessage(String s) { + postsJsonStrings.add(s); + if (s.contains("{\"data\":{\"fetchSubreddit\":{\"__typename\":\"Iterator\",\"iterator\":")) { + // Iterator is the last field returned, once we received it we can close the connection. + this.close(); + } + } + + @Override + public void onClose(int i, String s, boolean b) { + } + + @Override + public void onError(Exception e) { + LOGGER.error(String.format("WebSocket error, server reported %s", e.getMessage())); + } + }; + wsc.connect(); + + while (!wsc.isClosed()) { + // Posts list is not over until the connection closes. + } + + JSONObject finalObject = new JSONObject(); + JSONArray posts = new JSONArray(); + + // Iterator is the last object in the post list, let's duplicate it in his own object for clarity. + finalObject.put("iterator", new JSONObject(postsJsonStrings.get(postsJsonStrings.size()-1))); + + for (String postString : postsJsonStrings) { + posts.put(new JSONObject(postString)); + } + finalObject.put("posts", posts); + + return finalObject; + + + } catch (URISyntaxException ue) { + // Nothing to catch, it's an hardcoded URI. + } + + return null; + } @Override protected List getURLsFromJSON(JSONObject json) throws JSONException { - JSONArray itemsList = json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items"); - int bestArea = 0; - String bestUrl = ""; - List list = new ArrayList<>(); + + if (json.has("posts")) { + // If the JSONObject contains the key "posts" it's our custom JSON made after the WebSocket call. + + JSONArray itemsList = json.getJSONArray("posts"); + int bestArea = 0; + String bestUrl = ""; + List list = new ArrayList<>(); - for (Object item : itemsList) { - JSONArray sourcesTMP = ((JSONObject) item).getJSONArray("mediaSources"); - for (Object sourceTMP : sourcesTMP) - { - int widthTMP = ((JSONObject) sourceTMP).getInt("width"); - int heightTMP = ((JSONObject) sourceTMP).getInt("height"); - int areaTMP = widthTMP * heightTMP; + for (Object item : itemsList) { + if (((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) { + // Is it really a post? It could be the subreddit description or the iterator (first and last item) + JSONArray sourcesTMP = ((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").getJSONArray("mediaSources"); + for (Object sourceTMP : sourcesTMP) + { + int widthTMP = ((JSONObject) sourceTMP).getInt("width"); + int heightTMP = ((JSONObject) sourceTMP).getInt("height"); + int areaTMP = widthTMP * heightTMP; + + if (areaTMP > bestArea) { + // Better way to determine best image? + bestArea = widthTMP; + bestUrl = ((JSONObject) sourceTMP).getString("url"); + } + } + list.add(bestUrl); + bestUrl = ""; + bestArea = 0; - if (areaTMP > bestArea) { - // Better way to determine best image? - bestArea = widthTMP; - bestUrl = ((JSONObject) sourceTMP).getString("url"); } } - list.add(bestUrl); - bestUrl = ""; - bestArea = 0; + return list; + + } else { + JSONArray itemsList = json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items"); + int bestArea = 0; + String bestUrl = ""; + List list = new ArrayList<>(); + + + for (Object item : itemsList) { + JSONArray sourcesTMP = ((JSONObject) item).getJSONArray("mediaSources"); + for (Object sourceTMP : sourcesTMP) + { + int widthTMP = ((JSONObject) sourceTMP).getInt("width"); + int heightTMP = ((JSONObject) sourceTMP).getInt("height"); + int areaTMP = widthTMP * heightTMP; + + if (areaTMP > bestArea) { + // Better way to determine best image? + bestArea = widthTMP; + bestUrl = ((JSONObject) sourceTMP).getString("url"); + } + } + list.add(bestUrl); + bestUrl = ""; + bestArea = 0; + } + return list; } - return list; } @Override protected JSONObject getFirstPage() throws IOException { - if (getParameter(url,"sort") != null) { - // I need support for the WebSocket protocol to implement sorting. - // A GraphQL query to the API with the "sortBy" variable can't come from a POST request or it will return error 500, it has to come from a WebSocket. - LOGGER.warn("Sorting is not currently implemented and it will be ignored"); + try { + return prepareQuery(null, this.getGID(url), getParameter(url,"sort")); + } catch (URISyntaxException e) { + LOGGER.error(String.format("Error obtaining first page: %s", e.getMessage())); + return null; } - return prepareQuery(null, this.getGID(url)); } @Override public JSONObject getNextPage(JSONObject source) throws IOException { // Every call the the API contains an "iterator" string that we need to pass to the API to get the next page // Checking if iterator is null is not working for some reason, hence why the weird "iterator.toString().equals("null")" - Object iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator"); + + Object iterator = null; + if (source.has("iterator")) { + // sorted + iterator = source.getJSONObject("iterator").getJSONObject("data").getJSONObject("fetchSubreddit").get("iterator"); + } else { + iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator"); + } if (!iterator.toString().equals("null")) { - return prepareQuery(iterator.toString(), this.getGID(url)); + try { + return prepareQuery(iterator.toString(), this.getGID(url), getParameter(url,"sort")); + } catch (URISyntaxException e) { + LOGGER.error(String.format("Error changing page: %s", e.getMessage())); + return null; + } } else { return null; } - } } \ No newline at end of file From 72a697eda873829a4b69c348c0045ced66f6b41b Mon Sep 17 00:00:00 2001 From: PaaaulZ <46759927+PaaaulZ@users.noreply.github.com> Date: Sat, 2 Jan 2021 20:36:03 +0100 Subject: [PATCH 4/4] Refactor, removed duplicated code. Fixed bugs --- .../ripme/ripper/rippers/ScrolllerRipper.java | 146 ++++++------------ 1 file changed, 48 insertions(+), 98 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java index a333eac6..7e0c1c46 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java @@ -8,12 +8,9 @@ import java.net.*; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; -import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; -import com.oracle.truffle.js.nodes.access.IteratorStepNode; -import com.oracle.truffle.js.runtime.builtins.JSON; import org.java_websocket.client.WebSocketClient; import org.apache.http.NameValuePair; @@ -61,55 +58,31 @@ public class ScrolllerRipper extends AbstractJSONRipper { private JSONObject prepareQuery(String iterator, String gid, String sortByString) throws IOException, URISyntaxException { - // Prepares the JSONObject we need to pass to the GraphQL query. + String QUERY_NOSORT = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }"; + String QUERY_SORT = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }"; - if (sortByString.equals("")) { - // Sorting not selected - String queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }"; - String filterString = convertFilterString(getParameter(this.url,"filter")); + String filterString = convertFilterString(getParameter(this.url,"filter")); - JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)); + JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)).put("sortBy", sortByString.toUpperCase()); + JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", sortByString.equals("") ? QUERY_NOSORT : QUERY_SORT); - if (iterator != null) { - // Iterator is not present on the first page - variablesObject.put("iterator", iterator); - } - if (!filterString.equals("NOFILTER")) { - // We could also pass filter="" but not including it if not present is cleaner - variablesObject.put("filter", filterString); - } - - JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString); - - return getPosts(finalQueryObject); + if (iterator != null) { + // Iterator is not present on the first page + variablesObject.put("iterator", iterator); } - else { - - String queryString = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }"; - String filterString = convertFilterString(getParameter(this.url,"filter")); - - JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)).put("sortBy", sortByString.toUpperCase()); - - if (iterator != null) { - // Iterator is not present on the first page - variablesObject.put("iterator", iterator); - } - if (!filterString.equals("NOFILTER")) { - // We could also pass filter="" but not including it if not present is cleaner - variablesObject.put("filter", filterString); - } - - JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString); - - return getPostsSorted(finalQueryObject); + if (!filterString.equals("NOFILTER")) { + variablesObject.put("filter", filterString); } + + return sortByString.equals("") ? getPosts(finalQueryObject) : getPostsSorted(finalQueryObject); + } public String convertFilterString(String filterParameter) { // Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query // I could basically remove the last "s" and call toUpperCase instead of this switch statement but this looks easier to read. - switch (filterParameter) { + switch (filterParameter.toLowerCase()) { case "pictures": return "PICTURE"; case "videos": @@ -134,7 +107,7 @@ public class ScrolllerRipper extends AbstractJSONRipper { // 1) arg.getName() => https://scrolller.com/r/CatsStandingUp?filter // 2) arg.getName() => sort - if (arg.getName().replace(toReplace,"").equals((parameter))) { + if (arg.getName().replace(toReplace,"").toLowerCase().equals((parameter))) { return arg.getValue(); } } @@ -198,8 +171,7 @@ public class ScrolllerRipper extends AbstractJSONRipper { @Override public void onMessage(String s) { postsJsonStrings.add(s); - if (s.contains("{\"data\":{\"fetchSubreddit\":{\"__typename\":\"Iterator\",\"iterator\":")) { - // Iterator is the last field returned, once we received it we can close the connection. + if (new JSONObject(s).getJSONObject("data").getJSONObject("fetchSubreddit").has("iterator")) { this.close(); } } @@ -230,6 +202,11 @@ public class ScrolllerRipper extends AbstractJSONRipper { } finalObject.put("posts", posts); + if (finalObject.getJSONArray("posts").length() == 1 && !finalObject.getJSONArray("posts").getJSONObject(0).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) { + // Only iterator, no posts. + return null; + } + return finalObject; @@ -244,66 +221,38 @@ public class ScrolllerRipper extends AbstractJSONRipper { @Override protected List getURLsFromJSON(JSONObject json) throws JSONException { - if (json.has("posts")) { - // If the JSONObject contains the key "posts" it's our custom JSON made after the WebSocket call. + boolean sortRequested = json.has("posts"); - JSONArray itemsList = json.getJSONArray("posts"); - int bestArea = 0; - String bestUrl = ""; - List list = new ArrayList<>(); + int bestArea = 0; + String bestUrl = ""; + List list = new ArrayList<>(); + JSONArray itemsList = sortRequested ? json.getJSONArray("posts") : json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items"); - for (Object item : itemsList) { - if (((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) { - // Is it really a post? It could be the subreddit description or the iterator (first and last item) - JSONArray sourcesTMP = ((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").getJSONArray("mediaSources"); - for (Object sourceTMP : sourcesTMP) - { - int widthTMP = ((JSONObject) sourceTMP).getInt("width"); - int heightTMP = ((JSONObject) sourceTMP).getInt("height"); - int areaTMP = widthTMP * heightTMP; + for (Object item : itemsList) { - if (areaTMP > bestArea) { - // Better way to determine best image? - bestArea = widthTMP; - bestUrl = ((JSONObject) sourceTMP).getString("url"); - } - } - list.add(bestUrl); - bestUrl = ""; - bestArea = 0; + if (sortRequested && !((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) { + continue; + } + JSONArray sourcesTMP = sortRequested ? ((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").getJSONArray("mediaSources") : ((JSONObject) item).getJSONArray("mediaSources"); + for (Object sourceTMP : sourcesTMP) + { + int widthTMP = ((JSONObject) sourceTMP).getInt("width"); + int heightTMP = ((JSONObject) sourceTMP).getInt("height"); + int areaTMP = widthTMP * heightTMP; + + if (areaTMP > bestArea) { + bestArea = widthTMP; + bestUrl = ((JSONObject) sourceTMP).getString("url"); } } - return list; - - } else { - JSONArray itemsList = json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items"); - int bestArea = 0; - String bestUrl = ""; - List list = new ArrayList<>(); - - - for (Object item : itemsList) { - JSONArray sourcesTMP = ((JSONObject) item).getJSONArray("mediaSources"); - for (Object sourceTMP : sourcesTMP) - { - int widthTMP = ((JSONObject) sourceTMP).getInt("width"); - int heightTMP = ((JSONObject) sourceTMP).getInt("height"); - int areaTMP = widthTMP * heightTMP; - - if (areaTMP > bestArea) { - // Better way to determine best image? - bestArea = widthTMP; - bestUrl = ((JSONObject) sourceTMP).getString("url"); - } - } - list.add(bestUrl); - bestUrl = ""; - bestArea = 0; - } - return list; + list.add(bestUrl); + bestUrl = ""; + bestArea = 0; } + + return list; } @Override @@ -323,12 +272,14 @@ public class ScrolllerRipper extends AbstractJSONRipper { Object iterator = null; if (source.has("iterator")) { - // sorted + // Sort requested, custom JSON. iterator = source.getJSONObject("iterator").getJSONObject("data").getJSONObject("fetchSubreddit").get("iterator"); } else { iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator"); } + if (!iterator.toString().equals("null")) { + // Need to change page. try { return prepareQuery(iterator.toString(), this.getGID(url), getParameter(url,"sort")); } catch (URISyntaxException e) { @@ -339,5 +290,4 @@ public class ScrolllerRipper extends AbstractJSONRipper { return null; } } - } \ No newline at end of file