diff --git a/java b/java new file mode 100644 index 00000000..e69de29b diff --git a/pom.xml b/pom.xml index fb1bb42e..5432aa53 100644 --- a/pom.xml +++ b/pom.xml @@ -83,6 +83,11 @@ httpmime 4.3.3 + + org.java-websocket + Java-WebSocket + 1.5.1 + diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java index afadcd1f..a333eac6 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java @@ -4,17 +4,21 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; -import java.net.HttpURLConnection; -import java.net.MalformedURLException; -import java.net.URL; +import java.net.*; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.oracle.truffle.js.nodes.access.IteratorStepNode; +import com.oracle.truffle.js.runtime.builtins.JSON; +import org.java_websocket.client.WebSocketClient; + import org.apache.http.NameValuePair; import org.apache.http.client.utils.URLEncodedUtils; +import org.java_websocket.handshake.ServerHandshake; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -46,7 +50,7 @@ public class ScrolllerRipper extends AbstractJSONRipper { return m.group(1); } throw new MalformedURLException("Expected scrolller.com URL format: " + - "scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead"); + "scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead"); } @Override @@ -55,34 +59,56 @@ public class ScrolllerRipper extends AbstractJSONRipper { } - private JSONObject prepareQuery(String iterator, String gid) throws IOException { + private JSONObject prepareQuery(String iterator, String gid, String sortByString) throws IOException, URISyntaxException { // Prepares the JSONObject we need to pass to the GraphQL query. - String queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }"; - String filterString = convertFilterString(getParameter(this.url,"filter")); + if (sortByString.equals("")) { + // Sorting not selected + String queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }"; + String filterString = convertFilterString(getParameter(this.url,"filter")); - JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)); + JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)); - if (iterator != null) { - // Iterator is not present on the first page - variablesObject.put("iterator", iterator); + if (iterator != null) { + // Iterator is not present on the first page + variablesObject.put("iterator", iterator); + } + if (!filterString.equals("NOFILTER")) { + // We could also pass filter="" but not including it if not present is cleaner + variablesObject.put("filter", filterString); + } + + JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString); + + return getPosts(finalQueryObject); } - if (!filterString.equals("NOFILTER")) { - // We could also pass filter="" but not including it if not present is cleaner - variablesObject.put("filter", filterString); + else { + + String queryString = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }"; + String filterString = convertFilterString(getParameter(this.url,"filter")); + + JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)).put("sortBy", sortByString.toUpperCase()); + + if (iterator != null) { + // Iterator is not present on the first page + variablesObject.put("iterator", iterator); + } + if (!filterString.equals("NOFILTER")) { + // We could also pass filter="" but not including it if not present is cleaner + variablesObject.put("filter", filterString); + } + + JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString); + + return getPostsSorted(finalQueryObject); } - - JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString); - - return getPosts(finalQueryObject); - } public String convertFilterString(String filterParameter) { // Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query - // I could basically remove the last "s" and uppercase instead of this switch statement but this looks easier to read. + // I could basically remove the last "s" and call toUpperCase instead of this switch statement but this looks easier to read. switch (filterParameter) { case "pictures": return "PICTURE"; @@ -118,9 +144,7 @@ public class ScrolllerRipper extends AbstractJSONRipper { private JSONObject getPosts(JSONObject data) { // The actual GraphQL query call - // JSoup wants POST data in key=value but I need to write a JSON body so I can't use it... try { - String url = "https://api.scrolller.com/api/v2/graphql"; URL obj = new URL(url); @@ -139,75 +163,181 @@ public class ScrolllerRipper extends AbstractJSONRipper { BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream())); String inputLine; - StringBuffer html = new StringBuffer(); + StringBuffer jsonString = new StringBuffer(); while ((inputLine = in.readLine()) != null) { - html.append(inputLine); + jsonString.append(inputLine); } in.close(); conn.disconnect(); - return new JSONObject(html.toString()); + return new JSONObject(jsonString.toString()); } catch (Exception e) { e.printStackTrace(); } return new JSONObject("{}"); -} + } + + private JSONObject getPostsSorted(JSONObject data) throws MalformedURLException { + + // The actual GraphQL query call (if sort parameter is present) + try { + + ArrayList postsJsonStrings = new ArrayList<>(); + + WebSocketClient wsc = new WebSocketClient(new URI("wss://api.scrolller.com/api/v2/graphql")) { + @Override + public void onOpen(ServerHandshake serverHandshake) { + // As soon as the WebSocket connects send our query + this.send(data.toString()); + } + + @Override + public void onMessage(String s) { + postsJsonStrings.add(s); + if (s.contains("{\"data\":{\"fetchSubreddit\":{\"__typename\":\"Iterator\",\"iterator\":")) { + // Iterator is the last field returned, once we received it we can close the connection. + this.close(); + } + } + + @Override + public void onClose(int i, String s, boolean b) { + } + + @Override + public void onError(Exception e) { + LOGGER.error(String.format("WebSocket error, server reported %s", e.getMessage())); + } + }; + wsc.connect(); + + while (!wsc.isClosed()) { + // Posts list is not over until the connection closes. + } + + JSONObject finalObject = new JSONObject(); + JSONArray posts = new JSONArray(); + + // Iterator is the last object in the post list, let's duplicate it in his own object for clarity. + finalObject.put("iterator", new JSONObject(postsJsonStrings.get(postsJsonStrings.size()-1))); + + for (String postString : postsJsonStrings) { + posts.put(new JSONObject(postString)); + } + finalObject.put("posts", posts); + + return finalObject; + + + } catch (URISyntaxException ue) { + // Nothing to catch, it's an hardcoded URI. + } + + return null; + } @Override protected List getURLsFromJSON(JSONObject json) throws JSONException { - JSONArray itemsList = json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items"); - int bestArea = 0; - String bestUrl = ""; - List list = new ArrayList<>(); + + if (json.has("posts")) { + // If the JSONObject contains the key "posts" it's our custom JSON made after the WebSocket call. + + JSONArray itemsList = json.getJSONArray("posts"); + int bestArea = 0; + String bestUrl = ""; + List list = new ArrayList<>(); - for (Object item : itemsList) { - JSONArray sourcesTMP = ((JSONObject) item).getJSONArray("mediaSources"); - for (Object sourceTMP : sourcesTMP) - { - int widthTMP = ((JSONObject) sourceTMP).getInt("width"); - int heightTMP = ((JSONObject) sourceTMP).getInt("height"); - int areaTMP = widthTMP * heightTMP; + for (Object item : itemsList) { + if (((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) { + // Is it really a post? It could be the subreddit description or the iterator (first and last item) + JSONArray sourcesTMP = ((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").getJSONArray("mediaSources"); + for (Object sourceTMP : sourcesTMP) + { + int widthTMP = ((JSONObject) sourceTMP).getInt("width"); + int heightTMP = ((JSONObject) sourceTMP).getInt("height"); + int areaTMP = widthTMP * heightTMP; + + if (areaTMP > bestArea) { + // Better way to determine best image? + bestArea = widthTMP; + bestUrl = ((JSONObject) sourceTMP).getString("url"); + } + } + list.add(bestUrl); + bestUrl = ""; + bestArea = 0; - if (areaTMP > bestArea) { - // Better way to determine best image? - bestArea = widthTMP; - bestUrl = ((JSONObject) sourceTMP).getString("url"); } } - list.add(bestUrl); - bestUrl = ""; - bestArea = 0; + return list; + + } else { + JSONArray itemsList = json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items"); + int bestArea = 0; + String bestUrl = ""; + List list = new ArrayList<>(); + + + for (Object item : itemsList) { + JSONArray sourcesTMP = ((JSONObject) item).getJSONArray("mediaSources"); + for (Object sourceTMP : sourcesTMP) + { + int widthTMP = ((JSONObject) sourceTMP).getInt("width"); + int heightTMP = ((JSONObject) sourceTMP).getInt("height"); + int areaTMP = widthTMP * heightTMP; + + if (areaTMP > bestArea) { + // Better way to determine best image? + bestArea = widthTMP; + bestUrl = ((JSONObject) sourceTMP).getString("url"); + } + } + list.add(bestUrl); + bestUrl = ""; + bestArea = 0; + } + return list; } - return list; } @Override protected JSONObject getFirstPage() throws IOException { - if (getParameter(url,"sort") != null) { - // I need support for the WebSocket protocol to implement sorting. - // A GraphQL query to the API with the "sortBy" variable can't come from a POST request or it will return error 500, it has to come from a WebSocket. - LOGGER.warn("Sorting is not currently implemented and it will be ignored"); + try { + return prepareQuery(null, this.getGID(url), getParameter(url,"sort")); + } catch (URISyntaxException e) { + LOGGER.error(String.format("Error obtaining first page: %s", e.getMessage())); + return null; } - return prepareQuery(null, this.getGID(url)); } @Override public JSONObject getNextPage(JSONObject source) throws IOException { // Every call the the API contains an "iterator" string that we need to pass to the API to get the next page // Checking if iterator is null is not working for some reason, hence why the weird "iterator.toString().equals("null")" - Object iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator"); + + Object iterator = null; + if (source.has("iterator")) { + // sorted + iterator = source.getJSONObject("iterator").getJSONObject("data").getJSONObject("fetchSubreddit").get("iterator"); + } else { + iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator"); + } if (!iterator.toString().equals("null")) { - return prepareQuery(iterator.toString(), this.getGID(url)); + try { + return prepareQuery(iterator.toString(), this.getGID(url), getParameter(url,"sort")); + } catch (URISyntaxException e) { + LOGGER.error(String.format("Error changing page: %s", e.getMessage())); + return null; + } } else { return null; } - } } \ No newline at end of file