diff --git a/java b/java
new file mode 100644
index 00000000..e69de29b
diff --git a/pom.xml b/pom.xml
index fb1bb42e..5432aa53 100644
--- a/pom.xml
+++ b/pom.xml
@@ -83,6 +83,11 @@
httpmime
4.3.3
+        <dependency>
+            <groupId>org.java-websocket</groupId>
+            <artifactId>Java-WebSocket</artifactId>
+            <version>1.5.1</version>
+        </dependency>
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java
index afadcd1f..a333eac6 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ScrolllerRipper.java
@@ -4,17 +4,21 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
-import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
-import java.net.URL;
+import java.net.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.oracle.truffle.js.nodes.access.IteratorStepNode;
+import com.oracle.truffle.js.runtime.builtins.JSON;
+import org.java_websocket.client.WebSocketClient;
+
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
+import org.java_websocket.handshake.ServerHandshake;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
@@ -46,7 +50,7 @@ public class ScrolllerRipper extends AbstractJSONRipper {
return m.group(1);
}
throw new MalformedURLException("Expected scrolller.com URL format: " +
- "scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead");
+ "scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead");
}
@Override
@@ -55,34 +59,56 @@ public class ScrolllerRipper extends AbstractJSONRipper {
}
- private JSONObject prepareQuery(String iterator, String gid) throws IOException {
- // Prepares the JSONObject we need to pass to the GraphQL query.
- String queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }";
- String filterString = convertFilterString(getParameter(this.url,"filter"));
- JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid));
- if (iterator != null) {
- // Iterator is not present on the first page
- variablesObject.put("iterator", iterator);
- }
- if (!filterString.equals("NOFILTER")) {
- // We could also pass filter="" but not including it if not present is cleaner
- variablesObject.put("filter", filterString);
- }
-
- JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString);
-
- return getPosts(finalQueryObject);
-
- }
+    /**
+     * Builds the GraphQL request for one page of a subreddit and executes it.
+     *
+     * Without a sort order the plain "SubredditQuery" is sent through {@link #getPosts(JSONObject)};
+     * with a sort order the "SubredditSubscription" is sent through {@link #getPostsSorted(JSONObject)}.
+     *
+     * @param iterator     paging token of the previous page, or null for the first page
+     * @param gid          subreddit name, used to build the "/r/name" url variable
+     * @param sortByString value of the ?sort= URL parameter; null or "" means "not sorted"
+     * @return whatever the selected transport returns for this page
+     */
+    private JSONObject prepareQuery(String iterator, String gid, String sortByString) throws IOException, URISyntaxException {
+        // The previous revision compared getParameter(url, "sort") against null, so accept both null and "" as "no sort".
+        boolean sorted = sortByString != null && !sortByString.isEmpty();
+
+        String filterString = convertFilterString(getParameter(this.url, "filter"));
+
+        JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid));
+        if (sorted) {
+            // Locale.ROOT keeps the upper-casing independent of the user's locale (e.g. Turkish dotless i).
+            variablesObject.put("sortBy", sortByString.toUpperCase(Locale.ROOT));
+        }
+        if (iterator != null) {
+            // Iterator is not present on the first page
+            variablesObject.put("iterator", iterator);
+        }
+        if (!filterString.equals("NOFILTER")) {
+            // We could also pass filter="" but not including it if not present is cleaner
+            variablesObject.put("filter", filterString);
+        }
+
+        String queryString;
+        if (sorted) {
+            queryString = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }";
+        } else {
+            queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }";
+        }
+
+        JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString);
+
+        return sorted ? getPostsSorted(finalQueryObject) : getPosts(finalQueryObject);
+    }
public String convertFilterString(String filterParameter) {
// Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query
- // I could basically remove the last "s" and uppercase instead of this switch statement but this looks easier to read.
+ // I could basically remove the last "s" and call toUpperCase instead of this switch statement but this looks easier to read.
switch (filterParameter) {
case "pictures":
return "PICTURE";
@@ -118,9 +144,7 @@ public class ScrolllerRipper extends AbstractJSONRipper {
private JSONObject getPosts(JSONObject data) {
// The actual GraphQL query call
- // JSoup wants POST data in key=value but I need to write a JSON body so I can't use it...
try {
-
String url = "https://api.scrolller.com/api/v2/graphql";
URL obj = new URL(url);
@@ -139,75 +163,181 @@ public class ScrolllerRipper extends AbstractJSONRipper {
BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
String inputLine;
- StringBuffer html = new StringBuffer();
+ StringBuffer jsonString = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
- html.append(inputLine);
+ jsonString.append(inputLine);
}
in.close();
conn.disconnect();
- return new JSONObject(html.toString());
+ return new JSONObject(jsonString.toString());
} catch (Exception e) {
e.printStackTrace();
}
return new JSONObject("{}");
-}
+ }
+
+    /**
+     * Runs the GraphQL call over a WebSocket (used when a sort parameter is present) and
+     * collects every text message the server sends until the connection is closed.
+     *
+     * The connection is closed by us as soon as a message carrying the Iterator payload arrives.
+     *
+     * @param data the GraphQL request object; its string form is sent once the socket opens
+     * @return an object with "posts" (array of every received message, in arrival order) and
+     *         "iterator" (a copy of the last received message), or null when no message was
+     *         received or the wait was interrupted
+     */
+    private JSONObject getPostsSorted(JSONObject data) throws MalformedURLException {
+        // Written by the WebSocket thread, read here only after the latch was released in onClose,
+        // so the latch provides the visibility guarantee for the plain ArrayList.
+        final List<String> postsJsonStrings = new ArrayList<>();
+        final java.util.concurrent.CountDownLatch connectionClosed = new java.util.concurrent.CountDownLatch(1);
+
+        // URI.create is intended for constant, known-valid URIs; no checked exception to swallow.
+        WebSocketClient wsc = new WebSocketClient(URI.create("wss://api.scrolller.com/api/v2/graphql")) {
+            @Override
+            public void onOpen(ServerHandshake serverHandshake) {
+                // As soon as the WebSocket connects send our query
+                this.send(data.toString());
+            }
+
+            @Override
+            public void onMessage(String s) {
+                postsJsonStrings.add(s);
+                if (s.contains("{\"data\":{\"fetchSubreddit\":{\"__typename\":\"Iterator\",\"iterator\":")) {
+                    // Iterator is the last field returned, once we received it we can close the connection.
+                    this.close();
+                }
+            }
+
+            @Override
+            public void onClose(int i, String s, boolean b) {
+                // Wakes up the caller; the posts list is not over until the connection closes.
+                connectionClosed.countDown();
+            }
+
+            @Override
+            public void onError(Exception e) {
+                LOGGER.error(String.format("WebSocket error, server reported %s", e.getMessage()), e);
+            }
+        };
+        wsc.connect();
+
+        try {
+            // Block instead of spinning on isClosed(), which kept a CPU core busy for the whole download.
+            connectionClosed.await();
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            wsc.close();
+            LOGGER.error("Interrupted while waiting for the WebSocket to close", e);
+            return null;
+        }
+
+        if (postsJsonStrings.isEmpty()) {
+            // Connection failed or closed before anything arrived; there is no page to build.
+            LOGGER.error("WebSocket closed without returning any message");
+            return null;
+        }
+
+        JSONObject finalObject = new JSONObject();
+        JSONArray posts = new JSONArray();
+
+        // Iterator is the last object in the post list, let's duplicate it in its own object for clarity.
+        finalObject.put("iterator", new JSONObject(postsJsonStrings.get(postsJsonStrings.size() - 1)));
+
+        for (String postString : postsJsonStrings) {
+            posts.put(new JSONObject(postString));
+        }
+        finalObject.put("posts", posts);
+
+        return finalObject;
+    }
+    /**
+     * Extracts one media URL per post from a page of results.
+     *
+     * Two page shapes are accepted: the raw GraphQL response (items under
+     * data.getSubreddit.children.items) and the wrapper built after the WebSocket call
+     * (messages under "posts", each post under data.fetchSubreddit).
+     *
+     * @param json one page of results in either shape
+     * @return for every post, the URL of its media source with the largest pixel area
+     *         ("" when the post lists no usable source)
+     */
@Override
protected List getURLsFromJSON(JSONObject json) throws JSONException {
- JSONArray itemsList = json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items");
- int bestArea = 0;
- String bestUrl = "";
- List list = new ArrayList<>();
- for (Object item : itemsList) {
- JSONArray sourcesTMP = ((JSONObject) item).getJSONArray("mediaSources");
- for (Object sourceTMP : sourcesTMP)
- {
- int widthTMP = ((JSONObject) sourceTMP).getInt("width");
- int heightTMP = ((JSONObject) sourceTMP).getInt("height");
- int areaTMP = widthTMP * heightTMP;
- if (areaTMP > bestArea) {
- // Better way to determine best image?
- bestArea = widthTMP;
- bestUrl = ((JSONObject) sourceTMP).getString("url");
- }
- }
- list.add(bestUrl);
- bestUrl = "";
- bestArea = 0;
- }
- return list;
+        // If the JSONObject contains the key "posts" it's our custom JSON made after the WebSocket call.
+        boolean fromWebSocket = json.has("posts");
+        JSONArray itemsList = fromWebSocket
+                ? json.getJSONArray("posts")
+                : json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items");
+
+        List<String> list = new ArrayList<>();
+        for (Object item : itemsList) {
+            JSONObject post = (JSONObject) item;
+            if (fromWebSocket) {
+                // Each WebSocket message wraps its payload in data.fetchSubreddit.
+                JSONObject data = post.optJSONObject("data");
+                post = (data == null) ? null : data.optJSONObject("fetchSubreddit");
+                if (post == null || !post.has("mediaSources")) {
+                    // Is it really a post? It could be the subreddit description or the iterator (first and last item)
+                    continue;
+                }
+            }
+            list.add(largestMediaSourceUrl(post.getJSONArray("mediaSources")));
+        }
+        return list;
}
+
+    /**
+     * Returns the "url" of the entry with the largest width * height, or "" if no entry has a positive area.
+     */
+    private static String largestMediaSourceUrl(JSONArray mediaSources) {
+        long bestArea = 0;
+        String bestUrl = "";
+        for (Object source : mediaSources) {
+            JSONObject mediaSource = (JSONObject) source;
+            // long arithmetic so very large dimensions cannot overflow
+            long area = (long) mediaSource.getInt("width") * mediaSource.getInt("height");
+            if (area > bestArea) {
+                // Remember the area itself: storing only the width let a later, smaller source win.
+                bestArea = area;
+                bestUrl = mediaSource.getString("url");
+            }
+        }
+        return bestUrl;
+    }
+    /**
+     * Requests the first page of results (no iterator), honouring the ?sort= URL parameter.
+     *
+     * @return the first page, or null if the request could not be built because of a URISyntaxException
+     */
@Override
protected JSONObject getFirstPage() throws IOException {
- if (getParameter(url,"sort") != null) {
- // I need support for the WebSocket protocol to implement sorting.
- // A GraphQL query to the API with the "sortBy" variable can't come from a POST request or it will return error 500, it has to come from a WebSocket.
- LOGGER.warn("Sorting is not currently implemented and it will be ignored");
- }
- return prepareQuery(null, this.getGID(url));
+        try {
+            return prepareQuery(null, this.getGID(url), getParameter(url, "sort"));
+        } catch (URISyntaxException e) {
+            // Pass the exception itself so the stack trace is not lost.
+            LOGGER.error(String.format("Error obtaining first page: %s", e.getMessage()), e);
+            return null;
+        }
}
+    /**
+     * Requests the page following {@code source}, using the "iterator" token found in it.
+     *
+     * @param source the page previously returned by getFirstPage/getNextPage, in either the raw
+     *               GraphQL shape or the WebSocket wrapper shape (recognised by its "iterator" key)
+     * @return the next page, or null when there is no iterator left or the request could not be built
+     */
@Override
public JSONObject getNextPage(JSONObject source) throws IOException {
- // Every call the the API contains an "iterator" string that we need to pass to the API to get the next page
- // Checking if iterator is null is not working for some reason, hence why the weird "iterator.toString().equals("null")"
- Object iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator");
- if (!iterator.toString().equals("null")) {
- return prepareQuery(iterator.toString(), this.getGID(url));
- } else {
- return null;
- }
-
+        // Every call to the API contains an "iterator" string that we need to pass back to get the next page.
+        JSONObject iteratorHolder;
+        if (source.has("iterator")) {
+            // sorted: the wrapper stores the last WebSocket message under "iterator"
+            JSONObject data = source.getJSONObject("iterator").optJSONObject("data");
+            iteratorHolder = (data == null) ? null : data.optJSONObject("fetchSubreddit");
+        } else {
+            iteratorHolder = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children");
+        }
+
+        // opt() yields null when the key is missing (e.g. the last message was not an Iterator),
+        // while a JSON null is an object whose string form is "null"; both mean "no more pages".
+        Object iterator = (iteratorHolder == null) ? null : iteratorHolder.opt("iterator");
+        if (iterator == null || iterator.toString().equals("null")) {
+            return null;
+        }
+
+        try {
+            return prepareQuery(iterator.toString(), this.getGID(url), getParameter(url, "sort"));
+        } catch (URISyntaxException e) {
+            // Pass the exception itself so the stack trace is not lost.
+            LOGGER.error(String.format("Error changing page: %s", e.getMessage()), e);
+            return null;
+        }
}
}
\ No newline at end of file