mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-08-07 14:26:36 +02:00
Added support for sort parameter and WebSocket dependency
This commit is contained in:
5
pom.xml
5
pom.xml
@@ -83,6 +83,11 @@
|
|||||||
<artifactId>httpmime</artifactId>
|
<artifactId>httpmime</artifactId>
|
||||||
<version>4.3.3</version>
|
<version>4.3.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.java-websocket</groupId>
|
||||||
|
<artifactId>Java-WebSocket</artifactId>
|
||||||
|
<version>1.5.1</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
|
@@ -4,17 +4,21 @@ import java.io.BufferedReader;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.OutputStreamWriter;
|
import java.io.OutputStreamWriter;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.*;
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import com.oracle.truffle.js.nodes.access.IteratorStepNode;
|
||||||
|
import com.oracle.truffle.js.runtime.builtins.JSON;
|
||||||
|
import org.java_websocket.client.WebSocketClient;
|
||||||
|
|
||||||
import org.apache.http.NameValuePair;
|
import org.apache.http.NameValuePair;
|
||||||
import org.apache.http.client.utils.URLEncodedUtils;
|
import org.apache.http.client.utils.URLEncodedUtils;
|
||||||
|
import org.java_websocket.handshake.ServerHandshake;
|
||||||
import org.json.JSONArray;
|
import org.json.JSONArray;
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
@@ -46,7 +50,7 @@ public class ScrolllerRipper extends AbstractJSONRipper {
|
|||||||
return m.group(1);
|
return m.group(1);
|
||||||
}
|
}
|
||||||
throw new MalformedURLException("Expected scrolller.com URL format: " +
|
throw new MalformedURLException("Expected scrolller.com URL format: " +
|
||||||
"scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead");
|
"scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -55,34 +59,56 @@ public class ScrolllerRipper extends AbstractJSONRipper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private JSONObject prepareQuery(String iterator, String gid) throws IOException {
|
private JSONObject prepareQuery(String iterator, String gid, String sortByString) throws IOException, URISyntaxException {
|
||||||
|
|
||||||
// Prepares the JSONObject we need to pass to the GraphQL query.
|
// Prepares the JSONObject we need to pass to the GraphQL query.
|
||||||
|
|
||||||
String queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }";
|
if (sortByString.equals("")) {
|
||||||
String filterString = convertFilterString(getParameter(this.url,"filter"));
|
// Sorting not selected
|
||||||
|
String queryString = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }";
|
||||||
|
String filterString = convertFilterString(getParameter(this.url,"filter"));
|
||||||
|
|
||||||
JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid));
|
JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid));
|
||||||
|
|
||||||
if (iterator != null) {
|
if (iterator != null) {
|
||||||
// Iterator is not present on the first page
|
// Iterator is not present on the first page
|
||||||
variablesObject.put("iterator", iterator);
|
variablesObject.put("iterator", iterator);
|
||||||
|
}
|
||||||
|
if (!filterString.equals("NOFILTER")) {
|
||||||
|
// We could also pass filter="" but not including it if not present is cleaner
|
||||||
|
variablesObject.put("filter", filterString);
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString);
|
||||||
|
|
||||||
|
return getPosts(finalQueryObject);
|
||||||
}
|
}
|
||||||
if (!filterString.equals("NOFILTER")) {
|
else {
|
||||||
// We could also pass filter="" but not including it if not present is cleaner
|
|
||||||
variablesObject.put("filter", filterString);
|
String queryString = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }";
|
||||||
|
String filterString = convertFilterString(getParameter(this.url,"filter"));
|
||||||
|
|
||||||
|
JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)).put("sortBy", sortByString.toUpperCase());
|
||||||
|
|
||||||
|
if (iterator != null) {
|
||||||
|
// Iterator is not present on the first page
|
||||||
|
variablesObject.put("iterator", iterator);
|
||||||
|
}
|
||||||
|
if (!filterString.equals("NOFILTER")) {
|
||||||
|
// We could also pass filter="" but not including it if not present is cleaner
|
||||||
|
variablesObject.put("filter", filterString);
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString);
|
||||||
|
|
||||||
|
return getPostsSorted(finalQueryObject);
|
||||||
}
|
}
|
||||||
|
|
||||||
JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", queryString);
|
|
||||||
|
|
||||||
return getPosts(finalQueryObject);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String convertFilterString(String filterParameter) {
|
public String convertFilterString(String filterParameter) {
|
||||||
// Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query
|
// Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query
|
||||||
// I could basically remove the last "s" and uppercase instead of this switch statement but this looks easier to read.
|
// I could basically remove the last "s" and call toUpperCase instead of this switch statement but this looks easier to read.
|
||||||
switch (filterParameter) {
|
switch (filterParameter) {
|
||||||
case "pictures":
|
case "pictures":
|
||||||
return "PICTURE";
|
return "PICTURE";
|
||||||
@@ -118,9 +144,7 @@ public class ScrolllerRipper extends AbstractJSONRipper {
|
|||||||
private JSONObject getPosts(JSONObject data) {
|
private JSONObject getPosts(JSONObject data) {
|
||||||
// The actual GraphQL query call
|
// The actual GraphQL query call
|
||||||
|
|
||||||
// JSoup wants POST data in key=value but I need to write a JSON body so I can't use it...
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
String url = "https://api.scrolller.com/api/v2/graphql";
|
String url = "https://api.scrolller.com/api/v2/graphql";
|
||||||
|
|
||||||
URL obj = new URL(url);
|
URL obj = new URL(url);
|
||||||
@@ -139,75 +163,181 @@ public class ScrolllerRipper extends AbstractJSONRipper {
|
|||||||
|
|
||||||
BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
|
BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
|
||||||
String inputLine;
|
String inputLine;
|
||||||
StringBuffer html = new StringBuffer();
|
StringBuffer jsonString = new StringBuffer();
|
||||||
|
|
||||||
while ((inputLine = in.readLine()) != null) {
|
while ((inputLine = in.readLine()) != null) {
|
||||||
html.append(inputLine);
|
jsonString.append(inputLine);
|
||||||
}
|
}
|
||||||
|
|
||||||
in.close();
|
in.close();
|
||||||
conn.disconnect();
|
conn.disconnect();
|
||||||
|
|
||||||
return new JSONObject(html.toString());
|
return new JSONObject(jsonString.toString());
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
||||||
return new JSONObject("{}");
|
return new JSONObject("{}");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private JSONObject getPostsSorted(JSONObject data) throws MalformedURLException {
|
||||||
|
|
||||||
|
// The actual GraphQL query call (if sort parameter is present)
|
||||||
|
try {
|
||||||
|
|
||||||
|
ArrayList<String> postsJsonStrings = new ArrayList<>();
|
||||||
|
|
||||||
|
WebSocketClient wsc = new WebSocketClient(new URI("wss://api.scrolller.com/api/v2/graphql")) {
|
||||||
|
@Override
|
||||||
|
public void onOpen(ServerHandshake serverHandshake) {
|
||||||
|
// As soon as the WebSocket connects send our query
|
||||||
|
this.send(data.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onMessage(String s) {
|
||||||
|
postsJsonStrings.add(s);
|
||||||
|
if (s.contains("{\"data\":{\"fetchSubreddit\":{\"__typename\":\"Iterator\",\"iterator\":")) {
|
||||||
|
// Iterator is the last field returned, once we received it we can close the connection.
|
||||||
|
this.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onClose(int i, String s, boolean b) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onError(Exception e) {
|
||||||
|
LOGGER.error(String.format("WebSocket error, server reported %s", e.getMessage()));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
wsc.connect();
|
||||||
|
|
||||||
|
while (!wsc.isClosed()) {
|
||||||
|
// Posts list is not over until the connection closes.
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONObject finalObject = new JSONObject();
|
||||||
|
JSONArray posts = new JSONArray();
|
||||||
|
|
||||||
|
// Iterator is the last object in the post list, let's duplicate it in his own object for clarity.
|
||||||
|
finalObject.put("iterator", new JSONObject(postsJsonStrings.get(postsJsonStrings.size()-1)));
|
||||||
|
|
||||||
|
for (String postString : postsJsonStrings) {
|
||||||
|
posts.put(new JSONObject(postString));
|
||||||
|
}
|
||||||
|
finalObject.put("posts", posts);
|
||||||
|
|
||||||
|
return finalObject;
|
||||||
|
|
||||||
|
|
||||||
|
} catch (URISyntaxException ue) {
|
||||||
|
// Nothing to catch, it's an hardcoded URI.
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<String> getURLsFromJSON(JSONObject json) throws JSONException {
|
protected List<String> getURLsFromJSON(JSONObject json) throws JSONException {
|
||||||
JSONArray itemsList = json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items");
|
|
||||||
int bestArea = 0;
|
if (json.has("posts")) {
|
||||||
String bestUrl = "";
|
// If the JSONObject contains the key "posts" it's our custom JSON made after the WebSocket call.
|
||||||
List<String> list = new ArrayList<>();
|
|
||||||
|
JSONArray itemsList = json.getJSONArray("posts");
|
||||||
|
int bestArea = 0;
|
||||||
|
String bestUrl = "";
|
||||||
|
List<String> list = new ArrayList<>();
|
||||||
|
|
||||||
|
|
||||||
for (Object item : itemsList) {
|
for (Object item : itemsList) {
|
||||||
JSONArray sourcesTMP = ((JSONObject) item).getJSONArray("mediaSources");
|
if (((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) {
|
||||||
for (Object sourceTMP : sourcesTMP)
|
// Is it really a post? It could be the subreddit description or the iterator (first and last item)
|
||||||
{
|
JSONArray sourcesTMP = ((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").getJSONArray("mediaSources");
|
||||||
int widthTMP = ((JSONObject) sourceTMP).getInt("width");
|
for (Object sourceTMP : sourcesTMP)
|
||||||
int heightTMP = ((JSONObject) sourceTMP).getInt("height");
|
{
|
||||||
int areaTMP = widthTMP * heightTMP;
|
int widthTMP = ((JSONObject) sourceTMP).getInt("width");
|
||||||
|
int heightTMP = ((JSONObject) sourceTMP).getInt("height");
|
||||||
|
int areaTMP = widthTMP * heightTMP;
|
||||||
|
|
||||||
|
if (areaTMP > bestArea) {
|
||||||
|
// Better way to determine best image?
|
||||||
|
bestArea = widthTMP;
|
||||||
|
bestUrl = ((JSONObject) sourceTMP).getString("url");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
list.add(bestUrl);
|
||||||
|
bestUrl = "";
|
||||||
|
bestArea = 0;
|
||||||
|
|
||||||
if (areaTMP > bestArea) {
|
|
||||||
// Better way to determine best image?
|
|
||||||
bestArea = widthTMP;
|
|
||||||
bestUrl = ((JSONObject) sourceTMP).getString("url");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
list.add(bestUrl);
|
return list;
|
||||||
bestUrl = "";
|
|
||||||
bestArea = 0;
|
} else {
|
||||||
|
JSONArray itemsList = json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items");
|
||||||
|
int bestArea = 0;
|
||||||
|
String bestUrl = "";
|
||||||
|
List<String> list = new ArrayList<>();
|
||||||
|
|
||||||
|
|
||||||
|
for (Object item : itemsList) {
|
||||||
|
JSONArray sourcesTMP = ((JSONObject) item).getJSONArray("mediaSources");
|
||||||
|
for (Object sourceTMP : sourcesTMP)
|
||||||
|
{
|
||||||
|
int widthTMP = ((JSONObject) sourceTMP).getInt("width");
|
||||||
|
int heightTMP = ((JSONObject) sourceTMP).getInt("height");
|
||||||
|
int areaTMP = widthTMP * heightTMP;
|
||||||
|
|
||||||
|
if (areaTMP > bestArea) {
|
||||||
|
// Better way to determine best image?
|
||||||
|
bestArea = widthTMP;
|
||||||
|
bestUrl = ((JSONObject) sourceTMP).getString("url");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
list.add(bestUrl);
|
||||||
|
bestUrl = "";
|
||||||
|
bestArea = 0;
|
||||||
|
}
|
||||||
|
return list;
|
||||||
}
|
}
|
||||||
return list;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected JSONObject getFirstPage() throws IOException {
|
protected JSONObject getFirstPage() throws IOException {
|
||||||
if (getParameter(url,"sort") != null) {
|
try {
|
||||||
// I need support for the WebSocket protocol to implement sorting.
|
return prepareQuery(null, this.getGID(url), getParameter(url,"sort"));
|
||||||
// A GraphQL query to the API with the "sortBy" variable can't come from a POST request or it will return error 500, it has to come from a WebSocket.
|
} catch (URISyntaxException e) {
|
||||||
LOGGER.warn("Sorting is not currently implemented and it will be ignored");
|
LOGGER.error(String.format("Error obtaining first page: %s", e.getMessage()));
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
return prepareQuery(null, this.getGID(url));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public JSONObject getNextPage(JSONObject source) throws IOException {
|
public JSONObject getNextPage(JSONObject source) throws IOException {
|
||||||
// Every response from the API contains an "iterator" string that we need to pass back to the API to get the next page
|
// Every response from the API contains an "iterator" string that we need to pass back to the API to get the next page
|
||||||
// Checking if iterator is null is not working for some reason, hence why the weird "iterator.toString().equals("null")"
|
// Checking if iterator is null is not working for some reason, hence why the weird "iterator.toString().equals("null")"
|
||||||
Object iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator");
|
|
||||||
|
Object iterator = null;
|
||||||
|
if (source.has("iterator")) {
|
||||||
|
// sorted
|
||||||
|
iterator = source.getJSONObject("iterator").getJSONObject("data").getJSONObject("fetchSubreddit").get("iterator");
|
||||||
|
} else {
|
||||||
|
iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator");
|
||||||
|
}
|
||||||
if (!iterator.toString().equals("null")) {
|
if (!iterator.toString().equals("null")) {
|
||||||
return prepareQuery(iterator.toString(), this.getGID(url));
|
try {
|
||||||
|
return prepareQuery(iterator.toString(), this.getGID(url), getParameter(url,"sort"));
|
||||||
|
} catch (URISyntaxException e) {
|
||||||
|
LOGGER.error(String.format("Error changing page: %s", e.getMessage()));
|
||||||
|
return null;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
Reference in New Issue
Block a user