1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-02-22 07:03:35 +01:00

fix: instagramRipper, replaced Nashorn with GraalVM.js

This commit is contained in:
cariah 2020-08-05 17:25:42 +02:00
parent f639758a74
commit 029422b417
2 changed files with 37 additions and 36 deletions

View File

@ -42,6 +42,11 @@
<artifactId>jsoup</artifactId>
<version>1.8.1</version>
</dependency>
<dependency>
<groupId>org.graalvm.js</groupId>
<artifactId>js</artifactId>
<version>20.1.0</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>

View File

@ -1,18 +1,13 @@
package com.rarchives.ripme.ripper.rippers;
import com.oracle.js.parser.ErrorManager;
import com.oracle.js.parser.Parser;
import com.oracle.js.parser.ScriptEnvironment;
import com.oracle.js.parser.Source;
import com.oracle.js.parser.ir.*;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import jdk.nashorn.internal.ir.Block;
import jdk.nashorn.internal.ir.CallNode;
import jdk.nashorn.internal.ir.ExpressionStatement;
import jdk.nashorn.internal.ir.FunctionNode;
import jdk.nashorn.internal.ir.Statement;
import jdk.nashorn.internal.parser.Parser;
import jdk.nashorn.internal.runtime.Context;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.Source;
import jdk.nashorn.internal.runtime.options.Options;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
@ -26,12 +21,7 @@ import java.time.Instant;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Spliterators;
import java.util.*;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
@ -176,13 +166,17 @@ public class InstagramRipper extends AbstractJSONRipper {
if (postRip) {
return null;
}
Predicate<String> hrefFilter = (storiesRip || pinnedReelRip) ? href -> href.contains("Consumer.js") :
href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
Predicate<String> hrefFilter = href -> href.contains("Consumer.js");
if (taggedRip) {
hrefFilter = href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
}
String href = doc.select("link[rel=preload]").stream()
.map(link -> link.attr("href"))
.filter(hrefFilter)
.findFirst().orElse("");
.map(link -> link.attr("href"))
.filter(hrefFilter)
.findFirst().orElse("");
String body = Http.url("https://www.instagram.com" + href).cookies(cookies).response().body();
Function<String, String> hashExtractor =
@ -386,7 +380,7 @@ public class InstagramRipper extends AbstractJSONRipper {
case "GraphSidecar":
JSONArray sideCar = getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges");
return getStreamOfJsonArray(sideCar).map(object -> object.getJSONObject("node"))
.flatMap(this::parseRootForUrls);
.flatMap(this::parseRootForUrls);
default:
return Stream.empty();
}
@ -415,18 +409,19 @@ public class InstagramRipper extends AbstractJSONRipper {
/* ------------------------------------------------------------------------------------------------------- */
/**
 * Walks the parsed JavaScript and returns the first hash that {@code lookForHash} finds.
 *
 * The top-level statements of the block returned by {@code getJsBodyBlock} are narrowed
 * step by step: expression statements, then call nodes, then the first argument of each
 * call, then function nodes, then the statement list of each function body. Every such
 * statement list is handed to {@code lookForHash}.
 *
 * NOTE(review): the stream pipeline appears twice in this span. This looks like the
 * before/after lines of a whitespace-only diff hunk rather than real duplicated code;
 * confirm against the actual file.
 *
 * @param javaScriptData JavaScript source text, parsed via {@code getJsBodyBlock}
 * @param keyword        forwarded unchanged to {@code lookForHash}
 * @param offset         forwarded unchanged to {@code lookForHash}
 * @return the first non-null result of {@code lookForHash}, or {@code null} if there is none
 */
private String getHashValue(String javaScriptData, String keyword, int offset) {
List<Statement> statements = getJsBodyBlock(javaScriptData).getStatements();
return statements.stream()
// filterItems presumably keeps only items of the given class; its definition is not visible here.
.flatMap(statement -> filterItems(statement, ExpressionStatement.class))
.map(ExpressionStatement::getExpression)
.flatMap(expression -> filterItems(expression, CallNode.class))
.map(CallNode::getArgs)
// NOTE(review): get(0) assumes every matched call has at least one argument; confirm.
.map(expressions -> expressions.get(0))
.flatMap(expression -> filterItems(expression, FunctionNode.class))
.map(FunctionNode::getBody)
.map(Block::getStatements)
.map(statementList -> lookForHash(statementList, keyword, offset))
// Drop the lists in which lookForHash found nothing, then take the first hit.
.filter(Objects::nonNull)
.findFirst().orElse(null);
// The lines below repeat the same pipeline (see the NOTE in the header comment).
.flatMap(statement -> filterItems(statement, ExpressionStatement.class))
.map(ExpressionStatement::getExpression)
.flatMap(expression -> filterItems(expression, CallNode.class))
.map(CallNode::getArgs)
.map(expressions -> expressions.get(0))
.flatMap(expression -> filterItems(expression, FunctionNode.class))
.map(FunctionNode::getBody)
.map(Block::getStatements)
.map(statementList -> lookForHash(statementList, keyword, offset))
.filter(Objects::nonNull)
.findFirst().orElse(null);
}
private String lookForHash(List<Statement> list, String keyword, int offset) {
@ -444,9 +439,10 @@ public class InstagramRipper extends AbstractJSONRipper {
}
/**
 * Parses the given JavaScript source text and returns the body block of the parse result.
 *
 * NOTE(review): this span contains two parser setups back to back, each ending in its own
 * {@code return}. It appears to be the removed Nashorn lines followed by their
 * {@code com.oracle.js.parser} replacements from a diff view; only one of them can be
 * live code, since statements after a {@code return} are unreachable. Confirm against the
 * actual file.
 *
 * @param javaScriptData JavaScript source text to parse
 * @return the result of {@code parse().getBody()} for that source
 */
private Block getJsBodyBlock(String javaScriptData) {
// Nashorn-style setup: a Context built from "nashorn" options, sharing the ErrorManager
// and using the current thread's context class loader.
ErrorManager errors = new ErrorManager();
Context context = new Context(new Options("nashorn"), errors, Thread.currentThread().getContextClassLoader());
return new Parser(context.getEnv(), Source.sourceFor("name", javaScriptData), errors).parse().getBody();
// ScriptEnvironment-based setup: the environment is built with ecmaScriptVersion(10) and
// constAsVar(true).
ScriptEnvironment env = ScriptEnvironment.builder().ecmaScriptVersion(10).constAsVar(true).build();
// ThrowErrorManager presumably raises on parse errors instead of collecting them; confirm.
ErrorManager errorManager = new ErrorManager.ThrowErrorManager();
// "name" is the source name handed to Source.sourceFor.
Source src = Source.sourceFor("name", javaScriptData);
return new Parser(env, src, errorManager).parse().getBody();
}
// Some JSON helper methods below