mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-02-22 07:03:35 +01:00
fix: instagramRipper, replaced Nashorn with GraalVM.js
This commit is contained in:
parent
f639758a74
commit
029422b417
5
pom.xml
5
pom.xml
@ -42,6 +42,11 @@
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.8.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.graalvm.js</groupId>
|
||||
<artifactId>js</artifactId>
|
||||
<version>20.1.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.json</groupId>
|
||||
<artifactId>json</artifactId>
|
||||
|
@ -1,18 +1,13 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import com.oracle.js.parser.ErrorManager;
|
||||
import com.oracle.js.parser.Parser;
|
||||
import com.oracle.js.parser.ScriptEnvironment;
|
||||
import com.oracle.js.parser.Source;
|
||||
import com.oracle.js.parser.ir.*;
|
||||
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import jdk.nashorn.internal.ir.Block;
|
||||
import jdk.nashorn.internal.ir.CallNode;
|
||||
import jdk.nashorn.internal.ir.ExpressionStatement;
|
||||
import jdk.nashorn.internal.ir.FunctionNode;
|
||||
import jdk.nashorn.internal.ir.Statement;
|
||||
import jdk.nashorn.internal.parser.Parser;
|
||||
import jdk.nashorn.internal.runtime.Context;
|
||||
import jdk.nashorn.internal.runtime.ErrorManager;
|
||||
import jdk.nashorn.internal.runtime.Source;
|
||||
import jdk.nashorn.internal.runtime.options.Options;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Connection;
|
||||
@ -26,12 +21,7 @@ import java.time.Instant;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Spliterators;
|
||||
import java.util.*;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
@ -176,13 +166,17 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
if (postRip) {
|
||||
return null;
|
||||
}
|
||||
Predicate<String> hrefFilter = (storiesRip || pinnedReelRip) ? href -> href.contains("Consumer.js") :
|
||||
href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
|
||||
|
||||
Predicate<String> hrefFilter = href -> href.contains("Consumer.js");
|
||||
if (taggedRip) {
|
||||
hrefFilter = href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
|
||||
}
|
||||
|
||||
String href = doc.select("link[rel=preload]").stream()
|
||||
.map(link -> link.attr("href"))
|
||||
.filter(hrefFilter)
|
||||
.findFirst().orElse("");
|
||||
.map(link -> link.attr("href"))
|
||||
.filter(hrefFilter)
|
||||
.findFirst().orElse("");
|
||||
|
||||
String body = Http.url("https://www.instagram.com" + href).cookies(cookies).response().body();
|
||||
|
||||
Function<String, String> hashExtractor =
|
||||
@ -386,7 +380,7 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
case "GraphSidecar":
|
||||
JSONArray sideCar = getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges");
|
||||
return getStreamOfJsonArray(sideCar).map(object -> object.getJSONObject("node"))
|
||||
.flatMap(this::parseRootForUrls);
|
||||
.flatMap(this::parseRootForUrls);
|
||||
default:
|
||||
return Stream.empty();
|
||||
}
|
||||
@ -415,18 +409,19 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
/* ------------------------------------------------------------------------------------------------------- */
|
||||
private String getHashValue(String javaScriptData, String keyword, int offset) {
|
||||
List<Statement> statements = getJsBodyBlock(javaScriptData).getStatements();
|
||||
|
||||
return statements.stream()
|
||||
.flatMap(statement -> filterItems(statement, ExpressionStatement.class))
|
||||
.map(ExpressionStatement::getExpression)
|
||||
.flatMap(expression -> filterItems(expression, CallNode.class))
|
||||
.map(CallNode::getArgs)
|
||||
.map(expressions -> expressions.get(0))
|
||||
.flatMap(expression -> filterItems(expression, FunctionNode.class))
|
||||
.map(FunctionNode::getBody)
|
||||
.map(Block::getStatements)
|
||||
.map(statementList -> lookForHash(statementList, keyword, offset))
|
||||
.filter(Objects::nonNull)
|
||||
.findFirst().orElse(null);
|
||||
.flatMap(statement -> filterItems(statement, ExpressionStatement.class))
|
||||
.map(ExpressionStatement::getExpression)
|
||||
.flatMap(expression -> filterItems(expression, CallNode.class))
|
||||
.map(CallNode::getArgs)
|
||||
.map(expressions -> expressions.get(0))
|
||||
.flatMap(expression -> filterItems(expression, FunctionNode.class))
|
||||
.map(FunctionNode::getBody)
|
||||
.map(Block::getStatements)
|
||||
.map(statementList -> lookForHash(statementList, keyword, offset))
|
||||
.filter(Objects::nonNull)
|
||||
.findFirst().orElse(null);
|
||||
}
|
||||
|
||||
private String lookForHash(List<Statement> list, String keyword, int offset) {
|
||||
@ -444,9 +439,10 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
}
|
||||
|
||||
private Block getJsBodyBlock(String javaScriptData) {
|
||||
ErrorManager errors = new ErrorManager();
|
||||
Context context = new Context(new Options("nashorn"), errors, Thread.currentThread().getContextClassLoader());
|
||||
return new Parser(context.getEnv(), Source.sourceFor("name", javaScriptData), errors).parse().getBody();
|
||||
ScriptEnvironment env = ScriptEnvironment.builder().ecmaScriptVersion(10).constAsVar(true).build();
|
||||
ErrorManager errorManager = new ErrorManager.ThrowErrorManager();
|
||||
Source src = Source.sourceFor("name", javaScriptData);
|
||||
return new Parser(env, src, errorManager).parse().getBody();
|
||||
}
|
||||
|
||||
// Some JSON helper methods below
|
||||
|
Loading…
x
Reference in New Issue
Block a user