1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-21 13:11:27 +02:00

Merge pull request #1724 from cariah/feat/insta-graalvmjs

fix: instagramRipper, replaced Nashorn with GraalVM.js
This commit is contained in:
cyian-1756
2020-10-01 19:59:11 +00:00
committed by GitHub
2 changed files with 37 additions and 36 deletions

View File

@@ -42,6 +42,11 @@
<artifactId>jsoup</artifactId>
<version>1.8.1</version>
</dependency>
<dependency>
<groupId>org.graalvm.js</groupId>
<artifactId>js</artifactId>
<version>20.1.0</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>

View File

@@ -1,18 +1,13 @@
package com.rarchives.ripme.ripper.rippers;
import com.oracle.js.parser.ErrorManager;
import com.oracle.js.parser.Parser;
import com.oracle.js.parser.ScriptEnvironment;
import com.oracle.js.parser.Source;
import com.oracle.js.parser.ir.*;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import jdk.nashorn.internal.ir.Block;
import jdk.nashorn.internal.ir.CallNode;
import jdk.nashorn.internal.ir.ExpressionStatement;
import jdk.nashorn.internal.ir.FunctionNode;
import jdk.nashorn.internal.ir.Statement;
import jdk.nashorn.internal.parser.Parser;
import jdk.nashorn.internal.runtime.Context;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.Source;
import jdk.nashorn.internal.runtime.options.Options;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
@@ -26,12 +21,7 @@ import java.time.Instant;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Spliterators;
import java.util.*;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
@@ -176,13 +166,17 @@ public class InstagramRipper extends AbstractJSONRipper {
if (postRip) {
return null;
}
Predicate<String> hrefFilter = (storiesRip || pinnedReelRip) ? href -> href.contains("Consumer.js") :
href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
Predicate<String> hrefFilter = href -> href.contains("Consumer.js");
if (taggedRip) {
hrefFilter = href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
}
String href = doc.select("link[rel=preload]").stream()
.map(link -> link.attr("href"))
.filter(hrefFilter)
.findFirst().orElse("");
String body = Http.url("https://www.instagram.com" + href).cookies(cookies).response().body();
Function<String, String> hashExtractor =
@@ -415,6 +409,7 @@ public class InstagramRipper extends AbstractJSONRipper {
/* ------------------------------------------------------------------------------------------------------- */
private String getHashValue(String javaScriptData, String keyword, int offset) {
List<Statement> statements = getJsBodyBlock(javaScriptData).getStatements();
return statements.stream()
.flatMap(statement -> filterItems(statement, ExpressionStatement.class))
.map(ExpressionStatement::getExpression)
@@ -444,9 +439,10 @@ public class InstagramRipper extends AbstractJSONRipper {
}
/**
 * Parses the given JavaScript source text and returns the body block of the parse result.
 *
 * <p>The parser environment is built for ECMAScript version 10 with {@code constAsVar}
 * enabled. The source is registered under the fixed name {@code "name"}.
 *
 * @param javaScriptData the JavaScript source text to parse
 * @return the {@code Block} obtained from {@code getBody()} on the parse result
 */
private Block getJsBodyBlock(String javaScriptData) {
    ScriptEnvironment env = ScriptEnvironment.builder().ecmaScriptVersion(10).constAsVar(true).build();
    // NOTE(review): ThrowErrorManager presumably raises on parse errors instead of
    // collecting them silently; confirm against the parser's documentation.
    ErrorManager errorManager = new ErrorManager.ThrowErrorManager();
    Source src = Source.sourceFor("name", javaScriptData);
    return new Parser(env, src, errorManager).parse().getBody();
}
// Some JSON helper methods below