mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-08-26 07:14:38 +02:00
Fixed ripper for HentaiNexus
This commit is contained in:
@@ -4,27 +4,22 @@ import java.io.IOException;
|
|||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Base64;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
||||||
|
import org.jsoup.nodes.DataNode;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
public class HentaiNexusRipper extends AbstractJSONRipper {
|
||||||
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
|
||||||
import com.rarchives.ripme.utils.Http;
|
|
||||||
import com.rarchives.ripme.utils.Utils;
|
|
||||||
|
|
||||||
public class HentaiNexusRipper extends AbstractHTMLRipper {
|
|
||||||
|
|
||||||
private Document firstPage;
|
|
||||||
private DownloadThreadPool hentainexusThreadPool = new DownloadThreadPool("hentainexus");
|
|
||||||
@Override
|
|
||||||
public DownloadThreadPool getThreadPool() {
|
|
||||||
return hentainexusThreadPool;
|
|
||||||
}
|
|
||||||
|
|
||||||
public HentaiNexusRipper(URL url) throws IOException {
|
public HentaiNexusRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
@@ -34,7 +29,6 @@ public class HentaiNexusRipper extends AbstractHTMLRipper {
|
|||||||
public String getHost() {
|
public String getHost() {
|
||||||
return "hentainexus";
|
return "hentainexus";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getDomain() {
|
public String getDomain() {
|
||||||
return "hentainexus.com";
|
return "hentainexus.com";
|
||||||
@@ -42,88 +36,148 @@ public class HentaiNexusRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Pattern p = Pattern.compile("https?://hentainexus\\.com/view/([a-zA-Z0-9_\\-%]*)/?$");
|
/*
|
||||||
|
Valid URLs are /view/id, /read/id and those 2 with #pagenumber
|
||||||
|
https://hentainexus.com/view/9202
|
||||||
|
https://hentainexus.com/read/9202
|
||||||
|
https://hentainexus.com/view/9202#001
|
||||||
|
https://hentainexus.com/read/9202#001
|
||||||
|
*/
|
||||||
|
|
||||||
|
Pattern p = Pattern.compile("^https?://hentainexus\\.com/(?:view|read)/([0-9]+)(?:\\#[0-9]+)*$");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
}
|
}
|
||||||
throw new MalformedURLException("Expected hentainexus.com URL format: " +
|
throw new MalformedURLException("Expected hentainexus.com URL format: " +
|
||||||
"hentainexus.com/view/NUMBER - got " + url + " instead");
|
"hentainexus.com/view/id OR hentainexus.com/read/id - got " + url + "instead");
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Document getFirstPage() throws IOException {
|
|
||||||
// "url" is an instance field of the superclass
|
|
||||||
if (firstPage == null) {
|
|
||||||
firstPage = Http.url(url).get();
|
|
||||||
}
|
|
||||||
return firstPage;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<String> getURLsFromPage(Document doc) {
|
|
||||||
List<String> imageURLs = new ArrayList<>();
|
|
||||||
Elements thumbs = doc.select("div.is-multiline > div.column > a");
|
|
||||||
for (Element el : thumbs) {
|
|
||||||
imageURLs.add("https://" + getDomain() + el.attr("href"));
|
|
||||||
}
|
|
||||||
return imageURLs;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
|
||||||
try {
|
|
||||||
Document gallery = Http.url(url).get();
|
|
||||||
return getHost() + "_" + gallery.select("h1.title").text();
|
|
||||||
} catch (IOException e) {
|
|
||||||
LOGGER.info("Falling back");
|
|
||||||
}
|
|
||||||
|
|
||||||
return super.getAlbumTitle(url);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void downloadURL(URL url, int index) {
|
public void downloadURL(URL url, int index) {
|
||||||
HentaiNexusImageThread t = new HentaiNexusImageThread(url, index);
|
addURLToDownload(url, getPrefix(index));
|
||||||
hentainexusThreadPool.addThread(t);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper class to find and download images found on "image" pages
|
|
||||||
*/
|
|
||||||
private class HentaiNexusImageThread extends Thread {
|
|
||||||
private URL url;
|
|
||||||
private int index;
|
|
||||||
|
|
||||||
HentaiNexusImageThread(URL url, int index) {
|
@Override
|
||||||
super();
|
protected List<String> getURLsFromJSON(JSONObject json) throws JSONException {
|
||||||
this.url = url;
|
|
||||||
this.index = index;
|
List<String> urlList = new ArrayList<>();
|
||||||
|
|
||||||
|
JSONArray imagesList = json.getJSONArray("f");
|
||||||
|
String host = json.getString("b");
|
||||||
|
String folder = json.getString("r");
|
||||||
|
String id = json.getString("i");
|
||||||
|
|
||||||
|
for (Object singleImage : imagesList) {
|
||||||
|
String hashTMP = ((JSONObject) singleImage).getString("h");
|
||||||
|
String fileNameTMP = ((JSONObject) singleImage).getString("p");
|
||||||
|
String imageUrlTMP = String.format("%s%s%s/%s/%s",host,folder,hashTMP,id,fileNameTMP);
|
||||||
|
urlList.add(imageUrlTMP);
|
||||||
|
}
|
||||||
|
|
||||||
|
return urlList;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
protected JSONObject getFirstPage() throws IOException {
|
||||||
fetchImage();
|
String jsonEncodedString = getJsonEncodedStringFromPage();
|
||||||
|
String jsonDecodedString = decodeJsonString(jsonEncodedString);
|
||||||
|
return new JSONObject(jsonDecodedString);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fetchImage() {
|
public String getJsonEncodedStringFromPage() throws MalformedURLException, IOException
|
||||||
try {
|
{
|
||||||
Document doc = Http.url(url).retries(3).get();
|
// Image data only appears on the /read/ page and not on the /view/ one.
|
||||||
Elements images = doc.select("figure.image > img");
|
URL readUrl = new URL(String.format("http://hentainexus.com/read/%s",getGID(url)));
|
||||||
if (images.isEmpty()) {
|
Document document = Http.url(readUrl).response().parse();
|
||||||
LOGGER.warn("Image not found at " + this.url);
|
|
||||||
return;
|
for (Element scripts : document.getElementsByTag("script")) {
|
||||||
}
|
for (DataNode dataNode : scripts.dataNodes()) {
|
||||||
Element image = images.first();
|
if (dataNode.getWholeData().contains("initReader")) {
|
||||||
String imgsrc = image.attr("src");
|
// Extract JSON encoded string from the JavaScript initReader() call.
|
||||||
String prefix = "";
|
String data = dataNode.getWholeData().trim().replaceAll("\\r|\\n|\\t","");
|
||||||
if (Utils.getConfigBoolean("download.save_order", true)) {
|
|
||||||
prefix = String.format("%03d_", index);
|
Pattern p = Pattern.compile(".*?initReader\\(\"(.*?)\",.*?\\).*?");
|
||||||
}
|
Matcher m = p.matcher(data);
|
||||||
addURLToDownload(new URL(imgsrc), prefix);
|
if (m.matches()) {
|
||||||
} catch (IOException e) {
|
return m.group(1);
|
||||||
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String decodeJsonString(String jsonEncodedString)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
The initReader() JavaScript function accepts 2 parameters: a weird string and the window title (we can ignore this).
|
||||||
|
The weird string is a JSON string with some bytes shifted and swapped around and then encoded in base64.
|
||||||
|
The following code is a Java adaptation of the initRender() JavaScript function after manual deobfuscation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
byte[] jsonBytes = Base64.getDecoder().decode(jsonEncodedString);
|
||||||
|
|
||||||
|
ArrayList unknownArray = new ArrayList();
|
||||||
|
ArrayList<Integer> indexesToUse = new ArrayList<>();
|
||||||
|
|
||||||
|
for (int i = 0x2; unknownArray.size() < 0x10; ++i) {
|
||||||
|
if (!indexesToUse.contains(i)) {
|
||||||
|
unknownArray.add(i);
|
||||||
|
for (int j = i << 0x1; j <= 0x100; j += i) {
|
||||||
|
if (!indexesToUse.contains(j)) {
|
||||||
|
indexesToUse.add(j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
byte magicByte = 0x0;
|
||||||
|
for (int i = 0x0; i < 0x40; i++) {
|
||||||
|
magicByte = (byte) (signedToUnsigned(magicByte) ^ signedToUnsigned(jsonBytes[i]));
|
||||||
|
for (int j = 0x0; j < 0x8; j++) {
|
||||||
|
long unsignedMagicByteTMP = signedToUnsigned(magicByte);
|
||||||
|
magicByte = (byte) ((unsignedMagicByteTMP & 0x1) == 1 ? unsignedMagicByteTMP >>> 0x1 ^ 0xc : unsignedMagicByteTMP >>> 0x1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
magicByte = (byte) (magicByte & 0x7);
|
||||||
|
ArrayList<Integer> newArray = new ArrayList();
|
||||||
|
|
||||||
|
for (int i = 0x0; i < 0x100; i++) {
|
||||||
|
newArray.add(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int newIndex = 0, backup = 0;
|
||||||
|
for (int i = 0x0; i < 0x100; i++) {
|
||||||
|
newIndex = (newIndex + newArray.get(i) + (int) signedToUnsigned(jsonBytes[i % 0x40])) % 0x100;
|
||||||
|
backup = newArray.get(i);
|
||||||
|
newArray.set(i, newArray.get(newIndex));
|
||||||
|
newArray.set(newIndex, backup);
|
||||||
|
}
|
||||||
|
|
||||||
|
int magicByteTranslated = (int) unknownArray.get(magicByte);
|
||||||
|
int index1 = 0x0, index2 = 0x0, index3 = 0x0, swap1 = 0x0, xorNumber = 0x0;
|
||||||
|
String decodedJsonString = "";
|
||||||
|
|
||||||
|
for (int i = 0x0; i + 0x40 < jsonBytes.length; i++) {
|
||||||
|
index1 = (index1 + magicByteTranslated) % 0x100;
|
||||||
|
index2 = (index3 + newArray.get((index2 + newArray.get(index1)) % 0x100)) % 0x100;
|
||||||
|
index3 = (index3 + index1 + newArray.get(index1)) % 0x100;
|
||||||
|
swap1 = newArray.get(index1);
|
||||||
|
newArray.set(index1, newArray.get(index2));
|
||||||
|
newArray.set(index2,swap1);
|
||||||
|
xorNumber = newArray.get((index2 + newArray.get((index1 + newArray.get((xorNumber + index3) % 0x100)) % 0x100)) % 0x100);
|
||||||
|
decodedJsonString += Character.toString((char) signedToUnsigned((jsonBytes[i + 0x40] ^ xorNumber)));
|
||||||
|
}
|
||||||
|
|
||||||
|
return decodedJsonString;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static long signedToUnsigned(int signed) {
|
||||||
|
return (byte) signed & 0xFF;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
@@ -2,14 +2,43 @@ package com.rarchives.ripme.tst.ripper.rippers;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.rippers.HentaiNexusRipper;
|
import com.rarchives.ripme.ripper.rippers.HentaiNexusRipper;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class HentainexusRipperTest extends RippersTest {
|
public class HentainexusRipperTest extends RippersTest {
|
||||||
@Test
|
@Test
|
||||||
public void testHentaiNexusAlbum() throws IOException {
|
public void testHentaiNexusJson() throws IOException {
|
||||||
HentaiNexusRipper ripper = new HentaiNexusRipper(new URL("https://hentainexus.com/view/44"));
|
List<URL> testURLs = new ArrayList<>();
|
||||||
testRipper(ripper);
|
testURLs.add(new URL("https://hentainexus.com/view/9202"));
|
||||||
|
testURLs.add(new URL("https://hentainexus.com/read/9202"));
|
||||||
|
testURLs.add(new URL("https://hentainexus.com/view/9202#001"));
|
||||||
|
testURLs.add(new URL("https://hentainexus.com/read/9202#001"));
|
||||||
|
|
||||||
|
for (URL url : testURLs) {
|
||||||
|
|
||||||
|
HentaiNexusRipper ripper = new HentaiNexusRipper(url);
|
||||||
|
|
||||||
|
boolean testOK = false;
|
||||||
|
try {
|
||||||
|
|
||||||
|
String jsonEncodedString = ripper.getJsonEncodedStringFromPage();
|
||||||
|
String jsonDecodedString = ripper.decodeJsonString(jsonEncodedString);
|
||||||
|
JSONObject json = new JSONObject(jsonDecodedString);
|
||||||
|
// Fail test if JSON empty
|
||||||
|
testOK = !json.isEmpty();
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
// Fail test if JSON invalid, not present or other errors
|
||||||
|
testOK = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert.assertEquals(true, testOK);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user