1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-02-21 06:32:42 +01:00

Fixed ripper for HentaiNexus

This commit is contained in:
PaaaulZ 2020-12-18 22:17:41 +01:00
parent e0d40df5ad
commit 0dded85ddd
2 changed files with 167 additions and 84 deletions

View File

@ -4,27 +4,22 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class HentaiNexusRipper extends AbstractHTMLRipper {
private Document firstPage;
private DownloadThreadPool hentainexusThreadPool = new DownloadThreadPool("hentainexus");
@Override
public DownloadThreadPool getThreadPool() {
return hentainexusThreadPool;
}
public class HentaiNexusRipper extends AbstractJSONRipper {
public HentaiNexusRipper(URL url) throws IOException {
super(url);
@ -34,7 +29,6 @@ public class HentaiNexusRipper extends AbstractHTMLRipper {
/**
 * Returns the fixed host name string for this ripper.
 *
 * @return the literal {@code "hentainexus"}
 */
public String getHost() {
    final String host = "hentainexus";
    return host;
}
@Override
public String getDomain() {
return "hentainexus.com";
@ -42,88 +36,148 @@ public class HentaiNexusRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://hentainexus\\.com/view/([a-zA-Z0-9_\\-%]*)/?$");
/*
Valid URLs are /view/id, /read/id and those 2 with #pagenumber
https://hentainexus.com/view/9202
https://hentainexus.com/read/9202
https://hentainexus.com/view/9202#001
https://hentainexus.com/read/9202#001
*/
Pattern p = Pattern.compile("^https?://hentainexus\\.com/(?:view|read)/([0-9]+)(?:\\#[0-9]+)*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected hentainexus.com URL format: " +
"hentainexus.com/view/NUMBER - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
if (firstPage == null) {
firstPage = Http.url(url).get();
}
return firstPage;
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
Elements thumbs = doc.select("div.is-multiline > div.column > a");
for (Element el : thumbs) {
imageURLs.add("https://" + getDomain() + el.attr("href"));
}
return imageURLs;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Document gallery = Http.url(url).get();
return getHost() + "_" + gallery.select("h1.title").text();
} catch (IOException e) {
LOGGER.info("Falling back");
}
return super.getAlbumTitle(url);
"hentainexus.com/view/id OR hentainexus.com/read/id - got " + url + "instead");
}
@Override
public void downloadURL(URL url, int index) {
HentaiNexusImageThread t = new HentaiNexusImageThread(url, index);
hentainexusThreadPool.addThread(t);
addURLToDownload(url, getPrefix(index));
}
/**
* Helper class to find and download images found on "image" pages
*/
private class HentaiNexusImageThread extends Thread {
private URL url;
private int index;
HentaiNexusImageThread(URL url, int index) {
super();
this.url = url;
this.index = index;
@Override
protected List<String> getURLsFromJSON(JSONObject json) throws JSONException {
List<String> urlList = new ArrayList<>();
JSONArray imagesList = json.getJSONArray("f");
String host = json.getString("b");
String folder = json.getString("r");
String id = json.getString("i");
for (Object singleImage : imagesList) {
String hashTMP = ((JSONObject) singleImage).getString("h");
String fileNameTMP = ((JSONObject) singleImage).getString("p");
String imageUrlTMP = String.format("%s%s%s/%s/%s",host,folder,hashTMP,id,fileNameTMP);
urlList.add(imageUrlTMP);
}
@Override
public void run() {
fetchImage();
}
return urlList;
}
private void fetchImage() {
try {
Document doc = Http.url(url).retries(3).get();
Elements images = doc.select("figure.image > img");
if (images.isEmpty()) {
LOGGER.warn("Image not found at " + this.url);
return;
/**
 * Fetches the encoded reader payload, decodes it and parses it as JSON.
 *
 * @return the JSON object built from the decoded payload
 * @throws IOException if the page cannot be loaded or contains no {@code initReader()} payload
 */
@Override
protected JSONObject getFirstPage() throws IOException {
    String jsonEncodedString = getJsonEncodedStringFromPage();
    // getJsonEncodedStringFromPage() signals "payload not found" with an empty string;
    // report that here instead of letting the decoder fail on an empty byte array.
    if (jsonEncodedString.isEmpty()) {
        throw new IOException("No initReader() payload found for " + url);
    }
    String jsonDecodedString = decodeJsonString(jsonEncodedString);
    return new JSONObject(jsonDecodedString);
}
public String getJsonEncodedStringFromPage() throws MalformedURLException, IOException
{
// Image data only appears on the /read/ page and not on the /view/ one.
URL readUrl = new URL(String.format("http://hentainexus.com/read/%s",getGID(url)));
Document document = Http.url(readUrl).response().parse();
for (Element scripts : document.getElementsByTag("script")) {
for (DataNode dataNode : scripts.dataNodes()) {
if (dataNode.getWholeData().contains("initReader")) {
// Extract JSON encoded string from the JavaScript initReader() call.
String data = dataNode.getWholeData().trim().replaceAll("\\r|\\n|\\t","");
Pattern p = Pattern.compile(".*?initReader\\(\"(.*?)\",.*?\\).*?");
Matcher m = p.matcher(data);
if (m.matches()) {
return m.group(1);
}
}
Element image = images.first();
String imgsrc = image.attr("src");
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(imgsrc), prefix);
} catch (IOException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
return "";
}
}
/**
 * Decodes the obfuscated string passed to the page's {@code initReader()} JavaScript call.
 *
 * <p>The input is Base64. The first 64 decoded bytes act as a key; every following byte is
 * XOR-ed with a keystream derived from that key. This is a Java adaptation of the manually
 * deobfuscated {@code initReader()} function (its second argument, the window title, is not
 * needed here).
 *
 * @param jsonEncodedString Base64 text that decodes to at least 64 bytes
 * @return the decoded text, one character per byte after the 64-byte key (empty if there are none)
 * @throws IllegalArgumentException if the input is not valid Base64 or decodes to fewer than 64 bytes
 */
public String decodeJsonString(String jsonEncodedString)
{
    final int keyLength = 0x40;
    final int tableSize = 0x100;

    byte[] jsonBytes = Base64.getDecoder().decode(jsonEncodedString);
    if (jsonBytes.length < keyLength) {
        throw new IllegalArgumentException("Encoded reader data must decode to at least "
                + keyLength + " bytes, got " + jsonBytes.length);
    }

    // Sieve for the first 16 primes; one of the first 8 becomes the keystream step below.
    int[] primes = new int[0x10];
    boolean[] composite = new boolean[tableSize + 1];
    int primeCount = 0;
    for (int candidate = 2; primeCount < primes.length; candidate++) {
        if (composite[candidate]) {
            continue;
        }
        primes[primeCount++] = candidate;
        for (int multiple = candidate << 1; multiple <= tableSize; multiple += candidate) {
            composite[multiple] = true;
        }
    }

    // Fold the 64 key bytes into a single byte-sized checksum, bit by bit.
    int checksum = 0;
    for (int i = 0; i < keyLength; i++) {
        checksum ^= Byte.toUnsignedInt(jsonBytes[i]);
        for (int bit = 0; bit < 8; bit++) {
            checksum = (checksum & 1) == 1 ? (checksum >>> 1) ^ 0xC : checksum >>> 1;
        }
    }
    int step = primes[checksum & 0x7];

    // Start from the identity permutation of 0..255 and shuffle it with the key bytes.
    int[] table = new int[tableSize];
    for (int i = 0; i < tableSize; i++) {
        table[i] = i;
    }
    int shuffleIndex = 0;
    for (int i = 0; i < tableSize; i++) {
        shuffleIndex = (shuffleIndex + table[i] + Byte.toUnsignedInt(jsonBytes[i % keyLength])) % tableSize;
        int held = table[i];
        table[i] = table[shuffleIndex];
        table[shuffleIndex] = held;
    }

    // Generate one keystream byte per payload byte and XOR it away.
    int index1 = 0;
    int index2 = 0;
    int index3 = 0;
    int keystreamByte = 0;
    int payloadLength = jsonBytes.length - keyLength;
    StringBuilder decoded = new StringBuilder(payloadLength);
    for (int i = 0; i < payloadLength; i++) {
        index1 = (index1 + step) % tableSize;
        index2 = (index3 + table[(index2 + table[index1]) % tableSize]) % tableSize;
        index3 = (index3 + index1 + table[index1]) % tableSize;

        int held = table[index1];
        table[index1] = table[index2];
        table[index2] = held;

        keystreamByte = table[(index2 + table[(index1 + table[(keystreamByte + index3) % tableSize]) % tableSize]) % tableSize];
        decoded.append((char) (Byte.toUnsignedInt(jsonBytes[i + keyLength]) ^ keystreamByte));
    }
    return decoded.toString();
}
/**
 * Reinterprets the lowest eight bits of {@code signed} as an unsigned value.
 *
 * @param signed value whose low byte is taken; all higher bits are discarded
 * @return a number in the range 0..255
 */
private static long signedToUnsigned(int signed) {
    final byte lowByte = (byte) signed;
    return Byte.toUnsignedLong(lowByte);
}
}

View File

@ -2,14 +2,43 @@ package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import com.rarchives.ripme.ripper.rippers.HentaiNexusRipper;
import org.json.JSONObject;
import org.junit.Assert;
import org.junit.jupiter.api.Test;
public class HentainexusRipperTest extends RippersTest {
@Test
public void testHentaiNexusAlbum() throws IOException {
HentaiNexusRipper ripper = new HentaiNexusRipper(new URL("https://hentainexus.com/view/44"));
testRipper(ripper);
public void testHentaiNexusJson() throws IOException {
List<URL> testURLs = new ArrayList<>();
testURLs.add(new URL("https://hentainexus.com/view/9202"));
testURLs.add(new URL("https://hentainexus.com/read/9202"));
testURLs.add(new URL("https://hentainexus.com/view/9202#001"));
testURLs.add(new URL("https://hentainexus.com/read/9202#001"));
for (URL url : testURLs) {
HentaiNexusRipper ripper = new HentaiNexusRipper(url);
boolean testOK = false;
try {
String jsonEncodedString = ripper.getJsonEncodedStringFromPage();
String jsonDecodedString = ripper.decodeJsonString(jsonEncodedString);
JSONObject json = new JSONObject(jsonDecodedString);
// Fail test if JSON empty
testOK = !json.isEmpty();
} catch (Exception e) {
// Fail test if JSON invalid, not present or other errors
testOK = false;
}
Assert.assertEquals(true, testOK);
}
}
}