From 0dded85ddd5a16a0bf32020b0693d0d2298e9088 Mon Sep 17 00:00:00 2001 From: PaaaulZ <46759927+PaaaulZ@users.noreply.github.com> Date: Fri, 18 Dec 2020 22:17:41 +0100 Subject: [PATCH] Fixed ripper for HentaiNexus --- .../ripper/rippers/HentaiNexusRipper.java | 216 +++++++++++------- .../ripper/rippers/HentainexusRipperTest.java | 35 ++- 2 files changed, 167 insertions(+), 84 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java index 56ce0d2f..ca709418 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaiNexusRipper.java @@ -4,27 +4,22 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Base64; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.utils.Http; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +import com.rarchives.ripme.ripper.AbstractJSONRipper; +import org.jsoup.nodes.DataNode; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AbstractHTMLRipper; -import com.rarchives.ripme.ripper.DownloadThreadPool; -import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; - -public class HentaiNexusRipper extends AbstractHTMLRipper { - - private Document firstPage; - private DownloadThreadPool hentainexusThreadPool = new DownloadThreadPool("hentainexus"); - @Override - public DownloadThreadPool getThreadPool() { - return hentainexusThreadPool; - } +public class HentaiNexusRipper extends AbstractJSONRipper { public HentaiNexusRipper(URL url) throws IOException { super(url); @@ -34,7 +29,6 @@ public class HentaiNexusRipper extends AbstractHTMLRipper { public String 
getHost() { return "hentainexus"; } - @Override public String getDomain() { return "hentainexus.com"; @@ -42,88 +36,148 @@ public class HentaiNexusRipper extends AbstractHTMLRipper { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("https?://hentainexus\\.com/view/([a-zA-Z0-9_\\-%]*)/?$"); + /* + Valid URLs are /view/id, /read/id and those 2 with #pagenumber + https://hentainexus.com/view/9202 + https://hentainexus.com/read/9202 + https://hentainexus.com/view/9202#001 + https://hentainexus.com/read/9202#001 + */ + + Pattern p = Pattern.compile("^https?://hentainexus\\.com/(?:view|read)/([0-9]+)(?:\\#[0-9]+)*$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1); } throw new MalformedURLException("Expected hentainexus.com URL format: " + - "hentainexus.com/view/NUMBER - got " + url + " instead"); - } - - @Override - public Document getFirstPage() throws IOException { - // "url" is an instance field of the superclass - if (firstPage == null) { - firstPage = Http.url(url).get(); - } - return firstPage; - } - - @Override - public List getURLsFromPage(Document doc) { - List imageURLs = new ArrayList<>(); - Elements thumbs = doc.select("div.is-multiline > div.column > a"); - for (Element el : thumbs) { - imageURLs.add("https://" + getDomain() + el.attr("href")); - } - return imageURLs; - } - - @Override - public String getAlbumTitle(URL url) throws MalformedURLException { - try { - Document gallery = Http.url(url).get(); - return getHost() + "_" + gallery.select("h1.title").text(); - } catch (IOException e) { - LOGGER.info("Falling back"); - } - - return super.getAlbumTitle(url); + "hentainexus.com/view/id OR hentainexus.com/read/id - got " + url + "instead"); } @Override public void downloadURL(URL url, int index) { - HentaiNexusImageThread t = new HentaiNexusImageThread(url, index); - hentainexusThreadPool.addThread(t); + addURLToDownload(url, getPrefix(index)); } - /** - * Helper class 
to find and download images found on "image" pages - */ - private class HentaiNexusImageThread extends Thread { - private URL url; - private int index; - HentaiNexusImageThread(URL url, int index) { - super(); - this.url = url; - this.index = index; + @Override + protected List getURLsFromJSON(JSONObject json) throws JSONException { + + List urlList = new ArrayList<>(); + + JSONArray imagesList = json.getJSONArray("f"); + String host = json.getString("b"); + String folder = json.getString("r"); + String id = json.getString("i"); + + for (Object singleImage : imagesList) { + String hashTMP = ((JSONObject) singleImage).getString("h"); + String fileNameTMP = ((JSONObject) singleImage).getString("p"); + String imageUrlTMP = String.format("%s%s%s/%s/%s",host,folder,hashTMP,id,fileNameTMP); + urlList.add(imageUrlTMP); } - @Override - public void run() { - fetchImage(); - } + return urlList; + } - private void fetchImage() { - try { - Document doc = Http.url(url).retries(3).get(); - Elements images = doc.select("figure.image > img"); - if (images.isEmpty()) { - LOGGER.warn("Image not found at " + this.url); - return; + @Override + protected JSONObject getFirstPage() throws IOException { + String jsonEncodedString = getJsonEncodedStringFromPage(); + String jsonDecodedString = decodeJsonString(jsonEncodedString); + return new JSONObject(jsonDecodedString); + } + + public String getJsonEncodedStringFromPage() throws MalformedURLException, IOException + { + // Image data only appears on the /read/ page and not on the /view/ one. + URL readUrl = new URL(String.format("http://hentainexus.com/read/%s",getGID(url))); + Document document = Http.url(readUrl).response().parse(); + + for (Element scripts : document.getElementsByTag("script")) { + for (DataNode dataNode : scripts.dataNodes()) { + if (dataNode.getWholeData().contains("initReader")) { + // Extract JSON encoded string from the JavaScript initReader() call. 
+ String data = dataNode.getWholeData().trim().replaceAll("\\r|\\n|\\t",""); + + Pattern p = Pattern.compile(".*?initReader\\(\"(.*?)\",.*?\\).*?"); + Matcher m = p.matcher(data); + if (m.matches()) { + return m.group(1); + } } - Element image = images.first(); - String imgsrc = image.attr("src"); - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(imgsrc), prefix); - } catch (IOException e) { - LOGGER.error("[!] Exception while loading/parsing " + this.url, e); } } + return ""; } -} + + public String decodeJsonString(String jsonEncodedString) + { + /* + The initReader() JavaScript function accepts 2 parameters: a weird string and the window title (we can ignore this). + The weird string is a JSON string with some bytes shifted and swapped around and then encoded in base64. + The following code is a Java adaptation of the initReader() JavaScript function after manual deobfuscation. + */ + + byte[] jsonBytes = Base64.getDecoder().decode(jsonEncodedString); + + ArrayList unknownArray = new ArrayList(); + ArrayList indexesToUse = new ArrayList<>(); + + for (int i = 0x2; unknownArray.size() < 0x10; ++i) { + if (!indexesToUse.contains(i)) { + unknownArray.add(i); + for (int j = i << 0x1; j <= 0x100; j += i) { + if (!indexesToUse.contains(j)) { + indexesToUse.add(j); + } + } + } + } + + byte magicByte = 0x0; + for (int i = 0x0; i < 0x40; i++) { + magicByte = (byte) (signedToUnsigned(magicByte) ^ signedToUnsigned(jsonBytes[i])); + for (int j = 0x0; j < 0x8; j++) { + long unsignedMagicByteTMP = signedToUnsigned(magicByte); + magicByte = (byte) ((unsignedMagicByteTMP & 0x1) == 1 ? 
unsignedMagicByteTMP >>> 0x1 ^ 0xc : unsignedMagicByteTMP >>> 0x1); + } + } + + magicByte = (byte) (magicByte & 0x7); + ArrayList newArray = new ArrayList(); + + for (int i = 0x0; i < 0x100; i++) { + newArray.add(i); + } + + int newIndex = 0, backup = 0; + for (int i = 0x0; i < 0x100; i++) { + newIndex = (newIndex + newArray.get(i) + (int) signedToUnsigned(jsonBytes[i % 0x40])) % 0x100; + backup = newArray.get(i); + newArray.set(i, newArray.get(newIndex)); + newArray.set(newIndex, backup); + } + + int magicByteTranslated = (int) unknownArray.get(magicByte); + int index1 = 0x0, index2 = 0x0, index3 = 0x0, swap1 = 0x0, xorNumber = 0x0; + String decodedJsonString = ""; + + for (int i = 0x0; i + 0x40 < jsonBytes.length; i++) { + index1 = (index1 + magicByteTranslated) % 0x100; + index2 = (index3 + newArray.get((index2 + newArray.get(index1)) % 0x100)) % 0x100; + index3 = (index3 + index1 + newArray.get(index1)) % 0x100; + swap1 = newArray.get(index1); + newArray.set(index1, newArray.get(index2)); + newArray.set(index2,swap1); + xorNumber = newArray.get((index2 + newArray.get((index1 + newArray.get((xorNumber + index3) % 0x100)) % 0x100)) % 0x100); + decodedJsonString += Character.toString((char) signedToUnsigned((jsonBytes[i + 0x40] ^ xorNumber))); + } + + return decodedJsonString; + } + + + private static long signedToUnsigned(int signed) { + return (byte) signed & 0xFF; + } + +} \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HentainexusRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HentainexusRipperTest.java index cfe540fb..a244276c 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HentainexusRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HentainexusRipperTest.java @@ -2,14 +2,43 @@ package com.rarchives.ripme.tst.ripper.rippers; import java.io.IOException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import 
com.rarchives.ripme.ripper.rippers.HentaiNexusRipper; +import org.json.JSONObject; +import org.junit.Assert; import org.junit.jupiter.api.Test; public class HentainexusRipperTest extends RippersTest { @Test - public void testHentaiNexusAlbum() throws IOException { - HentaiNexusRipper ripper = new HentaiNexusRipper(new URL("https://hentainexus.com/view/44")); - testRipper(ripper); + public void testHentaiNexusJson() throws IOException { + List testURLs = new ArrayList<>(); + testURLs.add(new URL("https://hentainexus.com/view/9202")); + testURLs.add(new URL("https://hentainexus.com/read/9202")); + testURLs.add(new URL("https://hentainexus.com/view/9202#001")); + testURLs.add(new URL("https://hentainexus.com/read/9202#001")); + + for (URL url : testURLs) { + + HentaiNexusRipper ripper = new HentaiNexusRipper(url); + + boolean testOK = false; + try { + + String jsonEncodedString = ripper.getJsonEncodedStringFromPage(); + String jsonDecodedString = ripper.decodeJsonString(jsonEncodedString); + JSONObject json = new JSONObject(jsonDecodedString); + // Fail test if JSON empty + testOK = !json.isEmpty(); + + } catch (Exception e) { + // Fail test if JSON invalid, not present or other errors + testOK = false; + } + + Assert.assertEquals(true, testOK); + } + } }