
Merge branch 'main' into master

Stefan Aladzic
2021-02-12 23:02:43 +01:00
committed by GitHub
29 changed files with 817 additions and 116 deletions


@@ -9,7 +9,12 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
java: [1.8, 1.14]
java: [1.8]
include: # test newest java on one os only, upload from ubuntu java8
- os: ubuntu-latest
java: 1.15
- os: ubuntu-latest
upload: true
steps:
- uses: actions/checkout@v1
@@ -18,4 +23,12 @@ jobs:
with:
java-version: ${{ matrix.java }}
- name: Build with Maven
run: mvn package --file pom.xml
run: mvn -B package assembly:single --file pom.xml
- name: upload jar as asset
if: matrix.upload
uses: actions/upload-artifact@v2
with:
name: zipped-ripme-jar
path: target/*dependencies.jar
# vim:set ts=2 sw=2 et:

.gitignore

@@ -80,6 +80,12 @@ buildNumber.properties
# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
!/.mvn/wrapper/maven-wrapper.jar
### gradle ###
/.gradle
/build
# Avoid ignoring gradle wrapper jar file (.jar files are usually ignored)
!/gradle/wrapper/gradle-wrapper.jar
### Windows ###
# Windows thumbnail cache files
Thumbs.db


@@ -32,7 +32,7 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/
On macOS, there is a [cask](https://github.com/Homebrew/homebrew-cask/blob/master/Casks/ripme.rb).
```
brew cask install ripme && xattr -d com.apple.quarantine /Applications/ripme.jar
brew install --cask ripme && xattr -d com.apple.quarantine /Applications/ripme.jar
```
## Changelog



@@ -83,6 +83,11 @@
<artifactId>httpmime</artifactId>
<version>4.3.3</version>
</dependency>
<dependency>
<groupId>org.java-websocket</groupId>
<artifactId>Java-WebSocket</artifactId>
<version>1.5.1</version>
</dependency>
</dependencies>
<build>
<plugins>
@@ -132,7 +137,7 @@
<!-- At time of writing: JaCoCo is (allegedly) the only coverage report generator that supports Java 8 -->
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.5</version>
<version>0.8.6</version>
<executions>
<execution>
<id>prepare-agent</id>


@@ -282,7 +282,14 @@ class DownloadFileThread extends Thread {
logger.debug("IOException", e);
logger.error("[!] " + Utils.getLocalizedString("exception.while.downloading.file") + ": " + url + " - "
+ e.getMessage());
} finally {
} catch (NullPointerException npe) {
logger.error("[!] " + Utils.getLocalizedString("failed.to.download") + " for URL " + url);
observer.downloadErrored(url,
Utils.getLocalizedString("failed.to.download") + " " + url.toExternalForm());
return;
} finally {
// Close any open streams
try {
if (bis != null) {


@@ -0,0 +1,60 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
public class CyberdropRipper extends AbstractHTMLRipper {
public CyberdropRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "cyberdrop";
}
@Override
protected Document getFirstPage() throws IOException {
return Http.url(url).get();
}
@Override
public String getDomain() {
return "cyberdrop.me";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://cyberdrop\\.me/a/([a-zA-Z0-9]+).*?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected cyberdrop.me URL format: " +
"https://cyberdrop.me/a/xxxxxxxx - got " + url + "instead");
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
@Override
protected List<String> getURLsFromPage(Document page) {
ArrayList<String> urls = new ArrayList<>();
for (Element element: page.getElementsByClass("image")) {
urls.add(element.attr("href"));
}
return urls;
}
}


@@ -57,7 +57,7 @@ public class GfycatRipper extends AbstractHTMLRipper {
}
public boolean isProfile() {
Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9]+).*$");
Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9\\.\\-\\_]+).*$");
Matcher m = p.matcher(url.toExternalForm());
return m.matches();
}
@@ -79,11 +79,11 @@ public class GfycatRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(thumbs\\.|[wm\\.]*)gfycat\\.com/@?([a-zA-Z0-9]+).*$");
Pattern p = Pattern.compile("^https?://(?:thumbs\\.|[wm\\.]*)gfycat\\.com/@?([a-zA-Z0-9\\.\\-\\_]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches())
return m.group(2);
return m.group(1);
throw new MalformedURLException(
"Expected gfycat.com format: "


@@ -4,27 +4,22 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class HentaiNexusRipper extends AbstractHTMLRipper {
private Document firstPage;
private DownloadThreadPool hentainexusThreadPool = new DownloadThreadPool("hentainexus");
@Override
public DownloadThreadPool getThreadPool() {
return hentainexusThreadPool;
}
public class HentaiNexusRipper extends AbstractJSONRipper {
public HentaiNexusRipper(URL url) throws IOException {
super(url);
@@ -34,7 +29,6 @@ public class HentaiNexusRipper extends AbstractHTMLRipper {
public String getHost() {
return "hentainexus";
}
@Override
public String getDomain() {
return "hentainexus.com";
@@ -42,88 +36,148 @@ public class HentaiNexusRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://hentainexus\\.com/view/([a-zA-Z0-9_\\-%]*)/?$");
/*
Valid URLs are /view/id, /read/id and those 2 with #pagenumber
https://hentainexus.com/view/9202
https://hentainexus.com/read/9202
https://hentainexus.com/view/9202#001
https://hentainexus.com/read/9202#001
*/
Pattern p = Pattern.compile("^https?://hentainexus\\.com/(?:view|read)/([0-9]+)(?:\\#[0-9]+)*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected hentainexus.com URL format: " +
"hentainexus.com/view/NUMBER - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
if (firstPage == null) {
firstPage = Http.url(url).get();
}
return firstPage;
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
Elements thumbs = doc.select("div.is-multiline > div.column > a");
for (Element el : thumbs) {
imageURLs.add("https://" + getDomain() + el.attr("href"));
}
return imageURLs;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Document gallery = Http.url(url).get();
return getHost() + "_" + gallery.select("h1.title").text();
} catch (IOException e) {
LOGGER.info("Falling back");
}
return super.getAlbumTitle(url);
"hentainexus.com/view/id OR hentainexus.com/read/id - got " + url + "instead");
}
@Override
public void downloadURL(URL url, int index) {
HentaiNexusImageThread t = new HentaiNexusImageThread(url, index);
hentainexusThreadPool.addThread(t);
addURLToDownload(url, getPrefix(index));
}
/**
* Helper class to find and download images found on "image" pages
*/
private class HentaiNexusImageThread extends Thread {
private URL url;
private int index;
HentaiNexusImageThread(URL url, int index) {
super();
this.url = url;
this.index = index;
@Override
protected List<String> getURLsFromJSON(JSONObject json) throws JSONException {
List<String> urlList = new ArrayList<>();
JSONArray imagesList = json.getJSONArray("f");
String host = json.getString("b");
String folder = json.getString("r");
String id = json.getString("i");
for (Object singleImage : imagesList) {
String hashTMP = ((JSONObject) singleImage).getString("h");
String fileNameTMP = ((JSONObject) singleImage).getString("p");
String imageUrlTMP = String.format("%s%s%s/%s/%s",host,folder,hashTMP,id,fileNameTMP);
urlList.add(imageUrlTMP);
}
return urlList;
}
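
The decoded payload uses single-letter keys; here is a minimal sketch of the shape getURLsFromJSON expects, with hypothetical values (key meanings inferred from the code above):
```
// Hypothetical decoded reader payload: "b" = CDN base, "r" = folder,
// "i" = gallery id, "f" = image entries with "h" (hash) and "p" (file name).
JSONObject json = new JSONObject(
        "{\"b\":\"https://cdn.example/\",\"r\":\"galleries/\",\"i\":\"9202\","
        + "\"f\":[{\"h\":\"0a1b2c\",\"p\":\"001.jpg\"}]}");
// The format string above then assembles:
//   https://cdn.example/galleries/0a1b2c/9202/001.jpg
```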
@Override
public void run() {
fetchImage();
protected JSONObject getFirstPage() throws IOException {
String jsonEncodedString = getJsonEncodedStringFromPage();
String jsonDecodedString = decodeJsonString(jsonEncodedString);
return new JSONObject(jsonDecodedString);
}
private void fetchImage() {
try {
Document doc = Http.url(url).retries(3).get();
Elements images = doc.select("figure.image > img");
if (images.isEmpty()) {
LOGGER.warn("Image not found at " + this.url);
return;
}
Element image = images.first();
String imgsrc = image.attr("src");
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(imgsrc), prefix);
} catch (IOException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
public String getJsonEncodedStringFromPage() throws MalformedURLException, IOException
{
// Image data only appears on the /read/ page and not on the /view/ one.
URL readUrl = new URL(String.format("http://hentainexus.com/read/%s",getGID(url)));
Document document = Http.url(readUrl).response().parse();
for (Element scripts : document.getElementsByTag("script")) {
for (DataNode dataNode : scripts.dataNodes()) {
if (dataNode.getWholeData().contains("initReader")) {
// Extract JSON encoded string from the JavaScript initReader() call.
String data = dataNode.getWholeData().trim().replaceAll("\\r|\\n|\\t","");
Pattern p = Pattern.compile(".*?initReader\\(\"(.*?)\",.*?\\).*?");
Matcher m = p.matcher(data);
if (m.matches()) {
return m.group(1);
}
}
}
}
return "";
}
public String decodeJsonString(String jsonEncodedString)
{
/*
The initReader() JavaScript function takes 2 parameters: an obfuscated string and the window title (which we can ignore).
The obfuscated string is a JSON string with some bytes shifted and swapped around, then encoded in base64.
The following code is a Java adaptation of the initReader() JavaScript function after manual deobfuscation.
*/
byte[] jsonBytes = Base64.getDecoder().decode(jsonEncodedString);
ArrayList<Integer> unknownArray = new ArrayList<>();
ArrayList<Integer> indexesToUse = new ArrayList<>();
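// Sieve of Eratosthenes: unknownArray ends up holding the first 16 primes (2, 3, 5, ..., 53).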
for (int i = 0x2; unknownArray.size() < 0x10; ++i) {
if (!indexesToUse.contains(i)) {
unknownArray.add(i);
for (int j = i << 0x1; j <= 0x100; j += i) {
if (!indexesToUse.contains(j)) {
indexesToUse.add(j);
}
}
}
}
byte magicByte = 0x0;
for (int i = 0x0; i < 0x40; i++) {
magicByte = (byte) (signedToUnsigned(magicByte) ^ signedToUnsigned(jsonBytes[i]));
for (int j = 0x0; j < 0x8; j++) {
long unsignedMagicByteTMP = signedToUnsigned(magicByte);
magicByte = (byte) ((unsignedMagicByteTMP & 0x1) == 1 ? unsignedMagicByteTMP >>> 0x1 ^ 0xc : unsignedMagicByteTMP >>> 0x1);
}
}
magicByte = (byte) (magicByte & 0x7);
ArrayList<Integer> newArray = new ArrayList<>();
for (int i = 0x0; i < 0x100; i++) {
newArray.add(i);
}
int newIndex = 0, backup = 0;
for (int i = 0x0; i < 0x100; i++) {
newIndex = (newIndex + newArray.get(i) + (int) signedToUnsigned(jsonBytes[i % 0x40])) % 0x100;
backup = newArray.get(i);
newArray.set(i, newArray.get(newIndex));
newArray.set(newIndex, backup);
}
int magicByteTranslated = unknownArray.get(magicByte);
int index1 = 0x0, index2 = 0x0, index3 = 0x0, swap1 = 0x0, xorNumber = 0x0;
String decodedJsonString = "";
for (int i = 0x0; i + 0x40 < jsonBytes.length; i++) {
index1 = (index1 + magicByteTranslated) % 0x100;
index2 = (index3 + newArray.get((index2 + newArray.get(index1)) % 0x100)) % 0x100;
index3 = (index3 + index1 + newArray.get(index1)) % 0x100;
swap1 = newArray.get(index1);
newArray.set(index1, newArray.get(index2));
newArray.set(index2,swap1);
xorNumber = newArray.get((index2 + newArray.get((index1 + newArray.get((xorNumber + index3) % 0x100)) % 0x100)) % 0x100);
decodedJsonString.append((char) signedToUnsigned(jsonBytes[i + 0x40] ^ xorNumber));
}
return decodedJsonString.toString();
}
private static long signedToUnsigned(int signed) {
return (byte) signed & 0xFF;
}
}
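
For context, getJsonEncodedStringFromPage assumes the /read/ page embeds an inline script that calls initReader() with the base64 payload as its first argument. A self-contained sketch of just the extraction step, using a hypothetical script body:
```
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class InitReaderExtractDemo {
    public static void main(String[] args) {
        // Hypothetical inline script as it might appear on /read/<id>;
        // only the initReader("<base64>", ...) shape matters to the regex.
        String data = "window.onload = function() { initReader(\"eyJmIjpbXX0=\", \"Some Title\"); };"
                .trim().replaceAll("\\r|\\n|\\t", "");
        Pattern p = Pattern.compile(".*?initReader\\(\"(.*?)\",.*?\\).*?");
        Matcher m = p.matcher(data);
        if (m.matches()) {
            // Prints the still-scrambled payload; decodeJsonString() would unscramble it.
            System.out.println(m.group(1));
        }
    }
}
```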


@@ -0,0 +1,293 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.java_websocket.client.WebSocketClient;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.java_websocket.handshake.ServerHandshake;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
public class ScrolllerRipper extends AbstractJSONRipper {
public ScrolllerRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "scrolller";
}
@Override
public String getDomain() {
return "scrolller.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
// Typical URL is: https://scrolller.com/r/subreddit
// Parameters like "filter" and "sort" can be passed (ex: https://scrolller.com/r/subreddit?filter=xxx&sort=yyyy)
Pattern p = Pattern.compile("^https?://scrolller\\.com/r/([a-zA-Z0-9]+).*?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected scrolller.com URL format: " +
"scrolller.com/r/subreddit OR scroller.com/r/subreddit?filter= - got " + url + "instead");
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
private JSONObject prepareQuery(String iterator, String gid, String sortByString) throws IOException, URISyntaxException {
String QUERY_NOSORT = "query SubredditQuery( $url: String! $filter: SubredditPostFilter $iterator: String ) { getSubreddit(url: $url) { children( limit: 50 iterator: $iterator filter: $filter ) { iterator items { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } } } }";
String QUERY_SORT = "subscription SubredditSubscription( $url: String! $sortBy: SubredditSortBy $timespan: SubredditTimespan $iterator: String $limit: Int $filter: SubredditPostFilter ) { fetchSubreddit( url: $url sortBy: $sortBy timespan: $timespan iterator: $iterator limit: $limit filter: $filter ) { __typename ... on Subreddit { __typename url title secondaryTitle description createdAt isNsfw subscribers isComplete itemCount videoCount pictureCount albumCount isFollowing } ... on SubredditPost { __typename url title subredditTitle subredditUrl redditPath isNsfw albumUrl isFavorite mediaSources { url width height isOptimized } } ... on Iterator { iterator } ... on Error { message } } }";
String filterString = convertFilterString(getParameter(this.url,"filter"));
JSONObject variablesObject = new JSONObject().put("url", String.format("/r/%s", gid)).put("sortBy", sortByString.toUpperCase());
JSONObject finalQueryObject = new JSONObject().put("variables", variablesObject).put("query", sortByString.equals("") ? QUERY_NOSORT : QUERY_SORT);
if (iterator != null) {
// Iterator is not present on the first page
variablesObject.put("iterator", iterator);
}
if (!filterString.equals("NOFILTER")) {
variablesObject.put("filter", filterString);
}
return sortByString.equals("") ? getPosts(finalQueryObject) : getPostsSorted(finalQueryObject);
}
public String convertFilterString(String filterParameter) {
// Converts the ?filter= parameter of the URL to one that can be used in the GraphQL query
// I could basically remove the last "s" and call toUpperCase instead of this switch statement but this looks easier to read.
switch (filterParameter.toLowerCase()) {
case "pictures":
return "PICTURE";
case "videos":
return "VIDEO";
case "albums":
return "ALBUM";
case "":
return "NOFILTER";
default:
LOGGER.error(String.format("Invalid filter %s using no filter",filterParameter));
return "";
}
}
public String getParameter(URL url, String parameter) throws MalformedURLException {
// Gets passed parameters from the URL
String toReplace = String.format("https://scrolller.com/r/%s?",getGID(url));
List<NameValuePair> args = URLEncodedUtils.parse(url.toExternalForm(), Charset.defaultCharset());
for (NameValuePair arg:args) {
// First parameter contains part of the url so we have to remove it
// Ex: for the url https://scrolller.com/r/CatsStandingUp?filter=xxxx&sort=yyyy
// 1) arg.getName() => https://scrolller.com/r/CatsStandingUp?filter
// 2) arg.getName() => sort
if (arg.getName().replace(toReplace, "").toLowerCase().equals(parameter)) {
return arg.getValue();
}
}
return "";
}
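
As the comments in getParameter note, URLEncodedUtils.parse is handed the whole URL rather than just its query string, so the first pair's name absorbs everything up to the first "=". A quick self-contained illustration of that quirk:
```
import java.nio.charset.Charset;
import java.util.List;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;

public class ScrolllerParamDemo {
    public static void main(String[] args) {
        List<NameValuePair> pairs = URLEncodedUtils.parse(
                "https://scrolller.com/r/CatsStandingUp?filter=pictures&sort=top",
                Charset.defaultCharset());
        for (NameValuePair pair : pairs) {
            System.out.println(pair.getName() + " = " + pair.getValue());
        }
        // prints:
        //   https://scrolller.com/r/CatsStandingUp?filter = pictures
        //   sort = top
        // which is why getParameter() strips the URL prefix from the first name.
    }
}
```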
private JSONObject getPosts(JSONObject data) {
// The actual GraphQL query call
try {
String url = "https://api.scrolller.com/api/v2/graphql";
URL obj = new URL(url);
HttpURLConnection conn = (HttpURLConnection) obj.openConnection();
conn.setReadTimeout(5000);
conn.addRequestProperty("Accept-Language", "en-US,en;q=0.8");
conn.addRequestProperty("User-Agent", "Mozilla");
conn.addRequestProperty("Referer", "scrolller.com");
conn.setDoOutput(true);
OutputStreamWriter w = new OutputStreamWriter(conn.getOutputStream(), "UTF-8");
w.write(data.toString());
w.close();
BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
String inputLine;
StringBuffer jsonString = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
jsonString.append(inputLine);
}
in.close();
conn.disconnect();
return new JSONObject(jsonString.toString());
} catch (Exception e) {
e.printStackTrace();
}
return new JSONObject("{}");
}
private JSONObject getPostsSorted(JSONObject data) throws MalformedURLException {
// The actual GraphQL query call (if sort parameter is present)
try {
ArrayList<String> postsJsonStrings = new ArrayList<>();
WebSocketClient wsc = new WebSocketClient(new URI("wss://api.scrolller.com/api/v2/graphql")) {
@Override
public void onOpen(ServerHandshake serverHandshake) {
// As soon as the WebSocket connects send our query
this.send(data.toString());
}
@Override
public void onMessage(String s) {
postsJsonStrings.add(s);
if (new JSONObject(s).getJSONObject("data").getJSONObject("fetchSubreddit").has("iterator")) {
this.close();
}
}
@Override
public void onClose(int i, String s, boolean b) {
}
@Override
public void onError(Exception e) {
LOGGER.error(String.format("WebSocket error, server reported %s", e.getMessage()));
}
};
wsc.connect();
while (!wsc.isClosed()) {
// Busy-wait: the post list is complete only once the server closes the connection.
}
JSONObject finalObject = new JSONObject();
JSONArray posts = new JSONArray();
// Iterator is the last object in the post list; duplicate it in its own object for clarity.
finalObject.put("iterator", new JSONObject(postsJsonStrings.get(postsJsonStrings.size()-1)));
for (String postString : postsJsonStrings) {
posts.put(new JSONObject(postString));
}
finalObject.put("posts", posts);
if (finalObject.getJSONArray("posts").length() == 1 && !finalObject.getJSONArray("posts").getJSONObject(0).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) {
// Only iterator, no posts.
return null;
}
return finalObject;
} catch (URISyntaxException ue) {
// Nothing to catch, it's a hardcoded URI.
}
return null;
}
@Override
protected List<String> getURLsFromJSON(JSONObject json) throws JSONException {
boolean sortRequested = json.has("posts");
int bestArea = 0;
String bestUrl = "";
List<String> list = new ArrayList<>();
JSONArray itemsList = sortRequested ? json.getJSONArray("posts") : json.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").getJSONArray("items");
for (Object item : itemsList) {
if (sortRequested && !((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").has("mediaSources")) {
continue;
}
JSONArray sourcesTMP = sortRequested ? ((JSONObject) item).getJSONObject("data").getJSONObject("fetchSubreddit").getJSONArray("mediaSources") : ((JSONObject) item).getJSONArray("mediaSources");
for (Object sourceTMP : sourcesTMP)
{
int widthTMP = ((JSONObject) sourceTMP).getInt("width");
int heightTMP = ((JSONObject) sourceTMP).getInt("height");
int areaTMP = widthTMP * heightTMP;
if (areaTMP > bestArea) {
bestArea = areaTMP;
bestUrl = ((JSONObject) sourceTMP).getString("url");
}
}
list.add(bestUrl);
bestUrl = "";
bestArea = 0;
}
return list;
}
@Override
protected JSONObject getFirstPage() throws IOException {
try {
return prepareQuery(null, this.getGID(url), getParameter(url,"sort"));
} catch (URISyntaxException e) {
LOGGER.error(String.format("Error obtaining first page: %s", e.getMessage()));
return null;
}
}
@Override
public JSONObject getNextPage(JSONObject source) throws IOException {
// Every call to the API returns an "iterator" string that we must pass back to get the next page.
// A plain null check fails because org.json represents JSON null as the JSONObject.NULL sentinel, hence the iterator.toString().equals("null") comparison.
Object iterator = null;
if (source.has("iterator")) {
// Sort requested, custom JSON.
iterator = source.getJSONObject("iterator").getJSONObject("data").getJSONObject("fetchSubreddit").get("iterator");
} else {
iterator = source.getJSONObject("data").getJSONObject("getSubreddit").getJSONObject("children").get("iterator");
}
if (!iterator.toString().equals("null")) {
// Need to change page.
try {
return prepareQuery(iterator.toString(), this.getGID(url), getParameter(url,"sort"));
} catch (URISyntaxException e) {
LOGGER.error(String.format("Error changing page: %s", e.getMessage()));
return null;
}
} else {
return null;
}
}
}


@@ -0,0 +1,68 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class SoundgasmRipper extends AbstractHTMLRipper {
private static final String HOST = "soundgasm.net";
public SoundgasmRipper(URL url) throws IOException {
super(new URL(url.toExternalForm()));
}
@Override
protected String getDomain() { return "soundgasm.net"; }
@Override
public String getHost() { return "soundgasm"; }
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^/u/([a-zA-Z0-9_-]+)/([a-zA-Z0-9_-]+).*$");
Matcher m = p.matcher(url.getFile());
if (m.find()) {
return m.group(m.groupCount());
}
throw new MalformedURLException(
"Expected soundgasm.net format: "
+ "soundgasm.net/u/username/id - got " + url + " instead");
}
@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> res = new ArrayList<>();
Elements script = page.select("script");
Pattern p = Pattern.compile("m4a\\:\\s\"(https?:.*)\\\"");
for (Element e: script) {
Matcher m = p.matcher(e.data());
if (m.find()) { res.add(m.group(1)); }
}
return res;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
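
getURLsFromPage relies on the audio URL being embedded in an inline script as an m4a: "..." property. A sketch of the matching step against a hypothetical script body (the real page markup may differ):
```
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SoundgasmRegexDemo {
    public static void main(String[] args) {
        // Hypothetical player setup; only the m4a: "..." shape matters.
        String script = "player.setMedia({ m4a: \"https://media.soundgasm.net/sounds/abc123.m4a\" });";
        Matcher m = Pattern.compile("m4a\\:\\s\"(https?:.*)\\\"").matcher(script);
        if (m.find()) {
            System.out.println(m.group(1)); // https://media.soundgasm.net/sounds/abc123.m4a
        }
    }
}
```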


@@ -48,8 +48,7 @@ public class XhamsterRipper extends AbstractHTMLRipper {
return url;
}
String URLToReturn = url.toExternalForm();
URLToReturn = URLToReturn.replaceAll("https?://\\w?\\w?\\.?xhamster\\.", "https://m.xhamster.");
URLToReturn = URLToReturn.replaceAll("https?://xhamster2\\.", "https://m.xhamster2.");
URLToReturn = URLToReturn.replaceAll("https?://\\w?\\w?\\.?xhamster([^<]*)\\.", "https://m.xhamster$1.");
URL san_url = new URL(URLToReturn);
LOGGER.info("sanitized URL is " + san_url.toExternalForm());
return san_url;
@@ -57,20 +56,20 @@ public class XhamsterRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[\\w\\w.]*xhamster2?\\.(com|one|desi)/photos/gallery/.*?(\\d+)$");
Pattern p = Pattern.compile("^https?://([\\w\\w]*\\.)?xhamster([^<]*)\\.(com|one|desi)/photos/gallery/.*?(\\d+)$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
return m.group(4);
}
p = Pattern.compile("^https?://[\\w\\w.]*xhamster2?\\.(com|one|desi)/users/([a-zA-Z0-9_-]+)/(photos|videos)(/\\d+)?");
p = Pattern.compile("^https?://[\\w\\w.]*xhamster([^<]*)\\.(com|one|desi)/users/([a-zA-Z0-9_-]+)/(photos|videos)(/\\d+)?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return "user_" + m.group(1);
}
p = Pattern.compile("^https?://.*xhamster2?\\.(com|one|desi)/(movies|videos)/(.*)$");
p = Pattern.compile("^https?://.*xhamster([^<]*)\\.(com|one|desi)/(movies|videos)/(.*$)");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(2);
return m.group(4);
}
throw new MalformedURLException(
@@ -97,7 +96,7 @@ public class XhamsterRipper extends AbstractHTMLRipper {
@Override
public boolean pageContainsAlbums(URL url) {
Pattern p = Pattern.compile("^https?://[\\w\\w.]*xhamster2?\\.(com|one|desi)/users/([a-zA-Z0-9_-]+)/(photos|videos)(/\\d+)?");
Pattern p = Pattern.compile("^https?://[\\w\\w.]*xhamster([^<]*)\\.(com|one|desi)/users/([a-zA-Z0-9_-]+)/(photos|videos)(/\\d+)?");
Matcher m = p.matcher(url.toExternalForm());
LOGGER.info("Checking if page has albums");
LOGGER.info(m.matches());
@@ -113,17 +112,17 @@ public class XhamsterRipper extends AbstractHTMLRipper {
@Override
public boolean canRip(URL url) {
Pattern p = Pattern.compile("^https?://([\\w\\w]*\\.)?xhamster2?\\.(com|one|desi)/photos/gallery/.*?(\\d+)$");
Pattern p = Pattern.compile("^https?://([\\w\\w]*\\.)?xhamster([^<]*)\\.(com|one|desi)/photos/gallery/.*?(\\d+)$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return true;
}
p = Pattern.compile("^https?://[\\w\\w.]*xhamster2?\\.(com|one|desi)/users/([a-zA-Z0-9_-]+)/(photos|videos)(/\\d+)?");
p = Pattern.compile("^https?://[\\w\\w.]*xhamster([^<]*)\\.(com|one|desi)/users/([a-zA-Z0-9_-]+)/(photos|videos)(/\\d+)?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return true;
}
p = Pattern.compile("^https?://.*xhamster2?\\.(com|one|desi)/(movies|videos)/.*$");
p = Pattern.compile("^https?://.*xhamster([^<]*)\\.(com|one|desi)/(movies|videos)/(.*$)");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return true;
@@ -132,7 +131,7 @@ public class XhamsterRipper extends AbstractHTMLRipper {
}
private boolean isVideoUrl(URL url) {
Pattern p = Pattern.compile("^https?://.*xhamster2?\\.(com|one|desi)/(movies|videos)/.*$");
Pattern p = Pattern.compile("^https?://.*xhamster([^<]*)\\.(com|one|desi)/(movies|videos)/(.*$)");
Matcher m = p.matcher(url.toExternalForm());
return m.matches();
}
@@ -141,10 +140,8 @@ public class XhamsterRipper extends AbstractHTMLRipper {
public Document getNextPage(Document doc) throws IOException {
if (doc.select("a.prev-next-list-link").first() != null) {
String nextPageUrl = doc.select("a.prev-next-list-link").first().attr("href");
System.out.println(nextPageUrl);
if (nextPageUrl.startsWith("http")) {
nextPageUrl = nextPageUrl.replaceAll("https?://\\w?\\w?\\.?xhamster\\.", "https://m.xhamster.");
nextPageUrl = nextPageUrl.replaceAll("https?://xhamster2\\.", "https://m.xhamster2.");
nextPageUrl = nextPageUrl.replaceAll("https?://\\w?\\w?\\.?xhamster([^<]*)\\.", "https://m.xhamster$1.");
return Http.url(nextPageUrl).get();
}
}
@@ -168,8 +165,7 @@ public class XhamsterRipper extends AbstractHTMLRipper {
try {
// This works around some redirect fuckery xhamster likes to do where visiting m.xhamster.com sends to
// the page chamster.com but displays the mobile site from m.xhamster.com
pageWithImageUrl = pageWithImageUrl.replaceAll("://xhamster\\.", "://m.xhamster.");
pageWithImageUrl = pageWithImageUrl.replaceAll("://xhamster2\\.", "://m.xhamster.");
pageWithImageUrl = pageWithImageUrl.replaceAll("://xhamster([^<]*)\\.", "://m.xhamster$1.");
String image = Http.url(new URL(pageWithImageUrl)).get().select("a > img#photoCurr").attr("src");
result.add(image);
downloadFile(image);
@@ -187,8 +183,7 @@ public class XhamsterRipper extends AbstractHTMLRipper {
String image = page.attr("href");
// This works around some redirect fuckery xhamster likes to do where visiting m.xhamster.com sends to
// the page chamster.com but displays the mobile site from m.xhamster.com
image = image.replaceAll("://xhamster\\.", "://m.xhamster.");
image = image.replaceAll("://xhamster2\\.", "://m.xhamster.");
image = image.replaceAll("://xhamster([^<]*)\\.", "://m.xhamster$1.");
result.add(image);
downloadFile(image);
}
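
The single consolidated pattern now rewrites any xhamster mirror host (xhamster2, xhamster5, ...) to its m. equivalent in one pass; a quick check using a gallery URL that appears in the updated tests below:
```
public class XhamsterRewriteDemo {
    public static void main(String[] args) {
        String url = "https://xhamster5.desi/photos/gallery/japanese-dolls-4-asahi-mizuno-7254664";
        String mobile = url.replaceAll(
                "https?://\\w?\\w?\\.?xhamster([^<]*)\\.", "https://m.xhamster$1.");
        // -> https://m.xhamster5.desi/photos/gallery/japanese-dolls-4-asahi-mizuno-7254664
        System.out.println(mobile);
    }
}
```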


@@ -14,6 +14,7 @@ import com.rarchives.ripme.ripper.rippers.ImgurRipper;
import com.rarchives.ripme.ripper.rippers.RedgifsRipper;
import com.rarchives.ripme.ripper.rippers.VidbleRipper;
import com.rarchives.ripme.ripper.rippers.GfycatRipper;
import com.rarchives.ripme.ripper.rippers.SoundgasmRipper;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
@@ -127,6 +128,20 @@ public class RipUtils {
}
return result;
}
else if (url.toExternalForm().contains("soundgasm.net")) {
try {
logger.info("Getting soundgasm page " + url);
SoundgasmRipper r = new SoundgasmRipper(url);
Document tempDoc = r.getFirstPage();
for (String u : r.getURLsFromPage(tempDoc)) {
result.add(new URL(u));
}
} catch (IOException e) {
logger.warn("Exception while retrieving soundgasm page:", e);
}
return result;
}
Pattern p = Pattern.compile("https?://i.reddituploads.com/([a-zA-Z0-9]+)\\?.*");
Matcher m = p.matcher(url.toExternalForm());


@@ -486,8 +486,15 @@ public class Utils {
return text;
}
/**
* Removes any potentially unsafe characters from a string and truncates it to a maximum length of 100 characters.
* Characters considered safe are alphanumeric characters as well as minus, dot, comma, underscore, and space.
*
* @param text The potentially unsafe text
* @return a filesystem safe string
*/
public static String filesystemSafe(String text) {
text = text.replaceAll("[^a-zA-Z0-9.-]", "_").replaceAll("__", "_").replaceAll("_+$", "");
text = text.replaceAll("[^a-zA-Z0-9-.,_ ]", "");
if (text.length() > 100) {
text = text.substring(0, 100);
}
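
A quick illustration of the behavior change, with outputs worked out from the two regex rules above:
```
String unsafe = "My File: draft/v2 (final)?.txt";
// old rule: unsafe characters became '_', doubled '_' collapsed, trailing '_' trimmed
//   -> "My_File_draft_v2_final_.txt"
// new rule: unsafe characters are stripped instead, and comma and space are now considered safe
Utils.filesystemSafe(unsafe);  // -> "My File draftv2 final.txt"
```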


@@ -0,0 +1,51 @@
package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.CyberdropRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class CyberdropRipperTest extends RippersTest {
@Test
public void testCyberdropGID() throws IOException {
Map<URL, String> testURLs = new HashMap<>();
testURLs.put(new URL("https://cyberdrop.me/a/n4umdBjw"), "n4umdBjw");
testURLs.put(new URL("https://cyberdrop.me/a/iLtp4BjW"), "iLtp4BjW");
for (URL url : testURLs.keySet()) {
CyberdropRipper ripper = new CyberdropRipper(url);
ripper.setup();
Assertions.assertEquals(testURLs.get(url), ripper.getGID(ripper.getURL()));
deleteDir(ripper.getWorkingDir());
}
}
@Test
public void testCyberdropNumberOfFiles() throws IOException {
List<URL> testURLs = new ArrayList<>();
testURLs.add(new URL("https://cyberdrop.me/a/n4umdBjw"));
testURLs.add(new URL("https://cyberdrop.me/a/iLtp4BjW"));
for (URL url : testURLs) {
Assertions.assertTrue(willDownloadAllFiles(url));
}
}
public boolean willDownloadAllFiles(URL url) throws IOException {
Document doc = Http.url(url).get();
int numberOfLinks = doc.getElementsByClass("image").size();
int numberOfFiles = Integer.parseInt(doc.getElementById("totalFilesAmount").text());
return numberOfLinks == numberOfFiles;
}
}


@@ -44,4 +44,13 @@ public class GfycatRipperTest extends RippersTest {
GfycatRipper ripper = new GfycatRipper(new URL("https://gfycat.com/amp/TemptingExcellentIchthyosaurs"));
testRipper(ripper);
}
/**
* Rips a Gfycat profile with special characters in username
* @throws IOException
*/
@Test
public void testGfycatSpecialChar() throws IOException {
GfycatRipper ripper = new GfycatRipper(new URL("https://gfycat.com/@rsss.kr"));
testRipper(ripper);
}
}


@@ -4,10 +4,12 @@ import java.io.IOException;
import java.net.URL;
import com.rarchives.ripme.ripper.rippers.Hentai2readRipper;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
public class Hentai2readRipperTest extends RippersTest {
@Test
@Tag("flaky")
public void testHentai2readAlbum() throws IOException {
Hentai2readRipper ripper = new Hentai2readRipper(new URL("https://hentai2read.com/sm_school_memorial/1/"));
testRipper(ripper);


@@ -2,14 +2,43 @@ package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import com.rarchives.ripme.ripper.rippers.HentaiNexusRipper;
import org.json.JSONObject;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
public class HentainexusRipperTest extends RippersTest {
@Test
public void testHentaiNexusAlbum() throws IOException {
HentaiNexusRipper ripper = new HentaiNexusRipper(new URL("https://hentainexus.com/view/44"));
testRipper(ripper);
public void testHentaiNexusJson() throws IOException {
List<URL> testURLs = new ArrayList<>();
testURLs.add(new URL("https://hentainexus.com/view/9202"));
testURLs.add(new URL("https://hentainexus.com/read/9202"));
testURLs.add(new URL("https://hentainexus.com/view/9202#001"));
testURLs.add(new URL("https://hentainexus.com/read/9202#001"));
for (URL url : testURLs) {
HentaiNexusRipper ripper = new HentaiNexusRipper(url);
boolean testOK = false;
try {
String jsonEncodedString = ripper.getJsonEncodedStringFromPage();
String jsonDecodedString = ripper.decodeJsonString(jsonEncodedString);
JSONObject json = new JSONObject(jsonDecodedString);
// Fail test if JSON empty
testOK = !json.isEmpty();
} catch (Exception e) {
// Fail test if JSON invalid, not present or other errors
testOK = false;
}
Assertions.assertTrue(testOK);
}
}
}


@@ -4,10 +4,12 @@ import java.io.IOException;
import java.net.URL;
import com.rarchives.ripme.ripper.rippers.ImagebamRipper;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
public class ImagebamRipperTest extends RippersTest {
@Test
@Tag("flaky")
public void testImagebamRip() throws IOException {
ImagebamRipper ripper = new ImagebamRipper(new URL("http://www.imagebam.com/gallery/488cc796sllyf7o5srds8kpaz1t4m78i"));
testRipper(ripper);


@@ -30,6 +30,7 @@ public class ImagefapRipperTest extends RippersTest {
}
}
@Test
@Tag("flaky")
public void testImagefapGetAlbumTitle() throws IOException {
URL url = new URL("https://www.imagefap.com/gallery.php?gid=7789753");
ImagefapRipper ripper = new ImagefapRipper(url);


@@ -5,10 +5,12 @@ import java.net.URL;
import com.rarchives.ripme.ripper.rippers.MotherlessRipper;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
public class MotherlessRipperTest extends RippersTest {
@Test
@Tag("flaky")
public void testMotherlessAlbumRip() throws IOException {
MotherlessRipper ripper = new MotherlessRipper(new URL("https://motherless.com/G1168D90"));
testRipper(ripper);


@@ -29,7 +29,7 @@ public class PornhubRipperTest extends RippersTest {
@Test
public void testGetNextPage() throws IOException {
String baseURL = "https://www.pornhub.com/album/43902391";
String baseURL = "https://www.pornhub.com/album/30687901";
PornhubRipper ripper = new PornhubRipper(new URL(baseURL));
Document page = Http.url(baseURL).get();
int numPagesRemaining = 1;


@@ -61,6 +61,7 @@ public class RedditRipperTest extends RippersTest {
}
@Test
@Tag("flaky")
public void testRedditGallery() throws IOException{
RedditRipper ripper = new RedditRipper(
new URL("https://www.reddit.com/gallery/hrrh23"));


@@ -46,6 +46,7 @@ public class RedgifsRipperTest extends RippersTest {
* @throws IOException
*/
@Test
@Disabled("test or ripper broken")
public void testRedgifsSearch() throws IOException {
RedgifsRipper ripper = new RedgifsRipper(new URL("https://redgifs.com/gifs/browse/little-caprice"));
Document doc = ripper.getFirstPage();


@@ -0,0 +1,53 @@
package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.ScrolllerRipper;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
public class ScrolllerRipperTest extends RippersTest {
@Test
public void testScrolllerGID() throws IOException {
Map<URL, String> testURLs = new HashMap<>();
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp"), "CatsStandingUp");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=pictures"), "CatsStandingUp");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?sort=top&filter=pictures"), "CatsStandingUp");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=pictures&sort=top"), "CatsStandingUp");
for (URL url : testURLs.keySet()) {
ScrolllerRipper ripper = new ScrolllerRipper(url);
ripper.setup();
Assertions.assertEquals(testURLs.get(url), ripper.getGID(ripper.getURL()));
deleteDir(ripper.getWorkingDir());
}
}
@Test
public void testScrolllerFilterRegex() throws IOException {
Map<URL, String> testURLs = new HashMap<>();
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp"), "NOFILTER");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=pictures"), "PICTURE");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=videos"), "VIDEO");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=albums"), "ALBUM");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?sort=top&filter=pictures"), "PICTURE");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?sort=top&filter=videos"), "VIDEO");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?sort=top&filter=albums"), "ALBUM");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=pictures&sort=top"), "PICTURE");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=videos&sort=top"), "VIDEO");
testURLs.put(new URL("https://scrolller.com/r/CatsStandingUp?filter=albums&sort=top"), "ALBUM");
for (URL url : testURLs.keySet()) {
ScrolllerRipper ripper = new ScrolllerRipper(url);
ripper.setup();
Assertions.assertEquals(testURLs.get(url), ripper.convertFilterString(ripper.getParameter(ripper.getURL(),"filter")));
deleteDir(ripper.getWorkingDir());
}
}
}


@@ -0,0 +1,23 @@
package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.RedditRipper;
import org.junit.jupiter.api.Test;
import com.rarchives.ripme.ripper.rippers.SoundgasmRipper;
import java.io.IOException;
import java.net.URL;
public class SoundgasmRipperTest extends RippersTest {
@Test
public void testSoundgasmURLs() throws IOException {
SoundgasmRipper ripper = new SoundgasmRipper(new URL("https://soundgasm.net/u/_Firefly_xoxo/Rambles-with-my-Lovense"));
testRipper(ripper);
}
@Test
public void testRedditSoundgasmURL() throws IOException {
RedditRipper ripper = new RedditRipper(new URL("https://www.reddit.com/r/gonewildaudio/comments/kn1bvj/f4m_mistress_controlled_my_lovense_while_i_tried/"));
testRipper(ripper);
}
}


@@ -51,6 +51,7 @@ public class WordpressComicRipperTest extends RippersTest {
testRipper(ripper);
}
@Test
@Tag("flaky")
public void test_konradokonski_1() throws IOException {
WordpressComicRipper ripper = new WordpressComicRipper(
new URL("http://www.konradokonski.com/sawdust/comic/get-up/"));
@@ -58,6 +59,7 @@ public class WordpressComicRipperTest extends RippersTest {
}
@Test
@Tag("flaky")
public void test_konradokonski_2() throws IOException {
WordpressComicRipper ripper = new WordpressComicRipper(
new URL("http://www.konradokonski.com/wiory/comic/08182008/"));


@@ -35,7 +35,7 @@ public class XhamsterRipperTest extends RippersTest {
@Test
@Tag("flaky")
public void testXhamsterAlbumDesiDomain() throws IOException {
XhamsterRipper ripper = new XhamsterRipper(new URL("https://xhamster.desi/photos/gallery/japanese-dolls-4-asahi-mizuno-7254664"));
XhamsterRipper ripper = new XhamsterRipper(new URL("https://xhamster5.desi/photos/gallery/japanese-dolls-4-asahi-mizuno-7254664"));
testRipper(ripper);
}
@Test
@@ -49,9 +49,9 @@ public class XhamsterRipperTest extends RippersTest {
XhamsterRipper ripper = new XhamsterRipper(new URL("https://pt.xhamster.com/photos/gallery/silvana-7105696"));
testRipper(ripper);
}
@Test
public void testGetGID() throws IOException {
URL url = new URL("https://xhamster.com/photos/gallery/japanese-dolls-4-asahi-mizuno-7254664");
URL url = new URL("https://xhamster5.desi/photos/gallery/japanese-dolls-4-asahi-mizuno-7254664");
XhamsterRipper ripper = new XhamsterRipper(url);
Assertions.assertEquals("7254664", ripper.getGID(url));
}


@@ -1,6 +1,7 @@
package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.YoupornRipper;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import java.io.IOException;
@@ -10,6 +11,7 @@ import java.util.List;
public class YoupornRipperTest extends RippersTest {
@Test
@Tag("flaky")
public void testYoupornRipper() throws IOException {
List<URL> contentURLs = new ArrayList<>();
contentURLs.add(new URL("http://www.youporn.com/watch/7669155/mrs-li-amateur-69-orgasm/?from=categ"));