mirror of https://github.com/RipMeApp/ripme.git
synced 2025-01-16 20:28:15 +01:00

Merge branch 'main' into luscious-Fixes

This commit is contained in:
commit 87b386e2f7
30 .github/ISSUE_TEMPLATE.md vendored

@@ -1,15 +1,15 @@
* Ripme version:
* Java version: <!-- (output of `java -version`) -->
* Operating system: <!-- (if Windows, output of `ver` or `winver`) -->
<!-- Please do not link to content featuring underage characters even if the characters are drawn.
These works are still illegal in many places including much of America -->
* Exact URL you were trying to rip when the problem occurred:
* Please include any additional information about how to reproduce the problem:

## Expected Behavior

Detail the expected behavior here.

## Actual Behavior

Detail the actual (incorrect) behavior here. You can post log snippets or attach log files to your issue report.
54 .github/PULL_REQUEST_TEMPLATE.md vendored

@@ -1,27 +1,27 @@
# Category

This change is exactly one of the following (please change `[ ]` to `[x]`) to indicate which:
* [ ] a bug fix (Fix #...)
* [ ] a new Ripper
* [ ] a refactoring
* [ ] a style change/fix
* [ ] a new feature

# Description

Please add details about your change here.

# Testing

Required verification:
* [ ] I've verified that there are no regressions in `mvn test` (there are no new failures or errors).
* [ ] I've verified that this change works as intended.
  * [ ] Downloads all relevant content.
  * [ ] Downloads content from multiple pages (as necessary or appropriate).
  * [ ] Saves content at reasonable file names (e.g. page titles or content IDs) to help easily browse downloaded content.
* [ ] I've verified that this change did not break existing functionality (especially in the Ripper I modified).

Optional but recommended:
* [ ] I've added a unit test to cover my change.
65 .github/workflows/gradle.yml vendored Normal file

@@ -0,0 +1,65 @@
name: CI + release

on:
  pull_request:
  push:
    branches:
      - '**'
    tags:
      - '!**'
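      # '!**' negates all tag patterns, so tag pushes do not trigger this
      # workflow while pushes to any branch still do.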

jobs:
  build:

    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macOS-latest]
        java: [23]
        include: # test old java on one os only, upload from ubuntu java-21
          - os: ubuntu-latest
            java: 21
            upload: true

    steps:

      - uses: actions/checkout@v1

      - name: Set environment CI_ variables
        id: ci-env
        uses: FranzDiebold/github-env-vars-action@v2

      - name: Set up java
        uses: actions/setup-java@v4.2.1
        with:
          java-version: ${{ matrix.java }}
          distribution: zulu
          cache: gradle

      - name: Build with Gradle
        run: gradle clean build -PjavacRelease=${{ matrix.java }}

      - name: SHA256
        if: matrix.upload
        run: shasum -a 256 build/libs/*.jar

      - name: upload jar as asset
        if: matrix.upload
        uses: actions/upload-artifact@v4
        with:
          name: zipped-ripme-jar
          path: build/libs/*.jar

      - name: create pre-release
        id: create-pre-release
        if: matrix.upload
        uses: "marvinpinto/action-automatic-releases@latest"
        with:
          repo_token: "${{ secrets.GITHUB_TOKEN }}"
          automatic_release_tag: "latest-${{ env.CI_REF_NAME_SLUG }}"
          prerelease: true
          title: "development build ${{ env.CI_REF_NAME }}"
          files: |
            build/libs/*.jar

# vim:set ts=2 sw=2 et:
21 .github/workflows/maven.yml vendored

@@ -1,21 +0,0 @@
name: Java CI

on: [push, pull_request]

jobs:
  build:

    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macOS-latest]
        java: [1.8, 1.9]

    steps:
      - uses: actions/checkout@v1
      - name: Set up JDK 1.8
        uses: actions/setup-java@v1
        with:
          java-version: ${{ matrix.java }}
      - name: Build with Maven
        run: mvn package --file pom.xml
12 .gitignore vendored

@@ -80,6 +80,12 @@ buildNumber.properties
# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
!/.mvn/wrapper/maven-wrapper.jar

### gradle ###
/.gradle
/build
# Avoid ignoring gradle wrapper jar file (.jar files are usually ignored)
!/gradle/wrapper/gradle-wrapper.jar

### Windows ###
# Windows thumbnail cache files
Thumbs.db

@@ -105,6 +111,11 @@ $RECYCLE.BIN/
.vscode
.idea
.project
local.properties

### Build files
.gradle/
build/

### Ripme ###
ripme.log

@@ -112,7 +123,6 @@ rips/
.history
ripme.jar.update
*.swp
*.properties
!LabelsBundle*.properties
history.json
*.iml
23 .project

@@ -1,23 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
    <name>ripme</name>
    <comment></comment>
    <projects>
    </projects>
    <buildSpec>
        <buildCommand>
            <name>org.eclipse.jdt.core.javabuilder</name>
            <arguments>
            </arguments>
        </buildCommand>
        <buildCommand>
            <name>org.eclipse.m2e.core.maven2Builder</name>
            <arguments>
            </arguments>
        </buildCommand>
    </buildSpec>
    <natures>
        <nature>org.eclipse.jdt.core.javanature</nature>
        <nature>org.eclipse.m2e.core.maven2Nature</nature>
    </natures>
</projectDescription>
12 .travis.yml

@@ -1,12 +0,0 @@
language: java

matrix:
  include:
    - jdk: openjdk9
      before_install:
        - rm "${JAVA_HOME}/lib/security/cacerts"
        - ln -s /etc/ssl/certs/java/cacerts "${JAVA_HOME}/lib/security/cacerts"
    - jdk: openjdk8

after_success:
  - mvn clean test jacoco:report coveralls:report
10 .vscode/settings.json vendored

@@ -1,10 +0,0 @@
{
    "files.exclude": {
        "target/**": true,
        "**/.git": true,
        "**/.DS_Store": true,
        "**/*.class": true,
        "**/rips/**": true
    },
    "java.configuration.updateBuildConfiguration": "automatic"
}
159 README.md

@@ -1,91 +1,146 @@
# RipMe [![Licensed under the MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/RipMeApp/ripme/blob/master/LICENSE.txt) [![Join the chat at https://gitter.im/RipMeApp/Lobby](https://badges.gitter.im/RipMeApp/Lobby.svg)](https://gitter.im/RipMeApp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Subreddit](https://img.shields.io/badge/discuss-on%20reddit-blue.svg)](https://www.reddit.com/r/ripme/)
# RipMe

[![Build Status](https://travis-ci.org/RipMeApp/ripme.svg?branch=master)](https://travis-ci.org/RipMeApp/ripme)
[![Coverage Status](https://coveralls.io/repos/github/RipMeApp/ripme/badge.svg?branch=master)](https://coveralls.io/github/RipMeApp/ripme?branch=master)
[![Licensed under the MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](/LICENSE.txt)
[![Join the chat at https://gitter.im/RipMeApp/Lobby](https://badges.gitter.im/RipMeApp/Lobby.svg)](https://gitter.im/RipMeApp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Subreddit](https://img.shields.io/badge/discuss-on%20reddit-blue.svg)](https://www.reddit.com/r/ripme/)
![alt Badge Status](https://github.com/ripmeapp2/ripme/actions/workflows/gradle.yml/badge.svg)
[![Coverage Status](https://coveralls.io/repos/github/RipMeApp/ripme/badge.svg?branch=main)](https://coveralls.io/github/RipMeApp/ripme?branch=main)

# Contribute
## Recent development updates

RipMe is maintained with ♥️ and in our limited free time by **[@MetaPrime](https://github.com/metaprime)**, **[@cyian-1756](https://github.com/cyian-1756)** and **[@kevin51jiang](https://github.com/kevin51jiang)**. If you'd like to contribute but aren't good with code, help keep us happy with a small contribution!
- For a while, the ripmeapp/ripme repo was inactive, but development continued at ripmeapp2/ripme.
- Now, maintainers have been updated and development has been rejoined with ripmeapp/ripme, where it will continue.
- You may find a number of stale issues on ripmeapp/ripme and/or on ripmeapp2/ripme until everything is merged back together and statuses are updated.
- The current active development repo for RipMe is located at [ripmeapp/ripme](https://github.com/ripmeapp/ripme/).

[![Tip with PayPal](https://img.shields.io/badge/PayPal-Buy_us...-lightgrey.svg)](https://www.paypal.me/ripmeapp)
[![Tip with PayPal](https://img.shields.io/badge/coffee-%245-green.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=5.00&currencyCode=USD&locale.x=en_US&country.x=US)
[![Tip with PayPal](https://img.shields.io/badge/beer-%2410-yellow.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=10.00&currencyCode=USD&locale.x=en_US&country.x=US)
[![Tip with PayPal](https://img.shields.io/badge/lunch-%2420-orange.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=20.00&currencyCode=USD&locale.x=en_US&country.x=US)
[![Tip with PayPal](https://img.shields.io/badge/dinner-%2450-red.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=50.00&currencyCode=USD&locale.x=en_US&country.x=US)
[![Tip with PayPal](https://img.shields.io/badge/custom_amount-...-lightgrey.svg)](https://www.paypal.me/ripmeapp)
## Maintainers

RipMe has been maintained with ♥️ and in our limited free time by the following
people, roughly in order from most recent primary developer, with current
activity marked by color of the indicator:

- **[@soloturn](https://github.com/soloturn)** 🟢,
- **[@cyian-1756](https://github.com/cyian-1756)** 🟥,
- **[@kevin51jiang](https://github.com/kevin51jiang)** 🟥,
- **[@MetaPrime](https://github.com/metaprime)** 🟡,
- and its original creator, **[@4pr0n](https://github.com/4pr0n)** 🟥.

If you'd like to become a maintainer, ask an active maintainer to be added to the team.

## Contact

Chat with the team and community on [gitter](https://gitter.im/RipMeApp/Lobby) and [reddit.com/r/ripme](https://www.reddit.com/r/ripme/)

# About

RipMe is an album ripper for various websites. Runs on your computer. Requires Java 8.
RipMe is a cross-platform tool. It has been tested and confirmed working on Windows, Linux and MacOS.
RipMe is an album ripper for various websites. It is a cross-platform tool that runs on your computer, and
requires Java 21 or later to run. RipMe has been tested and is confirmed working on Windows, Linux, and MacOS.

![Screenshot](https://i.imgur.com/UCQNjeg.png)

## [Downloads](https://github.com/ripmeapp/ripme/releases)
## Downloads

Download `ripme.jar` from the [latest release](https://github.com/ripmeapp/ripme/releases).
Download `ripme.jar` from the [latest release](https://github.com/ripmeapp2/ripme/releases). For information about running the `.jar` file, see
[the How To Run wiki](https://github.com/ripmeapp/ripme/wiki/How-To-Run-RipMe).

**Note: If you're currently using version 1.2.x, 1.3.x or 1.7.49, you will not automatically get updates to the newest versions. We recommend downloading the latest version from the link above.**
A version number like `ripme-1.7.94-17-2167aa34-feature_auto_release.jar` contains a release number
(`1.7.94`) assigned by a maintainer, followed by the number of commits since that release (`17`). The
commit SHA (`2167aa34`) uniquely identifies the source code RipMe was built from. If the build is not
from the main branch, the branch name (`feature/auto-release`) is appended.
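As an illustration of that naming scheme, here is a minimal sketch that splits such a file name into its parts (the `parse_jar_name` helper is hypothetical, not a tool shipped with RipMe):

```python
# Sketch: split a jgitver-style jar name into release, commit count, SHA, and branch.
import re

def parse_jar_name(name: str) -> dict:
    m = re.match(
        r"ripme-(?P<release>\d+\.\d+\.\d+)"  # release number assigned by a maintainer
        r"(?:-(?P<commits>\d+))?"            # commits since that release
        r"(?:-(?P<sha>[0-9a-f]{8}))?"        # short commit SHA
        r"(?:-(?P<branch>\w+))?"             # branch slug, absent for main/master builds
        r"\.jar$", name)
    return m.groupdict() if m else {}

print(parse_jar_name("ripme-1.7.94-17-2167aa34-feature_auto_release.jar"))
# -> {'release': '1.7.94', 'commits': '17', 'sha': '2167aa34', 'branch': 'feature_auto_release'}
```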

For information about running the `.jar` file, see [the How To Run wiki](https://github.com/ripmeapp/ripme/wiki/How-To-Run-RipMe).
## Installation

## [Changelog](https://github.com/ripmeapp/ripme/blob/master/ripme.json) (ripme.json)
On macOS, there is a [cask](https://github.com/Homebrew/homebrew-cask/blob/master/Casks/ripme.rb).

```
brew install --cask ripme && xattr -d com.apple.quarantine /Applications/ripme.jar
```

## Changelog

[Changelog](/ripme.json) **(ripme.json)**

# Features

* Quickly downloads all images in an online album (see supported sites below)
* Easily re-rip albums to fetch new content
* Built in updater
* Skips already downloaded images by default
* Can auto skip e-hentai and nhentai albums containing certain tags [See here for how to enable](https://github.com/RipMeApp/ripme/wiki/Config-options#nhentaiblacklisttags)
* Download a range of urls [See here for how](https://github.com/RipMeApp/ripme/wiki/How-To-Run-RipMe#downloading-a-url-range)
- Quickly downloads all images in an online album. [See supported sites](https://github.com/ripmeapp/ripme/wiki/Supported-Sites)
- Easily re-rip albums to fetch new content
- Built in updater
- Skips already downloaded images by default
- Can auto skip e-hentai and nhentai albums containing certain tags. [See here for how to enable](https://github.com/RipMeApp/ripme/wiki/Config-options#nhentaiblacklisttags)
- Download a range of urls. [See here for how](https://github.com/RipMeApp/ripme/wiki/How-To-Run-RipMe#downloading-a-url-range)

## [List of Supported Sites](https://github.com/ripmeapp/ripme/wiki/Supported-Sites)
## List of Supported Sites

- imgur
- twitter
- tumblr
- instagram
- flickr
- photobucket
- reddit
- gonewild
- motherless
- imagefap
- imagearn
- seenive
- vinebox
- 8muses
- deviantart
- xhamster
- [(more)](https://github.com/ripmeapp/ripme/wiki/Supported-Sites)

## Not Supported?

Request support for more sites by adding a comment to [this Github issue](https://github.com/RipMeApp/ripme/issues/38).

If you're a developer, you can add your own Ripper by following the wiki guide:
[How To Create A Ripper for HTML Websites](https://github.com/ripmeapp/ripme/wiki/How-To-Create-A-Ripper-for-HTML-websites).

# Compiling & Building

The project uses [Maven](http://maven.apache.org/).
To build the .jar file using Maven, navigate to the root project directory and run:
The project uses [Gradle](https://gradle.org). To build the .jar file,
navigate to the root project directory and run at least the tests you
changed, e.g. Xhamster. Test execution can also be excluded completely:

```bash
mvn clean compile assembly:single
./gradlew clean build testAll --tests XhamsterRipperTest.testXhamster2Album
./gradlew clean build -x test --warning-mode all
```

This will include all dependencies in the JAR.
The generated JAR (java archive) in build/libs will include all
dependencies.

# Running Tests

After building you can run tests by running the following:
Tests can be tagged as being slow or flaky. The gradle build reacts to
the following combinations of tags:

- default is to run all tests without a tag.
- testAll runs all tests.
- testFlaky runs tests with tag "flaky".
- testSlow runs tests with tag "slow".
- tests can be run by test class, or as a single test. Use "testAll" so it does
  not matter whether a test is tagged or not.

```bash
mvn test
./gradlew test
./gradlew testAll
./gradlew testFlaky
./gradlew testSlow
./gradlew testAll --tests XhamsterRipperTest
./gradlew testAll --tests XhamsterRipperTest.testXhamster2Album
```

Please note that some tests may fail as sites change and our rippers
become out of date. Start by building and testing a released version
of RipMe and then ensure that any changes you make do not cause more
tests to break.

# New GUI - compose-jb

As Java Swing will go away in the future, a new GUI technology should be used. One of the
candidates is [Jetpack Compose for Desktop](https://github.com/JetBrains/compose-jb/).

The library leverages the Compose library for Android and provides it for Android,
desktop, and web. The navigation library is not available for desktop, so Arkadii Ivanov
implemented
[decompose](https://proandroiddev.com/a-comprehensive-hundred-line-navigation-for-jetpack-desktop-compose-5b723c4f256e).
@@ -1,2 +1 @@
mvn clean compile assembly:single
mvn io.github.zlika:reproducible-build-maven-plugin:0.6:strip-jar
./gradlew clean build -x test
149 build.gradle.kts Normal file

@@ -0,0 +1,149 @@
// the build derives a version with the jgitver plugin out of a tag in the git history. when there is no
// git repo, the jgitver default would be 0.0.0. one can override this version with a parameter. also, permit
// to start the build setting the javac release parameter, no parameter means build for java-21:
// gradle clean build -PjavacRelease=21
// gradle clean build -PcustomVersion=1.0.0-10-asdf
val customVersion = (project.findProperty("customVersion") ?: "") as String
val javacRelease = (project.findProperty("javacRelease") ?: "21") as String

plugins {
    id("fr.brouillard.oss.gradle.jgitver") version "0.9.1"
    id("jacoco")
    id("java")
    id("maven-publish")
}

repositories {
    mavenLocal()
    mavenCentral()
}

dependencies {
    implementation("com.lmax:disruptor:3.4.4")
    implementation("org.java-websocket:Java-WebSocket:1.5.3")
    implementation("org.jsoup:jsoup:1.16.1")
    implementation("org.json:json:20211205")
    implementation("com.j2html:j2html:1.6.0")
    implementation("commons-configuration:commons-configuration:1.10")
    implementation("commons-cli:commons-cli:1.5.0")
    implementation("commons-io:commons-io:2.13.0")
    implementation("org.apache.httpcomponents:httpclient:4.5.14")
    implementation("org.apache.httpcomponents:httpmime:4.5.14")
    implementation("org.apache.logging.log4j:log4j-api:2.20.0")
    implementation("org.apache.logging.log4j:log4j-core:2.20.0")
    implementation("com.squareup.okhttp3:okhttp:4.12.0")
    implementation("org.graalvm.js:js:22.3.2")
    testImplementation(enforcedPlatform("org.junit:junit-bom:5.10.0"))
    testImplementation("org.junit.jupiter:junit-jupiter")
    testRuntimeOnly("org.junit.platform:junit-platform-launcher")
}

group = "com.rarchives.ripme"
version = "1.7.94"
description = "ripme"

jacoco {
    toolVersion = "0.8.12"
}

jgitver {
    gitCommitIDLength = 8
    nonQualifierBranches = "main,master"
    useGitCommitID = true
}

afterEvaluate {
    if (customVersion != "") {
        project.version = customVersion
    }
}

tasks.compileJava {
    options.release.set(Integer.parseInt(javacRelease))
}

tasks.withType<Jar> {
    duplicatesStrategy = DuplicatesStrategy.INCLUDE
    manifest {
        attributes["Main-Class"] = "com.rarchives.ripme.App"
        attributes["Implementation-Version"] = archiveVersion
        attributes["Multi-Release"] = "true"
    }

    // To add all of the dependencies, otherwise a "NoClassDefFoundError" error occurs
    from(sourceSets.main.get().output)

    dependsOn(configurations.runtimeClasspath)
    from({
        configurations.runtimeClasspath.get().filter { it.name.endsWith("jar") }.map { zipTree(it) }
    })
}

publishing {
    publications {
        create<MavenPublication>("maven") {
            from(components["java"])
        }
    }
}

tasks.withType<JavaCompile> {
    options.encoding = "UTF-8"
    val compilerArgs = options.compilerArgs
    compilerArgs.addAll(listOf("-Xlint:deprecation"))
}

tasks.test {
    testLogging {
        showStackTraces = true
    }
    useJUnitPlatform {
        // gradle-6.5.1 does not yet allow passing this as a parameter, so exclude it
        excludeTags("flaky", "slow")
        includeEngines("junit-jupiter")
        includeEngines("junit-vintage")
    }
    finalizedBy(tasks.jacocoTestReport) // report is always generated after tests run
}

tasks.register<Test>("testAll") {
    useJUnitPlatform {
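        // "any()" matches tests that carry at least one tag and "none()" matches untagged
        // tests, so combined they select every test regardless of tagging.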
        includeTags("any()", "none()")
    }
}

tasks.register<Test>("testFlaky") {
    useJUnitPlatform {
        includeTags("flaky")
    }
}

tasks.register<Test>("testSlow") {
    useJUnitPlatform {
        includeTags("slow")
    }
}

tasks.register<Test>("testTagged") {
    useJUnitPlatform {
        includeTags("any()")
    }
}

// make all archive tasks in the build reproducible
tasks.withType<AbstractArchiveTask>().configureEach {
    isPreserveFileTimestamps = false
    isReproducibleFileOrder = true
}

println("Build directory: ${file(layout.buildDirectory)}")

tasks.jacocoTestReport {
    dependsOn(tasks.test) // tests are required to run before generating the report
    reports {
        xml.required.set(false)
        csv.required.set(false)
        html.outputLocation.set(file("${file(layout.buildDirectory)}/jacocoHtml"))
    }
}
4 build.sh

@@ -1,4 +1,2 @@
#!/usr/bin/env bash
mvn clean compile assembly:single
# Strip the jar of any non-reproducible metadata such as timestamps
mvn io.github.zlika:reproducible-build-maven-plugin:0.6:strip-jar
./gradlew clean build -x test
@@ -1,2 +0,0 @@
@echo off
powershell -c ".\deploy.ps1 -source (Join-Path target (Get-Item -Path .\target\* -Filter *.jar)[0].Name) -dest ripme.jar"
16 deploy.ps1

@@ -1,16 +0,0 @@
Param (
    [Parameter(Mandatory=$True)]
    [string]$source,
    [Parameter(Mandatory=$True)]
    [string]$dest
)

Copy-Item -Path $source -Destination $dest

$sourceHash = (Get-FileHash $source -algorithm MD5).Hash
$destHash = (Get-FileHash $dest -algorithm MD5).Hash
if ($sourceHash -eq $destHash) {
    Write-Output 'Deployed successfully.'
} else {
    Write-Output 'Hash Mismatch: did you close ripme before deploying?'
}
BIN gradle/wrapper/gradle-wrapper.jar vendored Normal file
Binary file not shown.
7 gradle/wrapper/gradle-wrapper.properties vendored Normal file

@@ -0,0 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
252 gradlew vendored Executable file

@@ -0,0 +1,252 @@
#!/bin/sh

#
# Copyright © 2015-2021 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#

##############################################################################
#
#   Gradle start up script for POSIX generated by Gradle.
#
#   Important for running:
#
#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
#       noncompliant, but you have some other compliant shell such as ksh or
#       bash, then to run this script, type that shell name before the whole
#       command line, like:
#
#           ksh Gradle
#
#       Busybox and similar reduced shells will NOT work, because this script
#       requires all of these POSIX shell features:
#         * functions;
#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
#         * compound commands having a testable exit status, especially «case»;
#         * various built-in commands including «command», «set», and «ulimit».
#
#   Important for patching:
#
#   (2) This script targets any POSIX shell, so it avoids extensions provided
#       by Bash, Ksh, etc; in particular arrays are avoided.
#
#       The "traditional" practice of packing multiple parameters into a
#       space-separated string is a well documented source of bugs and security
#       problems, so this is (mostly) avoided, by progressively accumulating
#       options in "$@", and eventually passing that to Java.
#
#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
#       see the in-line comments for details.
#
#       There are tweaks for specific operating systems such as AIX, CygWin,
#       Darwin, MinGW, and NonStop.
#
#   (3) This script is generated from the Groovy template
#       https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
#       within the Gradle project.
#
#       You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################

# Attempt to set APP_HOME

# Resolve links: $0 may be a link
app_path=$0

# Need this for daisy-chained symlinks.
while
    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
    [ -h "$app_path" ]
do
    ls=$( ls -ld "$app_path" )
    link=${ls#*' -> '}
    case $link in             #(
      /*)   app_path=$link ;; #(
      *)    app_path=$APP_HOME$link ;;
    esac
done

# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
' "$PWD" ) || exit

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum

warn () {
    echo "$*"
} >&2

die () {
    echo
    echo "$*"
    echo
    exit 1
} >&2

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in                #(
  CYGWIN* )         cygwin=true  ;; #(
  Darwin* )         darwin=true  ;; #(
  MSYS* | MINGW* )  msys=true    ;; #(
  NONSTOP* )        nonstop=true ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD=$JAVA_HOME/jre/sh/java
    else
        JAVACMD=$JAVA_HOME/bin/java
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD=java
    if ! command -v java >/dev/null 2>&1
    then
        die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
fi

# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
    case $MAX_FD in #(
      max*)
        # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC2039,SC3045
        MAX_FD=$( ulimit -H -n ) ||
            warn "Could not query maximum file descriptor limit"
    esac
    case $MAX_FD in  #(
      '' | soft) :;; #(
      *)
        # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC2039,SC3045
        ulimit -n "$MAX_FD" ||
            warn "Could not set maximum file descriptor limit to $MAX_FD"
    esac
fi

# Collect all arguments for the java command, stacking in reverse order:
#   * args from the command line
#   * the main class name
#   * -classpath
#   * -D...appname settings
#   * --module-path (only if needed)
#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.

# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
    CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )

    JAVACMD=$( cygpath --unix "$JAVACMD" )

    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    for arg do
        if
            case $arg in                                #(
              -*)   false ;;                            # don't mess with options #(
              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
                    [ -e "$t" ] ;;                      #(
              *)    false ;;
            esac
        then
            arg=$( cygpath --path --ignore --mixed "$arg" )
        fi
        # Roll the args list around exactly as many times as the number of
        # args, so each arg winds up back in the position where it started, but
        # possibly modified.
        #
        # NB: a `for` loop captures its iteration list before it begins, so
        # changing the positional parameters here affects neither the number of
        # iterations, nor the values presented in `arg`.
        shift                   # remove old arg
        set -- "$@" "$arg"      # push replacement arg
    done
fi


# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Collect all arguments for the java command:
#   * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
#     and any embedded shellness will be escaped.
#   * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
#     treated as '${Hostname}' itself on the command line.

set -- \
        "-Dorg.gradle.appname=$APP_BASE_NAME" \
        -classpath "$CLASSPATH" \
        org.gradle.wrapper.GradleWrapperMain \
        "$@"

# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
    die "xargs is not available"
fi

# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
#   set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor command substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#

eval "set -- $(
        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
        xargs -n1 |
        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
        tr '\n' ' '
    )" '"$@"'

exec "$JAVACMD" "$@"
94 gradlew.bat vendored Normal file

@@ -0,0 +1,94 @@
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@rem SPDX-License-Identifier: Apache-2.0
@rem

@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute

echo. 1>&2
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo. 1>&2
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega
86 patch.py

@@ -1,86 +0,0 @@
import json
import subprocess
from hashlib import sha256

# This script will:
# - read current version
# - increment patch version
# - update version in a few places
# - insert new line in ripme.json with message
# - build ripme
# - add the hash of the latest binary to ripme.json
# - commit all changes
message = input('message: ')

# Strip any spaces that might've been entered before the message
message = message.lstrip()


def get_ripme_json():
    with open('ripme.json') as dataFile:
        ripmeJson = json.load(dataFile)
    return ripmeJson


def update_hash(current_hash):
    ripmeJson = get_ripme_json()
    with open('ripme.json', 'w') as dataFile:
        ripmeJson["currentHash"] = current_hash
        print(ripmeJson["currentHash"])
        json.dump(ripmeJson, dataFile, indent=4)


def update_change_list(message):
    ripmeJson = get_ripme_json()
    with open('ripme.json', 'w') as dataFile:
        ripmeJson["changeList"].insert(0, message)
        json.dump(ripmeJson, dataFile, indent=4)


currentVersion = get_ripme_json()["latestVersion"]

print('Current version ' + currentVersion)

versionFields = currentVersion.split('.')
patchCur = int(versionFields[2])
patchNext = patchCur + 1
majorMinor = versionFields[:2]
majorMinor.append(str(patchNext))
nextVersion = '.'.join(majorMinor)

print('Updating to ' + nextVersion)

substrExpr = 's/' + currentVersion + '/' + nextVersion + '/'
subprocess.call(['sed', '-i', '-e', substrExpr, 'src/main/java/com/rarchives/ripme/ui/UpdateUtils.java'])
subprocess.call(['git', 'grep', 'DEFAULT_VERSION.*' + nextVersion,
                 'src/main/java/com/rarchives/ripme/ui/UpdateUtils.java'])

substrExpr = 's/\\\"latestVersion\\\": \\\"' + currentVersion + '\\\"/\\\"latestVersion\\\": \\\"' + \
    nextVersion + '\\\"/'
subprocess.call(['sed', '-i', '-e', substrExpr, 'ripme.json'])
subprocess.call(['git', 'grep', 'latestVersion', 'ripme.json'])

substrExpr = 's/<version>' + currentVersion + '/<version>' + nextVersion + '/'
subprocess.call(['sed', '-i', '-e', substrExpr, 'pom.xml'])
subprocess.call(['git', 'grep', '<version>' + nextVersion + '</version>', 'pom.xml'])

commitMessage = nextVersion + ': ' + message

update_change_list(commitMessage)


print("Building ripme")
subprocess.call(["mvn", "clean", "compile", "assembly:single"])
print("Stripping jar")
subprocess.call(["mvn", "io.github.zlika:reproducible-build-maven-plugin:0.6:strip-jar"])
print("Hashing .jar file")
openedFile = open("./target/ripme-{}-jar-with-dependencies.jar".format(nextVersion), "rb")
readFile = openedFile.read()
file_hash = sha256(readFile).hexdigest()
print("Hash is: {}".format(file_hash))
print("Updating hash")
update_hash(file_hash)
subprocess.call(['git', 'add', '-u'])
subprocess.call(['git', 'commit', '-m', commitMessage])
subprocess.call(['git', 'tag', nextVersion])
print("Remember to run `git push origin master` before release.py")
169 pom.xml

@@ -1,169 +0,0 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.rarchives.ripme</groupId>
    <artifactId>ripme</artifactId>
    <packaging>jar</packaging>
    <version>1.7.90</version>
    <name>ripme</name>
    <url>http://rip.rarchives.com</url>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <junit.version>4.12</junit.version>
        <junit.jupiter.version>5.5.0</junit.jupiter.version>
        <junit.vintage.version>5.5.0</junit.vintage.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-api</artifactId>
            <version>${junit.jupiter.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-engine</artifactId>
            <version>${junit.jupiter.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.junit.vintage</groupId>
            <artifactId>junit-vintage-engine</artifactId>
            <version>${junit.vintage.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <!-- jsoup HTML parser library @ http://jsoup.org/ -->
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
            <version>20140107</version>
        </dependency>
        <dependency>
            <groupId>commons-configuration</groupId>
            <artifactId>commons-configuration</artifactId>
            <version>1.7</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>commons-cli</groupId>
            <artifactId>commons-cli</artifactId>
            <version>1.2</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>1.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.3.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpmime</artifactId>
            <version>4.3.3</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-site-plugin</artifactId>
                <version>3.7.1</version>
            </plugin>
            <plugin>
                <groupId>io.github.zlika</groupId>
                <artifactId>reproducible-build-maven-plugin</artifactId>
                <version>0.6</version>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.rarchives.ripme.App</mainClass>
                            <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
                            <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
                        </manifest>
                        <manifestEntries>
                            <Class-Path>./config</Class-Path>
                        </manifestEntries>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.eluder.coveralls</groupId>
                <artifactId>coveralls-maven-plugin</artifactId>
                <version>4.3.0</version>
            </plugin>
            <plugin>
                <!-- At time of writing: JaCoCo is (allegedly) the only coverage report generator that supports Java 8 -->
                <groupId>org.jacoco</groupId>
                <artifactId>jacoco-maven-plugin</artifactId>
                <version>0.8.2</version>
                <executions>
                    <execution>
                        <id>prepare-agent</id>
                        <goals>
                            <goal>prepare-agent</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.22.2</version>
                <configuration>
                    <!--<groups>fast</groups>-->
                    <excludedGroups>slow</excludedGroups>
                    <properties>
                        <!--
                        <configurationParameters>
                            junit.jupiter.conditions.deactivate = *
                        </configurationParameters>
                        -->
                    </properties>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <reporting>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-report-plugin</artifactId>
                <version>3.0.0-M3</version>
                <configuration>
                    <showSuccess>false</showSuccess>
                </configuration>
            </plugin>
        </plugins>
    </reporting>
</project>
117 release.py

@@ -1,117 +0,0 @@
#!/usr/bin/env python3

import re
import os
import sys
from hashlib import sha256
from github import Github
import json
import argparse

parser = argparse.ArgumentParser(description="Make a new ripme release on github")
parser.add_argument("-f", "--file", help="Path to the version of ripme to release")
parser.add_argument("-t", "--token", help="Your github personal access token")
parser.add_argument("-d", "--debug", help="Run in debug mode", action="store_true")
parser.add_argument("-n", "--non-interactive", help="Do not ask for any input from the user", action="store_true")
parser.add_argument("--test", help="Perform a dry run (Do everything but upload new release)", action="store_true")
parser.add_argument("--skip-hash-check", help="Skip hash check (This should only be used for testing)", action="store_true")
args = parser.parse_args()

try:
    # This binds input to raw_input on python2; we do this because input acts like eval on python2
    input = raw_input
except NameError:
    pass


# Make sure the file the user selected is a jar
def isJar(filename):
    if debug:
        print("Checking if {} is a jar file".format(filename))
    return filename.endswith("jar")


# Returns true if the last entry in the "changeList" section of ripme.json is in the format of $number.$number.$number: and
# false if not
def isValidCommitMessage(message):
    if debug:
        print(r"Checking if {} matches pattern ^\d+\.\d+\.\d+:".format(message))
    pattern = re.compile(r"^\d+\.\d+\.\d+:")
    return re.match(pattern, message)


# Checks if the update has the name ripme.jar; if not, renames the file
def checkAndRenameFile(path):
    """Check if path (a string) points to a ripme.jar. Returns the possibly renamed file path"""
    if not path.endswith("ripme.jar"):
        print("Specified file is not named ripme.jar, renaming")
        new_path = os.path.join(os.path.dirname(path), "ripme.jar")
        os.rename(path, new_path)
        return new_path
    return path


ripmeJson = json.loads(open("ripme.json").read())
fileToUploadPath = checkAndRenameFile(args.file)
InNoninteractiveMode = args.non_interactive
commitMessage = ripmeJson.get("changeList")[0]
releaseVersion = ripmeJson.get("latestVersion")
debug = args.debug
accessToken = args.token
repoOwner = "ripmeapp"
repoName = "ripme"

if not os.path.isfile(fileToUploadPath):
    print("[!] Error: {} does not exist".format(fileToUploadPath))
    sys.exit(1)

if not isJar(fileToUploadPath):
    print("[!] Error: {} is not a jar file!".format(fileToUploadPath))
    sys.exit(1)

if not isValidCommitMessage(commitMessage):
    print("[!] Error: {} is not a valid commit message as it does not start with a version".format(fileToUploadPath))
    sys.exit(1)


if not args.skip_hash_check:
    if debug:
        print("Reading file {}".format(fileToUploadPath))
    ripmeUpdate = open(fileToUploadPath, mode='rb').read()

    # The actual hash of the file on disk
    actualHash = sha256(ripmeUpdate).hexdigest()

    # The hash that we expect the update to have
    expectedHash = ripmeJson.get("currentHash")

    # Make sure that the hash of the file we're uploading matches the hash in ripme.json. These hashes not matching will
    # cause ripme to refuse to install the update for all users who haven't disabled update hash checking
    if expectedHash != actualHash:
        print("[!] Error: expected hash of file and actual hash differ")
        print("[!] Expected hash is {}".format(expectedHash))
        print("[!] Actual hash is {}".format(actualHash))
        sys.exit(1)
else:
    print("[*] WARNING: SKIPPING HASH CHECK")

# Ask the user to review the information before we proceed
# (this only runs if we're in interactive mode)
if not InNoninteractiveMode:
    print("File path: {}".format(fileToUploadPath))
    print("Release title: {}".format(commitMessage))
    print("Repo: {}/{}".format(repoOwner, repoName))
    input("\nPlease review the information above and ensure it is correct and then press enter")

if not args.test:
    print("Accessing github using token")
    g = Github(accessToken)

    print("Creating release")
    release = g.get_user(repoOwner).get_repo(repoName).create_git_release(releaseVersion, commitMessage, "")

    print("Uploading file")
    release.upload_asset(fileToUploadPath, "ripme.jar")
else:
    print("Not uploading release because the script was run with the --test flag")
31 ripme.json

@@ -1,6 +1,30 @@
{
  "currentHash": "a2fdb180da195c617cff933fc400d16010d049580188a8eae7eb89e11bd0d4ef",
  "latestVersion": "2.1.12-7-d0b97acd",
  "currentHash": "ac40e5ff60f8e0bc7832874de529283a77f9e07d5a7d4a0e8f81e05d43e2df58",
  "changeList": [
    "2.1.12-7-d0b97acd, ripme now instead of ripme2 on github, ignore SSL verification option added",
    "2.1.11-20-ca96ce88, Commer.party next page, Imgur, E-hentai fixed, set recent language.",
    "2.1.10-21-c94a9543, Imagebam, Unify colons in UI, Motherless, right click menu, rgif fixed",
    "2.1.9-7-22e915df, HistoryMenuMouseListener right click menu, Imagefap retry logic for getFullSizedImage(), EightmusesRipper fixed",
    "2.1.8-1-f5153de8: jpg3 add, java-21 adjustments.",
    "2.1.7-29-b080faae: luciousripper fix, java-21 adjustments.",
    "2.1.6-1-68189f27: erome fix.",
    "2.1.5-8-ba51d7b: ripme running with java-17.",
    "2.1.4-38-836a7494: fixed imagefap ripper.",
    "2.1.3-15-1b83dc68: relative path now from working dir to subfolder, allowing images to be put in subfolder with same filename, sanatize reddit titles saved as files, additional logging in AbstractHTMLRipper.",
    "2.1.2-23-e5438e85: caching of first page, retry sleep time, nhentai fixed",
    "2.1.2-3-ea90b172: better sanitize filenames for windows, save config on update value. reddit, print exceptions in loops and continue.",
    "2.1.1-3-536339dd: java-11+ necessary to run, work around non existing working directory.",
    "2.0.4-13-03e32cb7: fix vsco, add danbooru.",
    "2.0.3: Check new version against ripme2app.",
    "2.0.2: Add greek translation, fixed reddit, redgif.",
    "2.0.1: Fixed reddit, tujigu, xhamster, imagebam, erome; marked some tests as flaky.",
    "2.0.0: Fixed Zizki, WordpressComics, Imagebam; marked some tests as flaky ",
    "1.7.95: Added porncomixinfo.net; Fixed ripper for HentaiNexus; move l option to before r and R; marked some tests as flaky ",
    "1.7.94: Added reddit gallery support; Fixed AllporncomicRipper; Fix imagefap ripper; instagramRipper, replaced Nashorn with GraalVM.js",
    "1.7.93: Fixed Motherless ripper; Fixed e621 ripper; Updated pt_PT translation; Implemented redgifs Ripper; added missing translation to Korean/KR; Fixed elecx ripper; Added ripper for HentaiNexus",
    "1.7.92: Added read-comic.com ripper; Fix Pawoo ripper; Add ChineseSimplified language file; Fixed artstation ripper",
    "1.7.91: Fixed luscious ripper. Fixed VK ripper; Added Kingcomix ripper",
    "1.7.90: Added FitnakedgirlsRipper; Fixed VK Album Ripper; Fixed Myreadingmanga Ripper; Fixed windows max file name; Fixed Pornhub Video Ripper; Fixed Motherless Ripper; Fixed Instagram Ripper",
    "1.7.89: Improved twitter ripper; Fixed xhamster image ripper; Fixed allporncomic ripper; Added Ripper for folio.ink",
    "1.7.88: Added ripper for Myreadingmanga.info; Added Mastodon rippers; Fix queue count update when queue is 0; Added ripper for listal; Now downloads best video when ripping twitter",

@@ -261,6 +285,5 @@
    "1.0.4: Fixed spaces-in-directory bug",
    "1.0.3: Added VK.com ripper",
    "1.0.1: Added auto-update functionality"
  ],
  "latestVersion": "1.7.90"
}
  ]
}
9 settings.gradle.kts Normal file
@@ -0,0 +1,9 @@
pluginManagement {
    repositories {
        mavenLocal()
        gradlePluginPortal()
        // TODO: remove after new build of compose-jb is published
        maven("https://maven.pkg.jetbrains.space/public/p/compose/dev")
    }
}
rootProject.name = "ripme"
@@ -1,31 +1,5 @@
package com.rarchives.ripme;

import java.awt.*;
import java.io.File;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileNotFoundException;

import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;

import javax.swing.SwingUtilities;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang.SystemUtils;
import org.apache.log4j.Logger;

import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ui.History;
import com.rarchives.ripme.ui.HistoryEntry;
@@ -35,6 +9,31 @@ import com.rarchives.ripme.utils.Proxy;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang.SystemUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import javax.swing.*;
import java.awt.*;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.stream.Stream;

/**
 * Entry point to application.
 * This is where all the fun happens, with the main method.
@@ -44,7 +43,7 @@ import com.rarchives.ripme.utils.Utils;
 */
public class App {

public static final Logger logger = Logger.getLogger(App.class);
public static final Logger logger = LogManager.getLogger(App.class);
public static String stringToAppendToFoldername = null;
private static final History HISTORY = new History();

@@ -54,11 +53,11 @@ public class App {
 *
 * @param args Array of command line arguments.
 */
public static void main(String[] args) throws MalformedURLException {
public static void main(String[] args) throws IOException {
CommandLine cl = getArgs(args);

if (args.length > 0 && cl.hasOption('v')){
logger.info(UpdateUtils.getThisJarVersion());
System.out.println(UpdateUtils.getThisJarVersion());
System.exit(0);
}

@@ -113,7 +112,7 @@ public class App {
entry.dir = ripper.getWorkingDir().getAbsolutePath();
try {
entry.title = ripper.getAlbumTitle(ripper.getURL());
} catch (MalformedURLException e) { }
} catch (MalformedURLException ignored) { }
HISTORY.add(entry);
}
}
@@ -122,7 +121,7 @@ public class App {
 * For dealing with command-line arguments.
 * @param args Array of Command-line arguments
 */
private static void handleArguments(String[] args) {
private static void handleArguments(String[] args) throws IOException {
CommandLine cl = getArgs(args);

//Help (list commands)
@@ -169,6 +168,12 @@ public class App {
Utils.setConfigBoolean("errors.skip404", true);
}

//Destination directory
if (cl.hasOption('l')) {
// change the default rips directory
Utils.setConfigString("rips.directory", cl.getOptionValue('l'));
}

//Re-rip <i>all</i> previous albums
if (cl.hasOption('r')) {
// Re-rip all via command-line
@@ -179,7 +184,7 @@ public class App {
}
for (HistoryEntry entry : HISTORY.toList()) {
try {
URL url = new URL(entry.url);
URL url = new URI(entry.url).toURL();
rip(url);
} catch (Exception e) {
logger.error("[!] Failed to rip URL " + entry.url, e);
@@ -208,7 +213,7 @@ public class App {
if (entry.selected) {
added++;
try {
URL url = new URL(entry.url);
URL url = new URI(entry.url).toURL();
rip(url);
} catch (Exception e) {
logger.error("[!] Failed to rip URL " + entry.url, e);
@@ -245,17 +250,11 @@ public class App {
System.exit(-1);
}

//Destination directory
if (cl.hasOption('l')) {
// change the default rips directory
Utils.setConfigString("rips.directory", cl.getOptionValue('l'));
}

//Read URLs from File
if (cl.hasOption('f')) {
String filename = cl.getOptionValue('f');
Path urlfile = Paths.get(cl.getOptionValue('f'));

try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
try (BufferedReader br = Files.newBufferedReader(urlfile)) {
String url;
while ((url = br.readLine()) != null) {
if (url.startsWith("//") || url.startsWith("#")) {
@@ -288,11 +287,11 @@ public class App {
/**
 * Attempt to rip targetURL.
 * @param targetURL URL to rip
 * @param saveConfig Whether or not you want to save the config (?)
 * @param saveConfig Whether you want to save the config (?)
 */
private static void ripURL(String targetURL, boolean saveConfig) {
try {
URL url = new URL(targetURL);
URL url = new URI(targetURL).toURL();
rip(url);
saveHistory();
} catch (MalformedURLException e) {
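The hunk above swaps `new URL(targetURL)` for `new URI(targetURL).toURL()`. A minimal sketch of that construction pattern (helper name is hypothetical): `URI` validates the string's syntax before the `URL` is built, which is why `URISyntaxException` appears in so many signatures throughout this commit.

    import java.net.MalformedURLException;
    import java.net.URI;
    import java.net.URISyntaxException;
    import java.net.URL;

    public class UriToUrlSketch {
        // Parse-then-convert: URI checks syntax, toURL() yields the java.net.URL.
        static URL toUrl(String raw) throws URISyntaxException, MalformedURLException {
            return new URI(raw).toURL();
        }

        public static void main(String[] args) throws Exception {
            System.out.println(toUrl("https://example.com/album?page=2"));
        }
    }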
@@ -337,7 +336,7 @@ public class App {
 * @return CommandLine object containing arguments.
 */
private static CommandLine getArgs(String[] args) {
BasicParser parser = new BasicParser();
var parser = new DefaultParser();
try {
return parser.parse(getOptions(), args, false);
} catch (ParseException e) {
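`BasicParser` was deprecated in commons-cli 1.3, and `DefaultParser` is the usual drop-in replacement, as this hunk shows. A small self-contained sketch of the same call shape (the option set here is made up for illustration):

    import org.apache.commons.cli.CommandLine;
    import org.apache.commons.cli.DefaultParser;
    import org.apache.commons.cli.Options;
    import org.apache.commons.cli.ParseException;

    public class CliDemo {
        public static void main(String[] args) throws ParseException {
            Options options = new Options();
            options.addOption("v", "version", false, "print version and exit");
            // Third argument false = do not stop parsing at the first unknown token.
            CommandLine cl = new DefaultParser().parse(options, args, false);
            if (cl.hasOption('v')) {
                System.out.println("version requested");
            }
        }
    }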
@@ -349,19 +348,18 @@ public class App {

/**
 * Loads history from history file into memory.
 * @see MainWindow.loadHistory
 */
private static void loadHistory() {
File historyFile = new File(Utils.getConfigDir() + File.separator + "history.json");
private static void loadHistory() throws IOException {
Path historyFile = Paths.get(Utils.getConfigDir() + "/history.json");
HISTORY.clear();
if (historyFile.exists()) {
if (Files.exists(historyFile)) {
try {
logger.info("Loading history from " + historyFile.getCanonicalPath());
HISTORY.fromFile(historyFile.getCanonicalPath());
logger.info("Loading history from " + historyFile);
HISTORY.fromFile(historyFile.toString());
} catch (IOException e) {
logger.error("Failed to load history from file " + historyFile, e);
logger.warn(
"RipMe failed to load the history file at " + historyFile.getAbsolutePath() + "\n\n" +
"RipMe failed to load the history file at " + historyFile + "\n\n" +
"Error: " + e.getMessage() + "\n\n" +
"Closing RipMe will automatically overwrite the contents of this file,\n" +
"so you may want to back the file up before closing RipMe!");
@@ -372,16 +370,18 @@ public class App {
if (HISTORY.toList().isEmpty()) {
// Loaded from config, still no entries.
// Guess rip history based on rip folder
String[] dirs = Utils.getWorkingDirectory().list((dir, file) -> new File(dir.getAbsolutePath() + File.separator + file).isDirectory());
for (String dir : dirs) {
String url = RipUtils.urlFromDirectoryName(dir);
Stream<Path> stream = Files.list(Utils.getWorkingDirectory())
.filter(Files::isDirectory);

stream.forEach(dir -> {
String url = RipUtils.urlFromDirectoryName(dir.toString());
if (url != null) {
// We found one, add it to history
HistoryEntry entry = new HistoryEntry();
entry.url = url;
HISTORY.add(entry);
}
}
});
}
}
}
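The rewritten fallback walks the rips folder with `Files.list` instead of `File.list`. One caveat worth noting: `Files.list` returns a stream backed by an open directory handle, and its javadoc recommends closing it. A sketch of the same listing wrapped in try-with-resources (the path literal is a stand-in for `Utils.getWorkingDirectory()`):

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.stream.Stream;

    public class ListDirsDemo {
        public static void main(String[] args) throws IOException {
            Path root = Paths.get("."); // stand-in for the working directory
            // Files.list is lazily populated and holds the directory open,
            // so close it deterministically via try-with-resources.
            try (Stream<Path> dirs = Files.list(root).filter(Files::isDirectory)) {
                dirs.forEach(dir -> System.out.println(dir.getFileName()));
            }
        }
    }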
@@ -390,7 +390,7 @@ public class App {
 * @see MainWindow.saveHistory
 */
private static void saveHistory() {
Path historyFile = Paths.get(Utils.getConfigDir() + File.separator + "history.json");
Path historyFile = Paths.get(Utils.getConfigDir() + "/history.json");
try {
if (!Files.exists(historyFile)) {
Files.createDirectories(historyFile.getParent());

@@ -2,42 +2,65 @@ package com.rarchives.ripme.ripper;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.io.UnsupportedEncodingException;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.jsoup.nodes.Document;

import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
import com.rarchives.ripme.ui.MainWindow;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Http;

/**
 * Simplified ripper, designed for ripping from sites by parsing HTML.
 */
public abstract class AbstractHTMLRipper extends AbstractRipper {

private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());
private final Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<>());
private final Map<URL, Path> itemsCompleted = Collections.synchronizedMap(new HashMap<>());
private final Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<>());
Document cachedFirstPage;

protected AbstractHTMLRipper(URL url) throws IOException {
super(url);
if(Utils.getConfigBoolean("ssl.verify.off",false)){
Http.SSLVerifyOff();
}else {
Http.undoSSLVerifyOff();
}
}

protected abstract String getDomain();
public abstract String getHost();

protected abstract Document getFirstPage() throws IOException;
public Document getNextPage(Document doc) throws IOException {
protected Document getFirstPage() throws IOException, URISyntaxException {
return Http.url(url).get();
}

protected Document getCachedFirstPage() throws IOException, URISyntaxException {
if (cachedFirstPage == null) {
cachedFirstPage = getFirstPage();
}
return cachedFirstPage;
}

public Document getNextPage(Document doc) throws IOException, URISyntaxException {
return null;
}
protected abstract List<String> getURLsFromPage(Document page);
protected abstract List<String> getURLsFromPage(Document page) throws UnsupportedEncodingException;
protected List<String> getDescriptionsFromPage(Document doc) throws IOException {
throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function?
}
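The new `getCachedFirstPage()` memoizes the result of `getFirstPage()` so later callers reuse the already-fetched document instead of refetching it. The shape of that pattern, reduced to a hypothetical generic loader (names are illustrative, not part of the commit):

    import java.io.IOException;

    abstract class CachedPageLoader<T> {
        private T cached;

        protected abstract T load() throws IOException;

        // Loads at most once; subsequent calls return the stored result.
        T getCached() throws IOException {
            if (cached == null) {
                cached = load();
            }
            return cached;
        }
    }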
@@ -56,7 +79,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
}

@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
return url;
}
protected boolean hasDescriptionSupport() {
@@ -86,12 +109,12 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
}

@Override
public void rip() throws IOException {
public void rip() throws IOException, URISyntaxException {
int index = 0;
int textindex = 0;
LOGGER.info("Retrieving " + this.url);
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
Document doc = getFirstPage();
var doc = getCachedFirstPage();

if (hasQueueSupport() && pageContainsAlbums(this.url)) {
List<String> urls = getAlbumsToQueue(doc);
@@ -104,11 +127,28 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
LOGGER.debug("Adding items from " + this.url + " to queue");
}

List<String> doclocation = new ArrayList<>();

LOGGER.info("Got doc location " + doc.location());

while (doc != null) {

LOGGER.info("Processing a doc...");

// catch if we saw a doc location already, save the ones seen in a list
if (doclocation.contains(doc.location())) {
LOGGER.info("Already processed location " + doc.location() + " breaking");
break;
}
doclocation.add(doc.location());

if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
sendUpdate(STATUS.DOWNLOAD_COMPLETE_HISTORY, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
break;
}

LOGGER.info("retrieving urls from doc");

List<String> imageURLs = getURLsFromPage(doc);
// If hasASAPRipping() returns true then the ripper will handle downloading the files
// if not it's done in the following block of code
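The new `doclocation` list guards the pagination loop against revisiting the same `doc.location()` forever. A compact sketch of the same guard; a `HashSet` would make the membership test O(1) where the committed `List.contains` is linear, though for a handful of pages either works:

    import java.util.HashSet;
    import java.util.Set;

    public class PaginationGuardDemo {
        public static void main(String[] args) {
            // Stand-ins for the doc.location() values seen while paging.
            String[] locations = {"/page1", "/page2", "/page2"};
            Set<String> seen = new HashSet<>();
            for (String loc : locations) {
                if (!seen.add(loc)) { // add() returns false on a repeat
                    System.out.println("Already processed " + loc + ", breaking");
                    break;
                }
                System.out.println("Processing " + loc);
            }
        }
    }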
@@ -126,9 +166,9 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {

for (String imageURL : imageURLs) {
index += 1;
LOGGER.debug("Found image url #" + index + ": " + imageURL);
LOGGER.debug("Found image url #" + index + ": '" + imageURL + "'");
downloadURL(new URL(imageURL), index);
if (isStopped()) {
if (isStopped() || isThisATest()) {
break;
}
}
@@ -139,7 +179,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
if (!textURLs.isEmpty()) {
LOGGER.debug("Found description link(s) from " + doc.location());
for (String textURL : textURLs) {
if (isStopped()) {
if (isStopped() || isThisATest()) {
break;
}
textindex += 1;
@@ -195,7 +235,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
 */
private String fileNameFromURL(URL url) {
String saveAs = url.toExternalForm();
if (saveAs.substring(saveAs.length() - 1) == "/") { saveAs = saveAs.substring(0,saveAs.length() - 1) ;}
if (saveAs.substring(saveAs.length() - 1).equals("/")) { saveAs = saveAs.substring(0,saveAs.length() - 1) ;}
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
@@ -250,7 +290,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
}
LOGGER.debug("Downloading " + url + "'s description to " + saveFileAs);
if (!saveFileAs.getParentFile().exists()) {
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
LOGGER.info("[+] Creating directory: " + saveFileAs.getParent());
saveFileAs.getParentFile().mkdirs();
}
return true;
@@ -281,22 +321,22 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
}

@Override
/**
 * Returns total amount of files attempted.
/*
Returns total amount of files attempted.
 */
public int getCount() {
return itemsCompleted.size() + itemsErrored.size();
}

@Override
/**
 * Queues multiple URLs of single images to download from a single Album URL
/*
Queues multiple URLs of single images to download from a single Album URL
 */
public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
// Only download one file if this is a test.
if (super.isThisATest() &&
(itemsPending.size() > 0 || itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
public boolean addURLToDownload(URL url, Path saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
// Only download one file if this is a test.
if (isThisATest() && (itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
stop();
itemsPending.clear();
return false;
}
if (!allowDuplicates()
@@ -307,20 +347,24 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
return false;
}
if (shouldIgnoreURL(url)) {
sendUpdate(STATUS.DOWNLOAD_SKIP, "Skipping " + url.toExternalForm() + " - ignored extension");
return false;
}
if (Utils.getConfigBoolean("urls_only.save", false)) {
// Output URL to file
String urlFile = this.workingDir + File.separator + "urls.txt";
try (FileWriter fw = new FileWriter(urlFile, true)) {
fw.write(url.toExternalForm());
fw.write(System.lineSeparator());
itemsCompleted.put(url, new File(urlFile));
Path urlFile = Paths.get(this.workingDir + "/urls.txt");
String text = url.toExternalForm() + System.lineSeparator();
try {
Files.write(urlFile, text.getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
itemsCompleted.put(url, urlFile);
} catch (IOException e) {
LOGGER.error("Error while writing to " + urlFile, e);
}
}
else {
itemsPending.put(url, saveAs);
DownloadFileThread dft = new DownloadFileThread(url, saveAs, this, getFileExtFromMIME);
itemsPending.put(url, saveAs.toFile());
DownloadFileThread dft = new DownloadFileThread(url, saveAs.toFile(), this, getFileExtFromMIME);
if (referrer != null) {
dft.setReferrer(referrer);
}
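The `urls_only.save` branch now appends through `Files.write` with `CREATE` and `APPEND` instead of a `FileWriter`. Isolated, the call looks like this (file name and URL are illustrative):

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.StandardOpenOption;

    public class AppendLineDemo {
        public static void main(String[] args) throws IOException {
            Path urlFile = Paths.get("urls.txt");
            String text = "https://example.com/image.jpg" + System.lineSeparator();
            // CREATE makes the file if missing; APPEND adds to the end on every call.
            Files.write(urlFile, text.getBytes(StandardCharsets.UTF_8),
                    StandardOpenOption.CREATE, StandardOpenOption.APPEND);
        }
    }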
@@ -334,7 +378,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
}

@Override
public boolean addURLToDownload(URL url, File saveAs) {
public boolean addURLToDownload(URL url, Path saveAs) {
return addURLToDownload(url, saveAs, null, null, false);
}

@@ -352,10 +396,10 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
}

@Override
/**
 * Cleans up & tells user about successful download
/*
Cleans up & tells user about successful download
 */
public void downloadCompleted(URL url, File saveAs) {
public void downloadCompleted(URL url, Path saveAs) {
if (observer == null) {
return;
}
@@ -373,7 +417,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
}

@Override
/**
/*
 * Cleans up & tells user about failed download.
 */
public void downloadErrored(URL url, String reason) {
@@ -388,18 +432,18 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
}

@Override
/**
 * Tells user that a single file in the album they wish to download has
 * already been downloaded in the past.
/*
Tells user that a single file in the album they wish to download has
already been downloaded in the past.
 */
public void downloadExists(URL url, File file) {
public void downloadExists(URL url, Path file) {
if (observer == null) {
return;
}

itemsPending.remove(url);
itemsCompleted.put(url, file);
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file.getAbsolutePath()));
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file));

checkIfComplete();
}
@@ -421,21 +465,16 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
 * Sets directory to save all ripped files to.
 * @param url
 * URL to define how the working directory should be saved.
 * @throws
 * IOException
 */
@Override
public void setWorkingDir(URL url) throws IOException {
String path = Utils.getWorkingDirectory().getCanonicalPath();
public void setWorkingDir(URL url) throws IOException, URISyntaxException {
Path wd = Utils.getWorkingDirectory();
// TODO - change to nio
String path = wd.toAbsolutePath().toString();
if (!path.endsWith(File.separator)) {
path += File.separator;
}
String title;
if (Utils.getConfigBoolean("album_titles.save", true)) {
title = getAlbumTitle(this.url);
} else {
title = super.getAlbumTitle(this.url);
}
String title = getAlbumTitle(this.url);
LOGGER.debug("Using album title '" + title + "'");

title = Utils.filesystemSafe(title);
@@ -444,8 +483,10 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {

this.workingDir = new File(path);
if (!this.workingDir.exists()) {
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
this.workingDir.mkdirs();
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir.toPath()));
if (!this.workingDir.mkdirs()) {
throw new IOException("Failed creating dir: \"" + this.workingDir + "\"");
}
}
LOGGER.debug("Set working directory to: " + this.workingDir);
}
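This hunk turns a silent `mkdirs()` failure into an explicit `IOException`. The `// TODO - change to nio` above points at `Files.createDirectories`, which already behaves that way; a sketch for comparison (directory names are made up):

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    public class MakeDirsDemo {
        public static void main(String[] args) throws IOException {
            Path dir = Paths.get("rips", "example_album");
            // Unlike File.mkdirs(), this throws an IOException with a reason on
            // failure and is a no-op if the directory already exists.
            Files.createDirectories(dir);
            System.out.println("Ready: " + dir.toAbsolutePath());
        }
    }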
@@ -466,13 +507,11 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
 */
@Override
public String getStatusText() {
StringBuilder sb = new StringBuilder();
sb.append(getCompletionPercentage())
.append("% ")
.append("- Pending: " ).append(itemsPending.size())
.append(", Completed: ").append(itemsCompleted.size())
.append(", Errored: " ).append(itemsErrored.size());
return sb.toString();
return getCompletionPercentage() +
"% " +
"- Pending: " + itemsPending.size() +
", Completed: " + itemsCompleted.size() +
", Errored: " + itemsErrored.size();
}

@@ -1,18 +1,24 @@
package com.rarchives.ripme.ripper;

import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
import org.json.JSONObject;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.json.JSONObject;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;

/**
 * Simplified ripper, designed for ripping from sites by parsing JSON.
@@ -20,7 +26,7 @@ import com.rarchives.ripme.utils.Utils;
public abstract class AbstractJSONRipper extends AbstractRipper {

private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
private Map<URL, Path> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, Path>());
private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());

protected AbstractJSONRipper(URL url) throws IOException {
@@ -31,8 +37,8 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
@Override
public abstract String getHost();

protected abstract JSONObject getFirstPage() throws IOException;
protected JSONObject getNextPage(JSONObject doc) throws IOException {
protected abstract JSONObject getFirstPage() throws IOException, URISyntaxException;
protected JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException {
throw new IOException("getNextPage not implemented");
}
protected abstract List<String> getURLsFromJSON(JSONObject json);
@@ -51,12 +57,12 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
}

@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
return url;
}

@Override
public void rip() throws IOException {
public void rip() throws IOException, URISyntaxException {
int index = 0;
LOGGER.info("Retrieving " + this.url);
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
@@ -98,7 +104,7 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
try {
sendUpdate(STATUS.LOADING_RESOURCE, "next page");
json = getNextPage(json);
} catch (IOException e) {
} catch (IOException | URISyntaxException e) {
LOGGER.info("Can't get next page: " + e.getMessage());
break;
}
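The widened multi-catch mirrors the JSON ripper's paging contract: `getNextPage` signals the end of an album by throwing. Reduced to a hypothetical skeleton (names are illustrative), the loop shape is:

    import java.io.IOException;
    import java.net.URISyntaxException;

    abstract class PagedFetcher<T> {
        abstract T firstPage() throws IOException, URISyntaxException;
        abstract T nextPage(T current) throws IOException, URISyntaxException;

        void fetchAll() throws IOException, URISyntaxException {
            T page = firstPage();
            while (page != null) {
                // ... process the current page here ...
                try {
                    page = nextPage(page);
                } catch (IOException | URISyntaxException e) {
                    // End of pagination is reported by throwing, not by returning null.
                    System.out.println("Can't get next page: " + e.getMessage());
                    break;
                }
            }
        }
    }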
@@ -140,11 +146,11 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
/**
 * Queues multiple URLs of single images to download from a single Album URL
 */
public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
// Only download one file if this is a test.
if (super.isThisATest() &&
(itemsPending.size() > 0 || itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
public boolean addURLToDownload(URL url, Path saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
// Only download one file if this is a test.
if (super.isThisATest() && (itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
stop();
itemsPending.clear();
return false;
}
if (!allowDuplicates()
@@ -155,20 +161,24 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
return false;
}
if (shouldIgnoreURL(url)) {
sendUpdate(STATUS.DOWNLOAD_SKIP, "Skipping " + url.toExternalForm() + " - ignored extension");
return false;
}
if (Utils.getConfigBoolean("urls_only.save", false)) {
// Output URL to file
String urlFile = this.workingDir + File.separator + "urls.txt";
try (FileWriter fw = new FileWriter(urlFile, true)) {
fw.write(url.toExternalForm());
fw.write(System.lineSeparator());
itemsCompleted.put(url, new File(urlFile));
Path urlFile = Paths.get(this.workingDir + "/urls.txt");
String text = url.toExternalForm() + System.lineSeparator();
try {
Files.write(urlFile, text.getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
itemsCompleted.put(url, urlFile);
} catch (IOException e) {
LOGGER.error("Error while writing to " + urlFile, e);
}
}
else {
itemsPending.put(url, saveAs);
DownloadFileThread dft = new DownloadFileThread(url, saveAs, this, getFileExtFromMIME);
itemsPending.put(url, saveAs.toFile());
DownloadFileThread dft = new DownloadFileThread(url, saveAs.toFile(), this, getFileExtFromMIME);
if (referrer != null) {
dft.setReferrer(referrer);
}
@@ -182,7 +192,7 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
}

@Override
public boolean addURLToDownload(URL url, File saveAs) {
public boolean addURLToDownload(URL url, Path saveAs) {
return addURLToDownload(url, saveAs, null, null, false);
}

@@ -203,7 +213,7 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
/**
 * Cleans up & tells user about successful download
 */
public void downloadCompleted(URL url, File saveAs) {
public void downloadCompleted(URL url, Path saveAs) {
if (observer == null) {
return;
}
@@ -240,14 +250,14 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
 * Tells user that a single file in the album they wish to download has
 * already been downloaded in the past.
 */
public void downloadExists(URL url, File file) {
public void downloadExists(URL url, Path file) {
if (observer == null) {
return;
}

itemsPending.remove(url);
itemsCompleted.put(url, file);
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file.getAbsolutePath()));
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file));

checkIfComplete();
}
@@ -273,11 +283,8 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
 * IOException
 */
@Override
public void setWorkingDir(URL url) throws IOException {
String path = Utils.getWorkingDirectory().getCanonicalPath();
if (!path.endsWith(File.separator)) {
path += File.separator;
}
public void setWorkingDir(URL url) throws IOException, URISyntaxException {
Path wd = Utils.getWorkingDirectory();
String title;
if (Utils.getConfigBoolean("album_titles.save", true)) {
title = getAlbumTitle(this.url);
@@ -287,15 +294,13 @@ public abstract class AbstractJSONRipper extends AbstractRipper {
LOGGER.debug("Using album title '" + title + "'");

title = Utils.filesystemSafe(title);
path += title;
path = Utils.getOriginalDirectory(path) + File.separator; // check for case sensitive (unix only)

this.workingDir = new File(path);
if (!this.workingDir.exists()) {
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
this.workingDir.mkdirs();
wd = wd.resolve(title);
if (!Files.exists(wd)) {
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(wd));
Files.createDirectory(wd);
}
LOGGER.debug("Set working directory to: " + this.workingDir);
this.workingDir = wd.toFile();
LOGGER.info("Set working directory to: {}", this.workingDir);
}

/**
@@ -8,14 +8,23 @@ import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Observable;
import java.util.Scanner;
import org.apache.log4j.FileAppender;
import org.apache.log4j.Logger;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import org.jsoup.HttpStatusException;
import com.rarchives.ripme.App;
import com.rarchives.ripme.ui.RipStatusComplete;
@@ -28,7 +37,7 @@ public abstract class AbstractRipper
extends Observable
implements RipperInterface, Runnable {

protected static final Logger LOGGER = Logger.getLogger(AbstractRipper.class);
protected static final Logger LOGGER = LogManager.getLogger(AbstractRipper.class);
private final String URLHistoryFile = Utils.getURLHistoryFile();

public static final String USER_AGENT =
@@ -41,23 +50,24 @@ public abstract class AbstractRipper

private boolean completed = true;

public abstract void rip() throws IOException;
public abstract void rip() throws IOException, URISyntaxException;
public abstract String getHost();
public abstract String getGID(URL url) throws MalformedURLException;
public abstract String getGID(URL url) throws MalformedURLException, URISyntaxException;
public boolean hasASAPRipping() { return false; }
// Everytime addUrlToDownload skips a already downloaded url this increases by 1
public int alreadyDownloadedUrls = 0;
private boolean shouldStop = false;
private final AtomicBoolean shouldStop = new AtomicBoolean(false);
private static boolean thisIsATest = false;

public void stop() {
shouldStop = true;
LOGGER.trace("stop()");
shouldStop.set(true);
}
public boolean isStopped() {
return shouldStop;
return shouldStop.get();
}
protected void stopCheck() throws IOException {
if (shouldStop) {
if (shouldStop.get()) {
throw new IOException("Ripping interrupted");
}
}
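Replacing the plain `boolean shouldStop` with an `AtomicBoolean` matters because `stop()` is typically called from the UI thread while rippers poll `isStopped()` from worker threads; without a synchronization point the write may never become visible to the reader. A runnable sketch of the flag handoff (class and method names are illustrative):

    import java.util.concurrent.atomic.AtomicBoolean;

    public class StopFlagDemo {
        private final AtomicBoolean shouldStop = new AtomicBoolean(false);

        public void stop() { shouldStop.set(true); }
        public boolean isStopped() { return shouldStop.get(); }

        public static void main(String[] args) throws InterruptedException {
            StopFlagDemo worker = new StopFlagDemo();
            Thread t = new Thread(() -> {
                while (!worker.isStopped()) { /* do one unit of work */ }
                System.out.println("worker observed stop()");
            });
            t.start();
            worker.stop(); // the atomic write is guaranteed visible to the worker
            t.join();
        }
    }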
@@ -163,7 +173,11 @@ public abstract class AbstractRipper
if (!canRip(url)) {
throw new MalformedURLException("Unable to rip url: " + url);
}
this.url = sanitizeURL(url);
try {
this.url = sanitizeURL(url);
} catch (URISyntaxException e) {
throw new MalformedURLException(e.getMessage());
}
}

/**
@@ -175,14 +189,17 @@ public abstract class AbstractRipper
 * @throws IOException
 * Always be prepared.
 */
public void setup() throws IOException {
public void setup() throws IOException, URISyntaxException {
setWorkingDir(this.url);
Logger rootLogger = Logger.getRootLogger();
FileAppender fa = (FileAppender) rootLogger.getAppender("FILE");
if (fa != null) {
fa.setFile(this.workingDir + File.separator + "log.txt");
fa.activateOptions();
}
// we do not care if the rollingfileappender is active, just change the logfile in case
// TODO this does not work - not even with
// .withFileName("${sys:logFilename}")
// in Utils.java, RollingFileAppender.
// System.setProperty("logFilename", this.workingDir + "/log.txt");
// LOGGER.debug("Changing log file to '{}/log.txt'", this.workingDir);
// LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
// ctx.reconfigure();
// ctx.updateLoggers();

this.threadPool = new DownloadThreadPool();
}
@@ -199,7 +216,7 @@ public abstract class AbstractRipper
 * Path of the local file to save the content to.
 * @return True on success, false on failure.
 */
public abstract boolean addURLToDownload(URL url, File saveAs);
public abstract boolean addURLToDownload(URL url, Path saveAs);

/**
 * Queues image to be downloaded and saved.
@@ -215,7 +232,7 @@ public abstract class AbstractRipper
 * True if downloaded successfully
 * False if failed to download
 */
protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies,
protected abstract boolean addURLToDownload(URL url, Path saveAs, String referrer, Map<String, String> cookies,
Boolean getFileExtFromMIME);

/**
@@ -234,9 +251,10 @@ public abstract class AbstractRipper
 */
protected boolean addURLToDownload(URL url, Map<String, String> options, Map<String, String> cookies) {
// Bit of a hack but this lets us pass a bool using a map<string,String>
boolean useMIME = options.getOrDefault("getFileExtFromMIME", "false").toLowerCase().equals("true");
return addURLToDownload(url, options.getOrDefault("prefix", ""), options.getOrDefault("subdirectory", ""), options.getOrDefault("referrer", null),
cookies, options.getOrDefault("fileName", null), options.getOrDefault("extension", null), useMIME);
boolean useMIME = options.getOrDefault("getFileExtFromMIME", "false").equalsIgnoreCase("true");
return addURLToDownload(url, options.getOrDefault("subdirectory", ""), options.getOrDefault("referrer", null), cookies,
options.getOrDefault("prefix", ""), options.getOrDefault("fileName", null), options.getOrDefault("extension", null),
useMIME);
}

@@ -274,7 +292,7 @@ public abstract class AbstractRipper
 * True if downloaded successfully
 * False if failed to download
 */
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies, String fileName, String extension, Boolean getFileExtFromMIME) {
protected boolean addURLToDownload(URL url, String subdirectory, String referrer, Map<String, String> cookies, String prefix, String fileName, String extension, Boolean getFileExtFromMIME) {
// A common bug is rippers adding urls that are just "http:". This rejects said urls
if (url.toExternalForm().equals("http:") || url.toExternalForm().equals("https:")) {
LOGGER.info(url.toExternalForm() + " is a invalid url amd will be changed");
@@ -285,8 +303,8 @@ public abstract class AbstractRipper
if (url.toExternalForm().contains(" ")) {
// If for some reason the url with all spaces encoded as %20 is malformed print an error
try {
url = new URL(url.toExternalForm().replaceAll(" ", "%20"));
} catch (MalformedURLException e) {
url = new URI(url.toExternalForm().replaceAll(" ", "%20")).toURL();
} catch (MalformedURLException | URISyntaxException e) {
LOGGER.error("Unable to remove spaces from url\nURL: " + url.toExternalForm());
e.printStackTrace();
}
@@ -305,34 +323,19 @@ public abstract class AbstractRipper
LOGGER.debug("Ripper has been stopped");
return false;
}
LOGGER.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies + ", fileName: " + fileName);
String saveAs = getFileName(url, fileName, extension);
File saveFileAs;
LOGGER.debug("url: " + url + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies + ", prefix: " + prefix + ", fileName: " + fileName);
Path saveAs;
try {
if (!subdirectory.equals("")) {
subdirectory = Utils.filesystemSafe(subdirectory);
subdirectory = File.separator + subdirectory;
saveAs = getFilePath(url, subdirectory, prefix, fileName, extension);
LOGGER.debug("Downloading " + url + " to " + saveAs);
if (!Files.exists(saveAs.getParent())) {
LOGGER.info("[+] Creating directory: " + saveAs.getParent());
Files.createDirectories(saveAs.getParent());
}
prefix = Utils.filesystemSanitized(prefix);
String topFolderName = workingDir.getCanonicalPath();
if (App.stringToAppendToFoldername != null) {
topFolderName = topFolderName + App.stringToAppendToFoldername;
}
saveFileAs = new File(
topFolderName
+ subdirectory
+ File.separator
+ prefix
+ saveAs);
} catch (IOException e) {
LOGGER.error("[!] Error creating save file path for URL '" + url + "':", e);
return false;
}
LOGGER.debug("Downloading " + url + " to " + saveFileAs);
if (!saveFileAs.getParentFile().exists()) {
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
saveFileAs.getParentFile().mkdirs();
}
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
LOGGER.info("Writing " + url.toExternalForm() + " to file");
try {
@@ -341,11 +344,11 @@ public abstract class AbstractRipper
LOGGER.debug("Unable to write URL history file");
}
}
return addURLToDownload(url, saveFileAs, referrer, cookies, getFileExtFromMIME);
return addURLToDownload(url, saveAs, referrer, cookies, getFileExtFromMIME);
}

protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String,String> cookies, String fileName, String extension) {
return addURLToDownload(url, prefix, subdirectory, referrer, cookies, fileName, extension, false);
return addURLToDownload(url, subdirectory, referrer, cookies, prefix, fileName, extension, false);
}

protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies, String fileName) {
@@ -384,33 +387,53 @@ public abstract class AbstractRipper
return addURLToDownload(url, prefix, "");
}

public static String getFileName(URL url, String fileName, String extension) {
String saveAs;
if (fileName != null) {
saveAs = fileName;
} else {
saveAs = url.toExternalForm();
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
public Path getFilePath(URL url, String subdir, String prefix, String fileName, String extension) throws IOException {
// construct the path: workingdir + subdir + prefix + filename + extension
// save into working dir
Path filepath = Paths.get(workingDir.getCanonicalPath());

if (null != App.stringToAppendToFoldername)
filepath = filepath.resolveSibling(filepath.getFileName() + App.stringToAppendToFoldername);

if (null != subdir && !subdir.trim().isEmpty())
filepath = filepath.resolve(Utils.filesystemSafe(subdir));

filepath = filepath.resolve(getFileName(url, prefix, fileName, extension));
return filepath;
}

public static String getFileName(URL url, String prefix, String fileName, String extension) {
// retrieve filename from URL if not passed
if (fileName == null || fileName.trim().isEmpty()) {
fileName = url.toExternalForm();
fileName = fileName.substring(fileName.lastIndexOf('/')+1);
}
if (extension == null) {
if (fileName.indexOf('?') >= 0) { fileName = fileName.substring(0, fileName.indexOf('?')); }
if (fileName.indexOf('#') >= 0) { fileName = fileName.substring(0, fileName.indexOf('#')); }
if (fileName.indexOf('&') >= 0) { fileName = fileName.substring(0, fileName.indexOf('&')); }
if (fileName.indexOf(':') >= 0) { fileName = fileName.substring(0, fileName.indexOf(':')); }

// add prefix
if (prefix != null && !prefix.trim().isEmpty()) {
fileName = prefix + fileName;
}

// retrieve extension from URL if not passed, no extension if nothing found
if (extension == null || extension.trim().isEmpty()) {
// Get the extension of the file
String[] lastBitOfURL = url.toExternalForm().split("/");

String[] lastBit = lastBitOfURL[lastBitOfURL.length - 1].split(".");
if (lastBit.length != 0) {
extension = lastBit[lastBit.length - 1];
saveAs = saveAs + "." + extension;
}
}

if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
// if extension is passed or found, add it
if (extension != null) {
saveAs = saveAs + "." + extension;
fileName = fileName + "." + extension;
}
return saveAs;
// make sure filename is not too long and has no unsupported chars
return Utils.sanitizeSaveAs(fileName);
}

|
||||
* @param saveAs
|
||||
* Where the downloaded file is stored.
|
||||
*/
|
||||
public abstract void downloadCompleted(URL url, File saveAs);
|
||||
public abstract void downloadCompleted(URL url, Path saveAs);
|
||||
/**
|
||||
* Notifies observers that a file could not be downloaded (includes a reason).
|
||||
* @param url
|
||||
* @param reason
|
||||
*/
|
||||
public abstract void downloadErrored(URL url, String reason);
|
||||
/**
|
||||
* Notify observers that a download could not be completed,
|
||||
* but was not technically an "error".
|
||||
* @param url
|
||||
* @param file
|
||||
*/
|
||||
public abstract void downloadExists(URL url, File file);
|
||||
public abstract void downloadExists(URL url, Path file);
|
||||
|
||||
/**
|
||||
* @return Number of files downloaded.
|
||||
@ -478,17 +497,17 @@ public abstract class AbstractRipper
|
||||
completed = true;
|
||||
LOGGER.info(" Rip completed!");
|
||||
|
||||
RipStatusComplete rsc = new RipStatusComplete(workingDir, getCount());
|
||||
RipStatusComplete rsc = new RipStatusComplete(workingDir.toPath(), getCount());
|
||||
RipStatusMessage msg = new RipStatusMessage(STATUS.RIP_COMPLETE, rsc);
|
||||
observer.update(this, msg);
|
||||
|
||||
Logger rootLogger = Logger.getRootLogger();
|
||||
FileAppender fa = (FileAppender) rootLogger.getAppender("FILE");
|
||||
if (fa != null) {
|
||||
LOGGER.debug("Changing log file back to 'ripme.log'");
|
||||
fa.setFile("ripme.log");
|
||||
fa.activateOptions();
|
||||
}
|
||||
// we do not care if the rollingfileappender is active, just change the logfile in case
|
||||
// TODO - does not work.
|
||||
// System.setProperty("logFilename", "ripme.log");
|
||||
// LOGGER.debug("Changing log file back to 'ripme.log'");
|
||||
// LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
|
||||
// ctx.reconfigure();
|
||||
|
||||
if (Utils.getConfigBoolean("urls_only.save", false)) {
|
||||
String urlFile = this.workingDir + File.separator + "urls.txt";
|
||||
try {
|
||||
@ -519,7 +538,7 @@ public abstract class AbstractRipper
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract void setWorkingDir(URL url) throws IOException;
|
||||
public abstract void setWorkingDir(URL url) throws IOException, URISyntaxException;
|
||||
|
||||
/**
|
||||
*
|
||||
@ -532,8 +551,12 @@ public abstract class AbstractRipper
|
||||
* @throws MalformedURLException
|
||||
* If any of those damned URLs gets malformed.
|
||||
*/
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
return getHost() + "_" + getGID(url);
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
|
||||
try {
|
||||
return getHost() + "_" + getGID(url);
|
||||
} catch (URISyntaxException e) {
|
||||
throw new MalformedURLException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -572,7 +595,6 @@ public abstract class AbstractRipper
|
||||
* The package name.
|
||||
* @return
|
||||
* List of constructors for all eligible Rippers.
|
||||
* @throws Exception
|
||||
*/
|
||||
public static List<Constructor<?>> getRipperConstructors(String pkg) throws Exception {
|
||||
List<Constructor<?>> constructors = new ArrayList<>();
|
||||
@ -586,8 +608,7 @@ public abstract class AbstractRipper
|
||||
|
||||
/**
|
||||
* Sends an update message to the relevant observer(s) on this ripper.
|
||||
* @param status
|
||||
* @param message
|
||||
* @param status
|
||||
*/
|
||||
public void sendUpdate(STATUS status, Object message) {
|
||||
if (observer == null) {
|
||||
@ -679,4 +700,18 @@ public abstract class AbstractRipper
|
||||
protected boolean useByteProgessBar() { return false;}
|
||||
// If true ripme will try to resume a broken download for this ripper
|
||||
protected boolean tryResumeDownload() { return false;}
|
||||
}
|
||||
|
||||
protected boolean shouldIgnoreURL(URL url) {
|
||||
final String[] ignoredExtensions = Utils.getConfigStringArray("download.ignore_extensions");
|
||||
if (ignoredExtensions == null || ignoredExtensions.length == 0) return false; // nothing ignored
|
||||
String[] pathElements = url.getPath().split("\\.");
|
||||
if (pathElements.length == 0) return false; // no extension, can't filter
|
||||
String extension = pathElements[pathElements.length - 1];
|
||||
for (String ignoredExtension : ignoredExtensions) {
|
||||
if (ignoredExtension.equalsIgnoreCase(extension)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
@ -1,27 +1,34 @@
|
||||
package com.rarchives.ripme.ripper;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
// Should this file even exist? It does the same thing as abstractHTML ripper
|
||||
|
||||
/**'
|
||||
* For ripping delicious albums off the interwebz.
|
||||
* @deprecated Use AbstractHTMLRipper instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public abstract class AlbumRipper extends AbstractRipper {
|
||||
|
||||
private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
|
||||
private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
|
||||
private Map<URL, Path> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, Path>());
|
||||
private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());
|
||||
|
||||
protected AlbumRipper(URL url) throws IOException {
|
||||
@ -29,10 +36,10 @@ public abstract class AlbumRipper extends AbstractRipper {
|
||||
}
|
||||
|
||||
public abstract boolean canRip(URL url);
|
||||
public abstract URL sanitizeURL(URL url) throws MalformedURLException;
|
||||
public abstract URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException;
public abstract void rip() throws IOException;
public abstract String getHost();
public abstract String getGID(URL url) throws MalformedURLException;
public abstract String getGID(URL url) throws MalformedURLException, URISyntaxException;

protected boolean allowDuplicates() {
return false;
@ -50,11 +57,11 @@ public abstract class AlbumRipper extends AbstractRipper {
/**
* Queues multiple URLs of single images to download from a single Album URL
*/
public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
// Only download one file if this is a test.
if (super.isThisATest() &&
(itemsPending.size() > 0 || itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
public boolean addURLToDownload(URL url, Path saveAs, String referrer, Map<String,String> cookies, Boolean getFileExtFromMIME) {
// Only download one file if this is a test.
if (super.isThisATest() && (itemsCompleted.size() > 0 || itemsErrored.size() > 0)) {
stop();
itemsPending.clear();
return false;
}
if (!allowDuplicates()
@ -65,20 +72,24 @@ public abstract class AlbumRipper extends AbstractRipper {
LOGGER.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
return false;
}
if (shouldIgnoreURL(url)) {
sendUpdate(STATUS.DOWNLOAD_SKIP, "Skipping " + url.toExternalForm() + " - ignored extension");
return false;
}
if (Utils.getConfigBoolean("urls_only.save", false)) {
// Output URL to file
String urlFile = this.workingDir + File.separator + "urls.txt";
try (FileWriter fw = new FileWriter(urlFile, true)) {
fw.write(url.toExternalForm());
fw.write(System.lineSeparator());
itemsCompleted.put(url, new File(urlFile));
Path urlFile = Paths.get(this.workingDir + "/urls.txt");
String text = url.toExternalForm() + System.lineSeparator();
try {
Files.write(urlFile, text.getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
itemsCompleted.put(url, urlFile);
} catch (IOException e) {
LOGGER.error("Error while writing to " + urlFile, e);
}
}
else {
itemsPending.put(url, saveAs);
DownloadFileThread dft = new DownloadFileThread(url, saveAs, this, getFileExtFromMIME);
itemsPending.put(url, saveAs.toFile());
DownloadFileThread dft = new DownloadFileThread(url, saveAs.toFile(), this, getFileExtFromMIME);
if (referrer != null) {
dft.setReferrer(referrer);
}
@ -92,7 +103,7 @@ public abstract class AlbumRipper extends AbstractRipper {
}

@Override
public boolean addURLToDownload(URL url, File saveAs) {
public boolean addURLToDownload(URL url, Path saveAs) {
return addURLToDownload(url, saveAs, null, null, false);
}

@ -113,7 +124,7 @@ public abstract class AlbumRipper extends AbstractRipper {
/**
* Cleans up & tells user about successful download
*/
public void downloadCompleted(URL url, File saveAs) {
public void downloadCompleted(URL url, Path saveAs) {
if (observer == null) {
return;
}
@ -150,14 +161,14 @@ public abstract class AlbumRipper extends AbstractRipper {
* Tells user that a single file in the album they wish to download has
* already been downloaded in the past.
*/
public void downloadExists(URL url, File file) {
public void downloadExists(URL url, Path file) {
if (observer == null) {
return;
}

itemsPending.remove(url);
itemsCompleted.put(url, file);
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file.getAbsolutePath()));
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_WARN, url + " already saved as " + file));

checkIfComplete();
}
@ -183,8 +194,10 @@ public abstract class AlbumRipper extends AbstractRipper {
* IOException
*/
@Override
public void setWorkingDir(URL url) throws IOException {
String path = Utils.getWorkingDirectory().getCanonicalPath();
public void setWorkingDir(URL url) throws IOException, URISyntaxException {
Path wd = Utils.getWorkingDirectory();
// TODO - change to nio
String path = wd.toAbsolutePath().toString();
if (!path.endsWith(File.separator)) {
path += File.separator;
}
@ -202,7 +215,7 @@ public abstract class AlbumRipper extends AbstractRipper {

this.workingDir = new File(path);
if (!this.workingDir.exists()) {
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir.toPath()));
this.workingDir.mkdirs();
}
LOGGER.debug("Set working directory to: " + this.workingDir);
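Most of the AlbumRipper hunks above are a `File` to `java.nio.Path` migration: `addURLToDownload`, `downloadCompleted`, `downloadExists` and `setWorkingDir` now take `Path`, and the urls.txt writer switches from `FileWriter` to `Files.write`. A minimal standalone sketch of that write pattern (the directory and URL strings here are hypothetical, not from the commit):

```java
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;

class UrlLogDemo {
    public static void main(String[] args) throws IOException {
        Path urlFile = Paths.get("demo-workdir", "urls.txt"); // hypothetical working dir
        Files.createDirectories(urlFile.getParent()); // ensure the parent exists
        String text = "https://example.com/image.jpg" + System.lineSeparator();
        // CREATE makes the file if missing; APPEND adds to it without truncating
        Files.write(urlFile, text.getBytes(StandardCharsets.UTF_8),
                StandardOpenOption.CREATE, StandardOpenOption.APPEND);
    }
}
```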

@ -1,19 +1,17 @@
package com.rarchives.ripme.ripper;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.net.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.ResourceBundle;

import javax.net.ssl.HttpsURLConnection;

import com.rarchives.ripme.ui.MainWindow;
import org.apache.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.HttpStatusException;

import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
@ -23,29 +21,31 @@ import com.rarchives.ripme.utils.Utils;
* Thread for downloading files. Includes retry logic, observer notifications,
* and other goodies.
*/
class DownloadFileThread extends Thread {
private static final Logger logger = Logger.getLogger(DownloadFileThread.class);
class DownloadFileThread implements Runnable {
private static final Logger logger = LogManager.getLogger(DownloadFileThread.class);

private String referrer = "";
private Map<String, String> cookies = new HashMap<>();

private URL url;
private final URL url;
private File saveAs;
private String prettySaveAs;
private AbstractRipper observer;
private int retries;
private Boolean getFileExtFromMIME;
private final String prettySaveAs;
private final AbstractRipper observer;
private final int retries;
private final Boolean getFileExtFromMIME;

private final int TIMEOUT;

private final int retrySleep;
public DownloadFileThread(URL url, File saveAs, AbstractRipper observer, Boolean getFileExtFromMIME) {
super();
this.url = url;
this.saveAs = saveAs;
this.prettySaveAs = Utils.removeCWD(saveAs);
this.prettySaveAs = Utils.removeCWD(saveAs.toPath());
this.observer = observer;
this.retries = Utils.getConfigInteger("download.retries", 1);
this.TIMEOUT = Utils.getConfigInteger("download.timeout", 60000);
this.retrySleep = Utils.getConfigInteger("download.retry.sleep", 0);
this.getFileExtFromMIME = getFileExtFromMIME;
}

@ -61,12 +61,13 @@ class DownloadFileThread extends Thread {
* Attempts to download the file. Retries as needed. Notifies observers upon
* completion/error/warn.
*/
@Override
public void run() {
// First thing we make sure the file name doesn't have any illegal chars in it
saveAs = new File(
saveAs.getParentFile().getAbsolutePath() + File.separator + Utils.sanitizeSaveAs(saveAs.getName()));
long fileSize = 0;
int bytesTotal = 0;
int bytesTotal;
int bytesDownloaded = 0;
if (saveAs.exists() && observer.tryResumeDownload()) {
fileSize = saveAs.length();
@ -78,15 +79,15 @@ class DownloadFileThread extends Thread {
return;
}
if (saveAs.exists() && !observer.tryResumeDownload() && !getFileExtFromMIME
|| Utils.fuzzyExists(new File(saveAs.getParent()), saveAs.getName()) && getFileExtFromMIME
|| Utils.fuzzyExists(Paths.get(saveAs.getParent()), saveAs.getName()) && getFileExtFromMIME
&& !observer.tryResumeDownload()) {
if (Utils.getConfigBoolean("file.overwrite", false)) {
logger.info("[!] " + Utils.getLocalizedString("deleting.existing.file") + prettySaveAs);
saveAs.delete();
if (!saveAs.delete()) logger.error("could not delete existing file: " + saveAs.getAbsolutePath());
} else {
logger.info("[!] " + Utils.getLocalizedString("skipping") + url + " -- "
logger.info("[!] " + Utils.getLocalizedString("skipping") + " " + url + " -- "
+ Utils.getLocalizedString("file.already.exists") + ": " + prettySaveAs);
observer.downloadExists(url, saveAs);
observer.downloadExists(url, saveAs.toPath());
return;
}
}
@ -95,8 +96,6 @@ class DownloadFileThread extends Thread {
int tries = 0; // Number of attempts to download
do {
tries += 1;
InputStream bis = null;
OutputStream fos = null;
try {
logger.info("    Downloading file: " + urlToDownload + (tries > 0 ? " Retry #" + tries : ""));
observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
@ -119,14 +118,14 @@ class DownloadFileThread extends Thread {
huc.setRequestProperty("Referer", referrer); // Sic
}
huc.setRequestProperty("User-agent", AbstractRipper.USER_AGENT);
String cookie = "";
StringBuilder cookie = new StringBuilder();
for (String key : cookies.keySet()) {
if (!cookie.equals("")) {
cookie += "; ";
if (!cookie.toString().equals("")) {
cookie.append("; ");
}
cookie += key + "=" + cookies.get(key);
cookie.append(key).append("=").append(cookies.get(key));
}
huc.setRequestProperty("Cookie", cookie);
huc.setRequestProperty("Cookie", cookie.toString());
if (observer.tryResumeDownload()) {
if (fileSize != 0) {
huc.setRequestProperty("Range", "bytes=" + fileSize + "-");
@ -150,7 +149,7 @@ class DownloadFileThread extends Thread {
redirected = true;
}
String location = huc.getHeaderField("Location");
urlToDownload = new URL(location);
urlToDownload = new URI(location).toURL();
// Throw exception so download can be retried
throw new IOException("Redirect status code " + statusCode + " - redirect to " + location);
}
@ -184,6 +183,7 @@ class DownloadFileThread extends Thread {
}

// Save file
InputStream bis;
bis = new BufferedInputStream(huc.getInputStream());

// Check if we should get the file ext from the MIME type
@ -209,6 +209,7 @@ class DownloadFileThread extends Thread {
}
}
// If we're resuming a download we append data to the existing file
OutputStream fos = null;
if (statusCode == 206) {
fos = new FileOutputStream(saveAs, true);
} else {
@ -235,9 +236,11 @@ class DownloadFileThread extends Thread {
} else if (saveAs.getAbsolutePath().length() > 259 && Utils.isWindows()) {
// This if is for when the file path has gone above 260 chars which windows does
// not allow
fos = new FileOutputStream(
fos = Files.newOutputStream(
Utils.shortenSaveAsWindows(saveAs.getParentFile().getPath(), saveAs.getName()));
assert fos != null: "After shortenSaveAsWindows: " + saveAs.getAbsolutePath();
}
assert fos != null: e.getStackTrace();
}
}
byte[] data = new byte[1024 * 256];
@ -278,24 +281,17 @@ class DownloadFileThread extends Thread {
"HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
return;
}
} catch (IOException e) {
} catch (IOException | URISyntaxException e) {
logger.debug("IOException", e);
logger.error("[!] " + Utils.getLocalizedString("exception.while.downloading.file") + ": " + url + " - "
+ e.getMessage());
} finally {
// Close any open streams
try {
if (bis != null) {
bis.close();
}
} catch (IOException e) {
}
try {
if (fos != null) {
fos.close();
}
} catch (IOException e) {
}
} catch (NullPointerException npe){

logger.error("[!] " + Utils.getLocalizedString("failed.to.download") + " for URL " + url);
observer.downloadErrored(url,
Utils.getLocalizedString("failed.to.download") + " " + url.toExternalForm());
return;

}
if (tries > this.retries) {
logger.error("[!] " + Utils.getLocalizedString("exceeded.maximum.retries") + " (" + this.retries
@ -303,9 +299,13 @@ class DownloadFileThread extends Thread {
observer.downloadErrored(url,
Utils.getLocalizedString("failed.to.download") + " " + url.toExternalForm());
return;
} else {
if (retrySleep > 0) {
Utils.sleep(retrySleep);
}
}
} while (true);
observer.downloadCompleted(url, saveAs);
observer.downloadCompleted(url, saveAs.toPath());
logger.info("[+] Saved " + url + " as " + this.prettySaveAs);
}
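DownloadFileThread also stops extending `Thread` and implements `Runnable` instead, so instances become plain tasks scheduled by an executor rather than self-managed threads. A sketch of the difference, with a stand-in lambda where the real code would pass a `DownloadFileThread` instance:

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

class RunnableDemo {
    public static void main(String[] args) {
        Runnable task = () -> System.out.println("downloading..."); // stand-in for DownloadFileThread
        ExecutorService pool = Executors.newFixedThreadPool(4);
        pool.execute(task); // pool-managed: no Thread subclass, no explicit start()
        pool.shutdown();
    }
}
```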

@ -4,16 +4,16 @@ import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.log4j.Logger;

import com.rarchives.ripme.utils.Utils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
* Simple wrapper around a FixedThreadPool.
*/
public class DownloadThreadPool {

private static final Logger logger = Logger.getLogger(DownloadThreadPool.class);
private static final Logger logger = LogManager.getLogger(DownloadThreadPool.class);
private ThreadPoolExecutor threadPool = null;

public DownloadThreadPool() {
@ -35,10 +35,10 @@ public class DownloadThreadPool {
}
/**
* For adding threads to execution pool.
* @param t
* @param t
*            Thread to be added.
*/
public void addThread(Thread t) {
public void addThread(Runnable t) {
threadPool.execute(t);
}
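The same logger swap recurs across these files: `org.apache.log4j.Logger.getLogger(...)` becomes `org.apache.logging.log4j.LogManager.getLogger(...)`. The pattern in isolation, as a small sketch:

```java
// log4j 1.x (removed by this commit):
// import org.apache.log4j.Logger;
// private static final Logger logger = Logger.getLogger(DownloadThreadPool.class);

// log4j 2.x (added): Logger moves to org.apache.logging.log4j,
// and instances are obtained via LogManager rather than Logger itself.
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

class LoggerDemo {
    private static final Logger logger = LogManager.getLogger(LoggerDemo.class);

    public static void main(String[] args) {
        logger.info("log4j2 logger obtained via LogManager");
    }
}
```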

@ -1,36 +1,36 @@
package com.rarchives.ripme.ripper;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;

import javax.net.ssl.HttpsURLConnection;

import org.apache.log4j.Logger;

import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
* Thread for downloading files.
* Includes retry logic, observer notifications, and other goodies.
*/
class DownloadVideoThread extends Thread {
class DownloadVideoThread implements Runnable {

private static final Logger logger = Logger.getLogger(DownloadVideoThread.class);
private static final Logger logger = LogManager.getLogger(DownloadVideoThread.class);

private URL url;
private File saveAs;
private String prettySaveAs;
private AbstractRipper observer;
private int retries;
private final URL url;
private final Path saveAs;
private final String prettySaveAs;
private final AbstractRipper observer;
private final int retries;

public DownloadVideoThread(URL url, File saveAs, AbstractRipper observer) {
public DownloadVideoThread(URL url, Path saveAs, AbstractRipper observer) {
super();
this.url = url;
this.saveAs = saveAs;
@ -43,6 +43,7 @@ class DownloadVideoThread extends Thread {
* Attempts to download the file. Retries as needed.
* Notifies observers upon completion/error/warn.
*/
@Override
public void run() {
try {
observer.stopCheck();
@ -50,10 +51,14 @@ class DownloadVideoThread extends Thread {
observer.downloadErrored(url, "Download interrupted");
return;
}
if (saveAs.exists()) {
if (Files.exists(saveAs)) {
if (Utils.getConfigBoolean("file.overwrite", false)) {
logger.info("[!] Deleting existing file" + prettySaveAs);
saveAs.delete();
try {
Files.delete(saveAs);
} catch (IOException e) {
e.printStackTrace();
}
} else {
logger.info("[!] Skipping " + url + " -- file already exists: " + prettySaveAs);
observer.downloadExists(url, saveAs);
@ -100,7 +105,7 @@ class DownloadVideoThread extends Thread {
huc.connect();
// Check status code
bis = new BufferedInputStream(huc.getInputStream());
fos = new FileOutputStream(saveAs);
fos = Files.newOutputStream(saveAs);
while ( (bytesRead = bis.read(data)) != -1) {
try {
observer.stopCheck();
@ -122,10 +127,10 @@ class DownloadVideoThread extends Thread {
// Close any open streams
try {
if (bis != null) { bis.close(); }
} catch (IOException e) { }
} catch (IOException ignored) { }
try {
if (fos != null) { fos.close(); }
} catch (IOException e) { }
} catch (IOException ignored) { }
}
if (tries > this.retries) {
logger.error("[!] Exceeded maximum retries (" + this.retries + ") for URL " + url);
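Note the behavioral difference the video thread now has to handle: `File.delete()` returns a boolean, while `Files.delete(Path)` throws `IOException` on failure, which is why the diff wraps the deletion in a try/catch. A compact illustration (the file name is hypothetical):

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

class DeleteDemo {
    public static void main(String[] args) {
        Path saveAs = Paths.get("demo-video.mp4"); // hypothetical target file
        try {
            // like Files.delete, but a no-op when the file is already absent
            Files.deleteIfExists(saveAs);
        } catch (IOException e) {
            // the failure reason (permissions, open handle, ...) is reported, not swallowed
            e.printStackTrace();
        }
    }
}
```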

@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;

/**
@ -11,10 +12,10 @@ import java.net.URL;
* (cheers!)
*/
interface RipperInterface {
void rip() throws IOException;
void rip() throws IOException, URISyntaxException;
boolean canRip(URL url);
URL sanitizeURL(URL url) throws MalformedURLException;
void setWorkingDir(URL url) throws IOException;
URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException;
void setWorkingDir(URL url) throws IOException, URISyntaxException;
String getHost();
String getGID(URL url) throws MalformedURLException;
String getGID(URL url) throws MalformedURLException, URISyntaxException;
}
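Widening the interface signatures lets implementers build URLs through `java.net.URI` (whose parser is stricter) without catching `URISyntaxException` locally. A hypothetical implementer method under the new contract, not taken from the commit:

```java
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

class SanitizeDemo {
    // the checked URISyntaxException now simply propagates to the caller
    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
        String cleaned = url.toExternalForm().replaceAll("#.*$", ""); // hypothetical cleanup step
        return new URI(cleaned).toURL();
    }
}
```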

@ -8,7 +8,9 @@ import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Path;
import java.util.Map;


@ -21,7 +23,7 @@ public abstract class VideoRipper extends AbstractRipper {
super(url);
}

public abstract void rip() throws IOException;
public abstract void rip() throws IOException, URISyntaxException;

public abstract String getHost();

@ -43,10 +45,10 @@ public abstract class VideoRipper extends AbstractRipper {
}

@Override
public boolean addURLToDownload(URL url, File saveAs) {
public boolean addURLToDownload(URL url, Path saveAs) {
if (Utils.getConfigBoolean("urls_only.save", false)) {
// Output URL to file
String urlFile = this.workingDir + File.separator + "urls.txt";
String urlFile = this.workingDir + "/urls.txt";

try (FileWriter fw = new FileWriter(urlFile, true)) {
fw.write(url.toExternalForm());
@ -66,13 +68,17 @@ public abstract class VideoRipper extends AbstractRipper {
this.url = url;
return true;
}
if (shouldIgnoreURL(url)) {
sendUpdate(STATUS.DOWNLOAD_SKIP, "Skipping " + url.toExternalForm() + " - ignored extension");
return false;
}
threadPool.addThread(new DownloadVideoThread(url, saveAs, this));
}
return true;
}

@Override
public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies, Boolean getFileExtFromMIME) {
public boolean addURLToDownload(URL url, Path saveAs, String referrer, Map<String, String> cookies, Boolean getFileExtFromMIME) {
return addURLToDownload(url, saveAs);
}

@ -83,7 +89,9 @@ public abstract class VideoRipper extends AbstractRipper {
*/
@Override
public void setWorkingDir(URL url) throws IOException {
String path = Utils.getWorkingDirectory().getCanonicalPath();
Path wd = Utils.getWorkingDirectory();
// TODO - change to nio
String path = wd.toAbsolutePath().toString();

if (!path.endsWith(File.separator)) {
path += File.separator;
@ -93,7 +101,7 @@ public abstract class VideoRipper extends AbstractRipper {
workingDir = new File(path);

if (!workingDir.exists()) {
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(workingDir));
LOGGER.info("[+] Creating directory: " + Utils.removeCWD(workingDir.toPath()));
workingDir.mkdirs();
}

@ -115,7 +123,7 @@ public abstract class VideoRipper extends AbstractRipper {
* @param saveAs Path to file, including filename.
*/
@Override
public void downloadCompleted(URL url, File saveAs) {
public void downloadCompleted(URL url, Path saveAs) {
if (observer == null) {
return;
}
@ -149,12 +157,11 @@ public abstract class VideoRipper extends AbstractRipper {

/**
* Runs if user tries to redownload an already existing File.
*
* @param url Target URL
* @param url  Target URL
* @param file Existing file
*/
@Override
public void downloadExists(URL url, File file) {
public void downloadExists(URL url, Path file) {
if (observer == null) {
return;
}
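Unlike AlbumRipper above, VideoRipper keeps the `FileWriter`-based urls.txt writer and only swaps `File.separator` for a literal `"/"`. The append idiom it relies on, shown in isolation (file name and URL are placeholders):

```java
import java.io.FileWriter;
import java.io.IOException;

class FileWriterAppendDemo {
    public static void main(String[] args) throws IOException {
        // second constructor argument true = append rather than truncate
        try (FileWriter fw = new FileWriter("urls.txt", true)) {
            fw.write("https://example.com/video.mp4");
            fw.write(System.lineSeparator());
        }
    }
}
```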

@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@ -14,7 +15,6 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import java.util.HashMap;

public class AerisdiesRipper extends AbstractHTMLRipper {
@ -47,9 +47,9 @@ public class AerisdiesRipper extends AbstractHTMLRipper {
}

@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
Element el = getFirstPage().select(".headtext").first();
Element el = getCachedFirstPage().select(".headtext").first();
if (el == null) {
throw new IOException("Unable to get album title");
}
@ -62,11 +62,6 @@ public class AerisdiesRipper extends AbstractHTMLRipper {
return super.getAlbumTitle(url);
}

@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}

@Override
public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<>();

@ -12,7 +12,6 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class AllporncomicRipper extends AbstractHTMLRipper {

@ -46,17 +45,11 @@ public class AllporncomicRipper extends AbstractHTMLRipper {
"allporncomic.com/TITLE/CHAPTER - got " + url + " instead");
}

@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}

@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
for (Element el : doc.select(".wp-manga-chapter-img")) {
result.add(el.attr("src"));
result.add(el.attr("data-src"));
}
return result;
}

@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@ -50,7 +52,7 @@ public class ArtStationRipper extends AbstractJSONRipper {
try {
// groupData = Http.url(albumURL.getLocation()).getJSON();
groupData = getJson(albumURL.getLocation());
} catch (IOException e) {
} catch (IOException | URISyntaxException e) {
throw new MalformedURLException("Couldn't load JSON from " + albumURL.getLocation());
}
return groupData.getString("title");
@ -60,9 +62,9 @@ public class ArtStationRipper extends AbstractJSONRipper {
// URL points to user portfolio, use user's full name as GID
String userInfoURL = "https://www.artstation.com/users/" + albumURL.getID() + "/quick.json";
try {
// groupData = Http.url(userInfoURL).getJSON();
// groupData = Http.url(userInfoURL).getJSON();
groupData = getJson(userInfoURL);
} catch (IOException e) {
} catch (IOException | URISyntaxException e) {
throw new MalformedURLException("Couldn't load JSON from " + userInfoURL);
}
return groupData.getString("full_name");
@ -74,7 +76,7 @@ public class ArtStationRipper extends AbstractJSONRipper {
}

@Override
protected JSONObject getFirstPage() throws IOException {
protected JSONObject getFirstPage() throws IOException, URISyntaxException {
if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) {
// URL points to JSON of a single project, just return it
// return Http.url(albumURL.getLocation()).getJSON();
@ -90,7 +92,7 @@ public class ArtStationRipper extends AbstractJSONRipper {
if (albumContent.getInt("total_count") > 0) {
// Get JSON of the first project and return it
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(0);
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
ParsedURL projectURL = parseURL(new URI(projectInfo.getString("permalink")).toURL());
// return Http.url(projectURL.getLocation()).getJSON();
return getJson(projectURL.getLocation());
}
@ -100,7 +102,7 @@ public class ArtStationRipper extends AbstractJSONRipper {
}

@Override
protected JSONObject getNextPage(JSONObject doc) throws IOException {
protected JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException {
if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) {
// Initialize the page number if it hasn't been initialized already
if (projectPageNumber == null) {
@ -117,7 +119,7 @@ public class ArtStationRipper extends AbstractJSONRipper {
projectIndex = 0;
}

Integer currentProject = ((projectPageNumber - 1) * 50) + (projectIndex + 1);
int currentProject = ((projectPageNumber - 1) * 50) + (projectIndex + 1);
// JSONObject albumContent = Http.url(albumURL.getLocation() + "?page=" +
// projectPageNumber).getJSON();
JSONObject albumContent = getJson(albumURL.getLocation() + "?page=" + projectPageNumber);
@ -125,7 +127,7 @@ public class ArtStationRipper extends AbstractJSONRipper {
if (albumContent.getInt("total_count") > currentProject) {
// Get JSON of the next project and return it
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(projectIndex);
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
ParsedURL projectURL = parseURL(new URI(projectInfo.getString("permalink")).toURL());
projectIndex++;
// return Http.url(projectURL.getLocation()).getJSON();
return getJson(projectURL.getLocation());
@ -254,7 +256,7 @@ public class ArtStationRipper extends AbstractJSONRipper {
con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
con.header("Accept-Language", "en-US,en;q=0.5");
con.header("Accept-Encoding", "gzip, deflate, br");
// con.header("Accept-Encoding", "gzip, deflate, br");
con.header("Upgrade-Insecure-Requests", "1");
Response res = con.execute();
int status = res.statusCode();
@ -309,7 +311,7 @@ public class ArtStationRipper extends AbstractJSONRipper {
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
con.header("Accept-Language", "en-US,en;q=0.5");
con.header("Accept-Encoding", "gzip, deflate, br");
// con.header("Accept-Encoding", "gzip, deflate, br");
con.header("Upgrade-Insecure-Requests", "1");
Response res = con.execute();
int status = res.statusCode();
@ -320,8 +322,8 @@ public class ArtStationRipper extends AbstractJSONRipper {
throw new IOException("Error fetching json. Status code:" + status);
}

private JSONObject getJson(String url) throws IOException {
return getJson(new URL(url));
private JSONObject getJson(String url) throws IOException, URISyntaxException {
return getJson(new URI(url).toURL());
}

}
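The recurring `new URL(s)` to `new URI(s).toURL()` rewrite in this file (and in ArtstnRipper below) validates the string through URI's stricter parser instead of constructing a `URL` directly from it. The minimal form, with a hypothetical example address:

```java
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

class UriToUrlDemo {
    public static void main(String[] args) throws MalformedURLException, URISyntaxException {
        // old style: URL u = new URL("https://www.artstation.com/artwork/abc123");
        // new style: parse as URI first, then convert (abc123 is a made-up project id)
        URL u = new URI("https://www.artstation.com/artwork/abc123").toURL();
        System.out.println(u.getHost());
    }
}
```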

@ -1,58 +1,60 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

import org.jsoup.Connection.Response;

import com.rarchives.ripme.utils.Http;

/*
* Ripper for ArtStation's short URL domain.
* Example URL: https://artstn.co/p/JlE15Z
*/

public class ArtstnRipper extends ArtStationRipper {
public URL artStationUrl = null;

public ArtstnRipper(URL url) throws IOException {
super(url);
}

@Override
public boolean canRip(URL url) {
return url.getHost().endsWith("artstn.co");
}

@Override
public String getGID(URL url) throws MalformedURLException {
if (artStationUrl == null) {
// Run only once.
try {
artStationUrl = getFinalUrl(url);
if (artStationUrl == null) {
throw new IOException("Null url received.");
}
} catch (IOException e) {
LOGGER.error("Couldnt resolve URL.", e);
}

}
return super.getGID(artStationUrl);
}

public URL getFinalUrl(URL url) throws IOException {
if (url.getHost().endsWith("artstation.com")) {
return url;
}

LOGGER.info("Checking url: " + url);
Response response = Http.url(url).connection().followRedirects(false).execute();
if (response.statusCode() / 100 == 3 && response.hasHeader("location")) {
return getFinalUrl(new URL(response.header("location")));
} else {
return null;
}
}
}
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

import org.jsoup.Connection.Response;

import com.rarchives.ripme.utils.Http;

/*
* Ripper for ArtStation's short URL domain.
* Example URL: https://artstn.co/p/JlE15Z
*/

public class ArtstnRipper extends ArtStationRipper {
public URL artStationUrl = null;

public ArtstnRipper(URL url) throws IOException {
super(url);
}

@Override
public boolean canRip(URL url) {
return url.getHost().endsWith("artstn.co");
}

@Override
public String getGID(URL url) throws MalformedURLException {
if (artStationUrl == null) {
// Run only once.
try {
artStationUrl = getFinalUrl(url);
if (artStationUrl == null) {
throw new IOException("Null url received.");
}
} catch (IOException | URISyntaxException e) {
LOGGER.error("Couldnt resolve URL.", e);
}

}
return super.getGID(artStationUrl);
}

public URL getFinalUrl(URL url) throws IOException, URISyntaxException {
if (url.getHost().endsWith("artstation.com")) {
return url;
}

LOGGER.info("Checking url: " + url);
Response response = Http.url(url).connection().followRedirects(false).execute();
if (response.statusCode() / 100 == 3 && response.hasHeader("location")) {
return getFinalUrl(new URI(response.header("location")).toURL());
} else {
return null;
}
}
}

@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@ -13,7 +14,6 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class BatoRipper extends AbstractHTMLRipper {

@ -70,10 +70,10 @@ public class BatoRipper extends AbstractHTMLRipper {
}

@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
return getHost() + "_" + getGID(url) + "_" + getFirstPage().select("title").first().text().replaceAll(" ", "_");
return getHost() + "_" + getGID(url) + "_" + getCachedFirstPage().select("title").first().text().replaceAll(" ", "_");
} catch (IOException e) {
// Fall back to default album naming convention
LOGGER.info("Unable to find title at " + url);
@ -94,11 +94,6 @@ public class BatoRipper extends AbstractHTMLRipper {
return m.matches();
}

@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}

@Override
public List<String> getURLsFromPage(Document doc) {

@ -47,11 +47,6 @@ public class BcfakesRipper extends AbstractHTMLRipper {
+ " Got: " + url);
}

@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}

@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page

@ -41,12 +41,6 @@ public class BlackbrickroadofozRipper extends AbstractHTMLRipper {
"www.blackbrickroadofoz.com/comic/PAGE - got " + url + " instead");
}

@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}

@Override
public Document getNextPage(Document doc) throws IOException {
sleep(1000);

@ -12,12 +12,14 @@ import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class BooruRipper extends AbstractHTMLRipper {
private static final Logger logger = Logger.getLogger(BooruRipper.class);
private static final Logger logger = LogManager.getLogger(BooruRipper.class);

private static Pattern gidPattern = null;

@ -2,10 +2,11 @@ package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.rippers.ripperhelpers.ChanSite;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import com.rarchives.ripme.utils.RipUtils;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
@ -13,7 +14,6 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.utils.Utils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

@ -72,7 +72,7 @@ public class ChanRipper extends AbstractHTMLRipper {
);

private ChanSite chanSite;
private Boolean generalChanSite = true;
private boolean generalChanSite = true;

public ChanRipper(URL url) throws IOException {
super(url);
@ -104,7 +104,7 @@ public class ChanRipper extends AbstractHTMLRipper {
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
Document doc = getFirstPage();
Document doc = getCachedFirstPage();
try {
String subject = doc.select(".post.op > .postinfo > .subject").first().text();
return getHost() + "_" + getGID(url) + "_" + subject;
@ -195,11 +195,9 @@ public class ChanRipper extends AbstractHTMLRipper {
return this.url.getHost();
}

@Override
public Document getFirstPage() throws IOException {
return Http.url(this.url).get();
public Document getFirstPage() throws IOException, URISyntaxException {
return super.getFirstPage();
}

private boolean isURLBlacklisted(String url) {
for (String blacklist_item : url_piece_blacklist) {
if (url.contains(blacklist_item)) {

@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
@ -50,10 +51,10 @@ public class CheveretoRipper extends AbstractHTMLRipper {
}

@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
String title = titleElement.attr("content");
title = title.substring(title.lastIndexOf('/') + 1);
return getHost() + "_" + title.trim();

@ -1,173 +1,174 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

/**
* @author Tushar
*
*/
public class ComicextraRipper extends AbstractHTMLRipper {

private static final String FILE_NAME = "page";

private Pattern p1 =
Pattern.compile("https:\\/\\/www.comicextra.com\\/comic\\/([A-Za-z0-9_-]+)");
private Pattern p2 = Pattern.compile(
"https:\\/\\/www.comicextra.com\\/([A-Za-z0-9_-]+)\\/([A-Za-z0-9_-]+)(?:\\/full)?");
private UrlType urlType = UrlType.UNKNOWN;
private List<String> chaptersList = null;
private int chapterIndex = -1; // index for the chaptersList, useful in getting the next page.
private int imageIndex = 0; // image index for each chapter images.

public ComicextraRipper(URL url) throws IOException {
super(url);
}

@Override
protected String getDomain() {
return "comicextra.com";
}

@Override
public String getHost() {
return "comicextra";
}

@Override
public String getGID(URL url) throws MalformedURLException {
Matcher m1 = p1.matcher(url.toExternalForm());
if (m1.matches()) {
// URL is of comic( https://www.comicextra.com/comic/the-punisher-frank-castle-max).
urlType = UrlType.COMIC;
return m1.group(1);
}

Matcher m2 = p2.matcher(url.toExternalForm());
if (m2.matches()) {
// URL is of chapter( https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75).
urlType = UrlType.CHAPTER;
return m2.group(1);
}

throw new MalformedURLException(
"Expected comicextra.com url of type: https://www.comicextra.com/comic/some-comic-name\n"
+ " or https://www.comicextra.com/some-comic-name/chapter-001 got " + url
+ " instead");
}

@Override
protected Document getFirstPage() throws IOException {
Document doc = null;

switch (urlType) {
case COMIC:
// For COMIC type url we extract the urls of each chapters and store them in chapters.
chaptersList = new ArrayList<>();
Document comicPage = Http.url(url).get();
Elements elements = comicPage.select("div.episode-list a");
for (Element e : elements) {
chaptersList.add(getCompleteChapterUrl(e.attr("abs:href")));
}

// Set the first chapter from the chapterList as the doc.
chapterIndex = 0;
doc = Http.url(chaptersList.get(chapterIndex)).get();
break;
case CHAPTER:
doc = Http.url(url).get();
break;
case UNKNOWN:
default:
throw new IOException("Unknown url type encountered.");
}

return doc;
}

@Override
public Document getNextPage(Document doc) throws IOException {
if (urlType == UrlType.COMIC) {
++chapterIndex;
imageIndex = 0; // Resetting the imagesIndex so that images prefix within each chapter starts from '001_'.
if (chapterIndex < chaptersList.size()) {
return Http.url(chaptersList.get(chapterIndex)).get();
}
}

return super.getNextPage(doc);
}

@Override
protected List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<>();

if (urlType == UrlType.COMIC || urlType == UrlType.CHAPTER) {
Elements images = page.select("img.chapter_img");
for (Element img : images) {
urls.add(img.attr("src"));
}
}

return urls;
}

@Override
protected void downloadURL(URL url, int index) {
String subdirectory = getSubDirectoryName();
String prefix = getPrefix(++imageIndex);

addURLToDownload(url, prefix, subdirectory, null, null, FILE_NAME, null, Boolean.TRUE);
}

/*
* This function appends /full at the end of the chapters url to get all the images for the
* chapter in the same Document.
*/
private String getCompleteChapterUrl(String chapterUrl) {
if (!chapterUrl.endsWith("/full")) {
chapterUrl = chapterUrl + "/full";
}
return chapterUrl;
}

/*
* This functions returns sub folder name for the current chapter.
*/
private String getSubDirectoryName() {
String subDirectory = "";

if (urlType == UrlType.COMIC) {
Matcher m = p2.matcher(chaptersList.get(chapterIndex));
if (m.matches()) {
subDirectory = m.group(2);
}
}

if (urlType == UrlType.CHAPTER) {
Matcher m = p2.matcher(url.toExternalForm());
if (m.matches()) {
subDirectory = m.group(2);
}
}

return subDirectory;
}

/*
* Enum to classify different types of urls.
*/
private enum UrlType {
COMIC, CHAPTER, UNKNOWN
}
}
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

/**
* @author Tushar
*
*/
public class ComicextraRipper extends AbstractHTMLRipper {

private static final String FILE_NAME = "page";

private Pattern p1 =
Pattern.compile("https:\\/\\/www.comicextra.com\\/comic\\/([A-Za-z0-9_-]+)");
private Pattern p2 = Pattern.compile(
"https:\\/\\/www.comicextra.com\\/([A-Za-z0-9_-]+)\\/([A-Za-z0-9_-]+)(?:\\/full)?");
private UrlType urlType = UrlType.UNKNOWN;
private List<String> chaptersList = null;
private int chapterIndex = -1; // index for the chaptersList, useful in getting the next page.
private int imageIndex = 0; // image index for each chapter images.

public ComicextraRipper(URL url) throws IOException {
super(url);
}

@Override
protected String getDomain() {
return "comicextra.com";
}

@Override
public String getHost() {
return "comicextra";
}

@Override
public String getGID(URL url) throws MalformedURLException {
Matcher m1 = p1.matcher(url.toExternalForm());
if (m1.matches()) {
// URL is of comic( https://www.comicextra.com/comic/the-punisher-frank-castle-max).
urlType = UrlType.COMIC;
return m1.group(1);
}

Matcher m2 = p2.matcher(url.toExternalForm());
if (m2.matches()) {
// URL is of chapter( https://www.comicextra.com/the-punisher-frank-castle-max/chapter-75).
urlType = UrlType.CHAPTER;
return m2.group(1);
}

throw new MalformedURLException(
"Expected comicextra.com url of type: https://www.comicextra.com/comic/some-comic-name\n"
+ " or https://www.comicextra.com/some-comic-name/chapter-001 got " + url
+ " instead");
}

@Override
protected Document getFirstPage() throws IOException {
Document doc = null;

switch (urlType) {
case COMIC:
// For COMIC type url we extract the urls of each chapters and store them in chapters.
chaptersList = new ArrayList<>();
Document comicPage = Http.url(url).get();
Elements elements = comicPage.select("div.episode-list a");
for (Element e : elements) {
chaptersList.add(getCompleteChapterUrl(e.attr("abs:href")));
}

// Set the first chapter from the chapterList as the doc.
chapterIndex = 0;
doc = Http.url(chaptersList.get(chapterIndex)).get();
break;
case CHAPTER:
doc = Http.url(url).get();
break;
case UNKNOWN:
default:
throw new IOException("Unknown url type encountered.");
}

return doc;
}

@Override
public Document getNextPage(Document doc) throws IOException, URISyntaxException {
if (urlType == UrlType.COMIC) {
++chapterIndex;
imageIndex = 0; // Resetting the imagesIndex so that images prefix within each chapter starts from '001_'.
if (chapterIndex < chaptersList.size()) {
return Http.url(chaptersList.get(chapterIndex)).get();
}
}

return super.getNextPage(doc);
}

@Override
protected List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<>();

if (urlType == UrlType.COMIC || urlType == UrlType.CHAPTER) {
Elements images = page.select("img.chapter_img");
for (Element img : images) {
urls.add(img.attr("src"));
}
}

return urls;
}

@Override
protected void downloadURL(URL url, int index) {
String subdirectory = getSubDirectoryName();
String prefix = getPrefix(++imageIndex);

addURLToDownload(url, subdirectory, null, null, prefix, FILE_NAME, null, Boolean.TRUE);
}

/*
* This function appends /full at the end of the chapters url to get all the images for the
* chapter in the same Document.
*/
private String getCompleteChapterUrl(String chapterUrl) {
if (!chapterUrl.endsWith("/full")) {
chapterUrl = chapterUrl + "/full";
}
return chapterUrl;
}

/*
* This functions returns sub folder name for the current chapter.
*/
private String getSubDirectoryName() {
String subDirectory = "";

if (urlType == UrlType.COMIC) {
Matcher m = p2.matcher(chaptersList.get(chapterIndex));
if (m.matches()) {
subDirectory = m.group(2);
}
}

if (urlType == UrlType.CHAPTER) {
Matcher m = p2.matcher(url.toExternalForm());
if (m.matches()) {
subDirectory = m.group(2);
}
}

return subDirectory;
}

/*
* Enum to classify different types of urls.
*/
private enum UrlType {
COMIC, CHAPTER, UNKNOWN
}
}

@ -0,0 +1,180 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
* <a href="https://coomer.su/api/schema">See this link for the API schema</a>.
*/
public class CoomerPartyRipper extends AbstractJSONRipper {
private static final Logger LOGGER = LogManager.getLogger(CoomerPartyRipper.class);
private static final String IMG_URL_BASE = "https://c3.coomer.su/data";
private static final String VID_URL_BASE = "https://c1.coomer.su/data";
private static final Pattern IMG_PATTERN = Pattern.compile("^.*\\.(jpg|jpeg|png|gif|apng|webp|tif|tiff)$", Pattern.CASE_INSENSITIVE);
private static final Pattern VID_PATTERN = Pattern.compile("^.*\\.(webm|mp4|m4v)$", Pattern.CASE_INSENSITIVE);

// just so we can return a JSONObject from getFirstPage
private static final String KEY_WRAPPER_JSON_ARRAY = "array";

private static final String KEY_FILE = "file";
private static final String KEY_PATH = "path";
private static final String KEY_ATTACHMENTS = "attachments";

// Posts Request Endpoint
private static final String POSTS_ENDPOINT = "https://coomer.su/api/v1/%s/user/%s?o=%d";

// Pagination is strictly 50 posts per page, per API schema.
private Integer pageCount = 0;
private static final Integer postCount = 50;

// "Service" of the page to be ripped: Onlyfans, Fansly, Candfans
private final String service;

// Username of the page to be ripped
private final String user;


public CoomerPartyRipper(URL url) throws IOException {
super(url);
List<String> pathElements = Arrays.stream(url.getPath().split("/"))
.filter(element -> !element.isBlank())
.collect(Collectors.toList());

service = pathElements.get(0);
user = pathElements.get(2);

if (service == null || user == null || service.isBlank() || user.isBlank()) {
LOGGER.warn("service=" + service + ", user=" + user);
throw new MalformedURLException("Invalid coomer.party URL: " + url);
}
LOGGER.debug("Parsed service=" + service + " and user=" + user + " from " + url);
}

@Override
protected String getDomain() {
return "coomer.party";
}

@Override
public String getHost() {
return "coomer.party";
}

@Override
public boolean canRip(URL url) {
String host = url.getHost();
return host.endsWith("coomer.party") || host.endsWith("coomer.su");
}

@Override
public String getGID(URL url) {
return Utils.filesystemSafe(String.format("%s_%s", service, user));
}

private JSONObject getJsonPostsForOffset(Integer offset) throws IOException {
String apiUrl = String.format(POSTS_ENDPOINT, service, user, offset);

String jsonArrayString = Http.url(apiUrl)
.ignoreContentType()
.response()
.body();
JSONArray jsonArray = new JSONArray(jsonArrayString);

// Ideally we'd just return the JSONArray from here, but we have to wrap it in a JSONObject
JSONObject wrapperObject = new JSONObject();
wrapperObject.put(KEY_WRAPPER_JSON_ARRAY, jsonArray);
return wrapperObject;
}

@Override
protected JSONObject getFirstPage() throws IOException {
return getJsonPostsForOffset(0);
}

@Override
protected JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException {
pageCount++;
Integer offset = postCount * pageCount;
return getJsonPostsForOffset(offset);
}


@Override
protected List<String> getURLsFromJSON(JSONObject json) {
// extract the array from our wrapper JSONObject
JSONArray posts = json.getJSONArray(KEY_WRAPPER_JSON_ARRAY);
ArrayList<String> urls = new ArrayList<>();
for (int i = 0; i < posts.length(); i++) {
JSONObject post = posts.getJSONObject(i);
pullFileUrl(post, urls);
pullAttachmentUrls(post, urls);
}
LOGGER.debug("Pulled " + urls.size() + " URLs from " + posts.length() + " posts");
return urls;
}

@Override
protected void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}

private void pullFileUrl(JSONObject post, ArrayList<String> results) {
try {
JSONObject file = post.getJSONObject(KEY_FILE);
String path = file.getString(KEY_PATH);
if (isImage(path)) {
String url = IMG_URL_BASE + path;
results.add(url);
} else if (isVideo(path)) {
String url = VID_URL_BASE + path;
results.add(url);
} else {
LOGGER.error("Unknown extension for coomer.su path: " + path);
}
} catch (JSONException e) {
/* No-op */
LOGGER.error("Unable to Parse FileURL " + e.getMessage());
}
}

private void pullAttachmentUrls(JSONObject post, ArrayList<String> results) {
try {
JSONArray attachments = post.getJSONArray(KEY_ATTACHMENTS);
for (int i = 0; i < attachments.length(); i++) {
JSONObject attachment = attachments.getJSONObject(i);
pullFileUrl(attachment, results);
}
} catch (JSONException e) {
/* No-op */
LOGGER.error("Unable to Parse AttachmentURL " + e.getMessage());
}
}

private boolean isImage(String path) {
Matcher matcher = IMG_PATTERN.matcher(path);
return matcher.matches();
}

private boolean isVideo(String path) {
Matcher matcher = VID_PATTERN.matcher(path);
return matcher.matches();
}
}
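The new CoomerPartyRipper pages through the API purely by offset: page N maps to `o = N * 50`, matching the schema's fixed 50-posts-per-page stride. The offset arithmetic on its own, with a made-up username in the URL:

```java
class OffsetDemo {
    public static void main(String[] args) {
        final int postsPerPage = 50; // fixed stride per the coomer.su API schema
        for (int page = 0; page < 3; page++) {
            int offset = postsPerPage * page;
            // prints ?o=0, ?o=50, ?o=100 ("someuser" is a hypothetical account)
            System.out.println("https://coomer.su/api/v1/onlyfans/user/someuser?o=" + offset);
        }
    }
}
```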
|
@ -0,0 +1,55 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class CyberdropRipper extends AbstractHTMLRipper {

    public CyberdropRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "cyberdrop";
    }

    @Override
    public String getDomain() {
        return "cyberdrop.me";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p = Pattern.compile("^https?://cyberdrop\\.me/a/([a-zA-Z0-9]+).*?$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        throw new MalformedURLException("Expected cyberdrop.me URL format: " +
                "https://cyberdrop.me/a/xxxxxxxx - got " + url + " instead");
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }

    @Override
    protected List<String> getURLsFromPage(Document page) {
        ArrayList<String> urls = new ArrayList<>();
        for (Element element : page.getElementsByClass("image")) {
            urls.add(element.attr("href"));
        }
        return urls;
    }
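    // Illustrative jsoup behavior for the selection above; the markup is a
    // hypothetical album entry, not captured from cyberdrop.me:
    //   Document d = Jsoup.parse("<a class=\"image\" href=\"https://f.cyberdrop.me/x.png\">x</a>");
    //   d.getElementsByClass("image").first().attr("href");  // "https://f.cyberdrop.me/x.png"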
}
@ -0,0 +1,148 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import org.jetbrains.annotations.Nullable;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

public class DanbooruRipper extends AbstractJSONRipper {
    private static final String DOMAIN = "danbooru.donmai.us",
            HOST = "danbooru";
    private final OkHttpClient client;

    private Pattern gidPattern = null;

    private int currentPageNum = 1;

    public DanbooruRipper(URL url) throws IOException {
        super(url);
        this.client = new OkHttpClient.Builder()
                .readTimeout(60, TimeUnit.SECONDS)
                .writeTimeout(60, TimeUnit.SECONDS)
                .build();
    }

    @Override
    protected String getDomain() {
        return DOMAIN;
    }

    @Override
    public String getHost() {
        return HOST;
    }

    private String getPage(int num) throws MalformedURLException {
        return "https://" + getDomain() + "/posts.json?page=" + num + "&tags=" + getTag(url);
    }

    private final String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0";

    @Override
    protected JSONObject getFirstPage() throws MalformedURLException {
        return getCurrentPage();
    }

    @Override
    protected JSONObject getNextPage(JSONObject doc) throws IOException {
        return getCurrentPage();
    }

    @Nullable
    private JSONObject getCurrentPage() throws MalformedURLException {
        Request request = new Request.Builder()
                .url(getPage(currentPageNum))
                .header("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15E148 Safari/604.1")
                .header("Accept", "application/json,text/javascript,*/*;q=0.01")
                .header("Accept-Language", "en-US,en;q=0.9")
                .header("Sec-Fetch-Dest", "empty")
                .header("Sec-Fetch-Mode", "cors")
                .header("Sec-Fetch-Site", "same-origin")
                .header("Referer", "https://danbooru.donmai.us/")
                .header("X-Requested-With", "XMLHttpRequest")
                .header("Connection", "keep-alive")
                .build();
        Response response = null;
        currentPageNum++;
        try {
            response = client.newCall(request).execute();
            if (!response.isSuccessful()) throw new IOException("Unexpected code " + response);

            String responseData = response.body().string();
            JSONArray jsonArray = new JSONArray(responseData);
            if (!jsonArray.isEmpty()) {
                String newCompatibleJSON = "{ \"resources\":" + jsonArray + " }";
                return new JSONObject(newCompatibleJSON);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (response != null) {
                response.body().close();
            }
        }
        return null;
    }
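    // Why getCurrentPage() wraps the response: AbstractJSONRipper hands
    // getURLsFromJSON a JSONObject, while danbooru's /posts.json returns a
    // bare top-level array. Equivalent org.json sketch (sample data made up):
    //   JSONArray posts = new JSONArray("[{\"file_url\":\"https://example/a.jpg\"}]");
    //   JSONObject wrapped = new JSONObject().put("resources", posts);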

    @Override
    protected List<String> getURLsFromJSON(JSONObject json) {
        List<String> res = new ArrayList<>(100);
        JSONArray jsonArray = json.getJSONArray("resources");
        for (int i = 0; i < jsonArray.length(); i++) {
            if (jsonArray.getJSONObject(i).has("file_url")) {
                res.add(jsonArray.getJSONObject(i).getString("file_url"));
            }
        }
        return res;
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        try {
            return Utils.filesystemSafe(new URI(getTag(url).replaceAll("([?&])tags=", "")).getPath());
        } catch (URISyntaxException ex) {
            LOGGER.error(ex);
        }

        throw new MalformedURLException("Expected booru URL format: " + getDomain() + "/posts?tags=searchterm - got " + url + " instead");
    }

    @Override
    protected void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }

    private String getTag(URL url) throws MalformedURLException {
        gidPattern = Pattern.compile("https?://danbooru.donmai.us/(posts)?.*([?&]tags=([^&]*)(?:&z=([0-9]+))?$)");
        Matcher m = gidPattern.matcher(url.toExternalForm());

        if (m.matches()) {
            return m.group(3);
        }

        throw new MalformedURLException("Expected danbooru URL format: " + getDomain() + "/posts?tags=searchterm - got " + url + " instead");
    }

}
@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@ -37,7 +39,7 @@ public class DerpiRipper extends AbstractJSONRipper {
    }

    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
        String u = url.toExternalForm();
        String[] uu = u.split("\\?", 2);
        String newU = uu[0];
@ -54,7 +56,7 @@ public class DerpiRipper extends AbstractJSONRipper {
            newU += "&key=" + key;
        }

        return new URL(newU);
        return new URI(newU).toURL();
    }
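    // Pattern recurring throughout this change set: URL(String) construction
    // is swapped for new URI(...).toURL(), which validates the string first
    // (the URL(String) constructor is deprecated in recent JDKs). Sketch:
    //   URL u = new URI("https://example.com/search?q=x").toURL();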

    @Override
@ -99,10 +101,10 @@ public class DerpiRipper extends AbstractJSONRipper {
    }

    @Override
    public JSONObject getNextPage(JSONObject doc) throws IOException {
    public JSONObject getNextPage(JSONObject doc) throws IOException, URISyntaxException {
        currPage++;
        String u = currUrl.toExternalForm() + "&page=" + Integer.toString(currPage);
        JSONObject json = Http.url(new URL(u)).getJSON();
        JSONObject json = Http.url(new URI(u).toURL()).getJSON();
        JSONArray arr;
        if (json.has("images")) {
            arr = json.getJSONArray("images");
@ -13,12 +13,13 @@ import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
@ -28,7 +29,6 @@ import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
@ -383,11 +383,11 @@ public class DeviantartRipper extends AbstractHTMLRipper {
        try {
            String url = cleanURL();
            if (this.usingCatPath) {
                return (new URL(url + "?catpath=/&offset=" + offset));
                return (new URI(url + "?catpath=/&offset=" + offset)).toURL();
            } else {
                return (new URL(url + "?offset=" + offset));
                return (new URI(url + "?offset=" + offset).toURL());
            }
        } catch (MalformedURLException e) {
        } catch (MalformedURLException | URISyntaxException e) {
            e.printStackTrace();
        }
        return null;
@ -518,8 +518,8 @@ public class DeviantartRipper extends AbstractHTMLRipper {
     * @author MrPlaygon
     *
     */
    private class DeviantartImageThread extends Thread {
        private URL url;
    private class DeviantartImageThread implements Runnable {
        private final URL url;

        public DeviantartImageThread(URL url) {
            this.url = url;
@ -533,8 +533,6 @@ public class DeviantartRipper extends AbstractHTMLRipper {
        /**
         * Get URL to Artwork and return fullsize URL with file ending.
         *
         * @param page Like
         *             https://www.deviantart.com/apofiss/art/warmest-of-the-days-455668450
         * @return URL like
         *         https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/07f7a6bb-2d35-4630-93fc-be249af22b3e/d7jak0y-d20e5932-df72-4d13-b002-5e122037b373.jpg
         *
@ -630,11 +628,11 @@ public class DeviantartRipper extends AbstractHTMLRipper {
            }
            String[] tmpParts = downloadString.split("\\."); // split to get file ending

            addURLToDownload(new URL(downloadString), "", "", "", new HashMap<String, String>(),
            addURLToDownload(new URI(downloadString).toURL(), "", "", "", new HashMap<String, String>(),
                    title + "." + tmpParts[tmpParts.length - 1]);
            return;

        } catch (IOException e) {
        } catch (IOException | URISyntaxException e) {
            e.printStackTrace();
        }

@ -1,91 +0,0 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class DrawcrowdRipper extends AbstractHTMLRipper {

    public DrawcrowdRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "drawcrowd";
    }
    @Override
    public String getDomain() {
        return "drawcrowd.com";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p; Matcher m;

        p = Pattern.compile("^.*drawcrowd.com/projects/.*$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            throw new MalformedURLException("Cannot rip drawcrowd.com/projects/ pages");
        }

        p = Pattern.compile("^.*drawcrowd.com/([a-zA-Z0-9\-_]+).*$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }

        throw new MalformedURLException(
                "Expected drawcrowd.com gallery format: "
                        + "drawcrowd.com/username"
                        + " Got: " + url);
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(this.url).get();
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
        Elements loadMore = doc.select("a#load-more");
        if (loadMore.isEmpty()) {
            throw new IOException("No next page found");
        }
        if (!sleep(1000)) {
            throw new IOException("Interrupted while waiting for next page");
        }
        String nextPage = "http://drawcrowd.com" + loadMore.get(0).attr("href");
        return Http.url(nextPage).get();
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
        List<String> imageURLs = new ArrayList<>();
        for (Element thumb : page.select("div.item.asset img")) {
            String image = thumb.attr("src");
            image = image
                    .replaceAll("/medium/", "/large/")
                    .replaceAll("/small/", "/large/");
            imageURLs.add(image);
        }
        return imageURLs;
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }

}
@ -41,10 +41,6 @@ public class DribbbleRipper extends AbstractHTMLRipper {
                "dribbble.com/albumid - got " + url + " instead");
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
    }
    @Override
    public Document getNextPage(Document doc) throws IOException {
        // Find next page
@ -1,139 +0,0 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DuckmoviesRipper extends AbstractSingleFileRipper {
    public DuckmoviesRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public boolean hasQueueSupport() {
        return true;
    }

    @Override
    public boolean pageContainsAlbums(URL url) {
        Pattern pa = Pattern.compile("https?://[a-zA-Z0-9]+.[a-zA-Z]+/(models|category)/([a-zA-Z0-9_-])+/?");
        Matcher ma = pa.matcher(url.toExternalForm());
        if (ma.matches()) {
            return true;
        }
        pa = Pattern.compile("https?://[a-zA-Z0-9]+.[a-zA-Z]+/(models|category)/([a-zA-Z0-9_-])+/page/\d+/?");
        ma = pa.matcher(url.toExternalForm());
        if (ma.matches()) {
            return true;
        }
        return false;
    }

    @Override
    public List<String> getAlbumsToQueue(Document doc) {
        List<String> urlsToAddToQueue = new ArrayList<>();
        for (Element elem : doc.select(".post > li > div > div > a")) {
            urlsToAddToQueue.add(elem.attr("href"));
        }
        return urlsToAddToQueue;
    }

    private static List<String> explicit_domains = Arrays.asList(
            "vidporntube.fun",
            "pornbj.fun",
            "iwantporn.fun",
            "neoporn.fun",
            "yayporn.fun",
            "freshporn.co",
            "palapaja.stream",
            "freshporn.co",
            "pornvidx.fun",
            "palapaja.com"
    );

    @Override
    public String getHost() {
        return url.toExternalForm().split("/")[2];
    }

    @Override
    public String getDomain() {
        return url.toExternalForm().split("/")[2];
    }

    @Override
    public boolean canRip(URL url) {
        String url_name = url.toExternalForm();
        return explicit_domains.contains(url_name.split("/")[2]);
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(this.url).get();
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> results = new ArrayList<>();
        String duckMoviesUrl = doc.select("iframe").attr("src");
        try {
            Document duckDoc = Http.url(new URL(duckMoviesUrl)).get();
            String videoURL = duckDoc.select("source").attr("src");
            // remove any white spaces so we can download the movie without a 400 error
            videoURL = videoURL.replaceAll(" ", "%20");
            results.add(videoURL);
        } catch (MalformedURLException e) {
            LOGGER.error(duckMoviesUrl + " is not a valid url");
        } catch (IOException e) {
            LOGGER.error("Unable to load page " + duckMoviesUrl);
            e.printStackTrace();
        }
        return results;
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p = Pattern.compile("https://[a-zA-Z0-9]+\\.[a-zA-Z]+/([a-zA-Z0-9\-_]+)/?");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        p = Pattern.compile("https?://[a-zA-Z0-9]+.[a-zA-Z]+/(category|models)/([a-zA-Z0-9_-])+/?");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        p = Pattern.compile("https?://[a-zA-Z0-9]+.[a-zA-Z]+/(category|models)/([a-zA-Z0-9_-])+/page/\d+");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }

        throw new MalformedURLException(
                "Expected duckmovies format:"
                        + "domain.tld/Video-title"
                        + " Got: " + url);
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, "", "", null, null, AbstractRipper.getFileName(url, null, null).replaceAll("%20", "_"));
    }

    @Override
    public boolean tryResumeDownload() { return true; }
}
@ -42,12 +42,6 @@ public class DynastyscansRipper extends AbstractHTMLRipper {
                "dynasty-scans.com/chapters/ID - got " + url + " instead");
    }

    @Override
    public Document getFirstPage() throws IOException {
        // "url" is an instance field of the superclass
        return Http.url(url).get();
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
        Element elem = doc.select("a[id=next_link]").first();
@ -3,32 +3,76 @@ package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class E621Ripper extends AbstractHTMLRipper {
    private static final Logger logger = Logger.getLogger(E621Ripper.class);
    private static final Logger logger = LogManager.getLogger(E621Ripper.class);

    private static Pattern gidPattern = null;
    private static Pattern gidPattern2 = null;
    private static Pattern gidPatternPool = null;

    private static Pattern gidPatternNew = null;
    private static Pattern gidPatternPoolNew = null;

    private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");

    private Map<String, String> cookies = new HashMap<String, String>();
    private String userAgent = USER_AGENT;

    public E621Ripper(URL url) throws IOException {
        super(url);
    }

    private void loadConfig() {
        String cookiesString = Utils.getConfigString("e621.cookies", "");
        if (!cookiesString.equals("")) {
            cookies = RipUtils.getCookiesFromString(cookiesString);
            if (cookies.containsKey("cf_clearance"))
                sendUpdate(STATUS.DOWNLOAD_WARN, "Using CloudFlare captcha cookies, make sure to update them and set your browser's useragent in config!");
            if (cookies.containsKey("remember"))
                sendUpdate(STATUS.DOWNLOAD_WARN, "Logging in using auth cookie.");
        }
        userAgent = Utils.getConfigString("e621.useragent", USER_AGENT);
    }
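    // Config sketch for the two lookups above (rip.properties); the exact
    // cookie-string shape RipUtils.getCookiesFromString accepts is an
    // assumption here:
    //   e621.cookies = cf_clearance=<token>; remember=<token>
    //   e621.useragent = Mozilla/5.0 (...)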

    private void warnAboutBlacklist(Document page) {
        if (!page.select("div.hidden-posts-notice").isEmpty())
            sendUpdate(STATUS.DOWNLOAD_WARN, "Some posts are blacklisted. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options");
    }

    private Document getDocument(String url, int retries) throws IOException {
        return Http.url(url).userAgent(userAgent).retries(retries).cookies(cookies).get();
    }

    private Document getDocument(String url) throws IOException {
        return getDocument(url, 1);
    }

    @Override
    public DownloadThreadPool getThreadPool() {
        return e621ThreadPool;
@ -46,15 +90,20 @@ public class E621Ripper extends AbstractHTMLRipper {

    @Override
    public Document getFirstPage() throws IOException {
        if (url.getPath().startsWith("/pool/show/"))
            return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
        loadConfig();
        Document page;
        if (url.getPath().startsWith("/pool"))
            page = getDocument("https://e621.net/pools/" + getTerm(url));
        else
            return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
            page = getDocument("https://e621.net/posts?tags=" + getTerm(url));

        warnAboutBlacklist(page);
        return page;
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
        Elements elements = page.select("div > span.thumb > a");
        Elements elements = page.select("article > a");
        List<String> res = new ArrayList<>();

        for (Element e : elements) {
@ -68,8 +117,9 @@ public class E621Ripper extends AbstractHTMLRipper {

    @Override
    public Document getNextPage(Document page) throws IOException {
        if (!page.select("a.next_page").isEmpty()) {
            return Http.url(page.select("a.next_page").attr("abs:href")).get();
        warnAboutBlacklist(page);
        if (!page.select("a#paginator-next").isEmpty()) {
            return getDocument(page.select("a#paginator-next").attr("abs:href"));
        } else {
            throw new IOException("No more pages.");
        }
@ -82,12 +132,19 @@ public class E621Ripper extends AbstractHTMLRipper {
    }

    private String getTerm(URL url) throws MalformedURLException {
        // old url style => new url style:
        // /post/index/1/<tags> => /posts?tags=<tags>
        // /pool/show/<id> => /pools/id
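        // Concrete instances of that mapping (tag and pool id are made up):
        //   e621.net/post/index/1/rating:safe  ->  e621.net/posts?tags=rating:safe
        //   e621.net/pool/show/123             ->  e621.net/pools/123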
        if (gidPattern == null)
            gidPattern = Pattern.compile(
                    "^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'():,%\\-]+)(/.*)?(#.*)?$");
        if (gidPatternPool == null)
            gidPatternPool = Pattern.compile(
                    "^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\?.*)?(/.*)?(#.*)?$");
        if (gidPatternNew == null)
            gidPatternNew = Pattern.compile("^https?://(www\\.)?e621\\.net/posts\\?([\\S]*?)tags=([a-zA-Z0-9$_.+!*'(),%:\\-]+)(\\&[\\S]+)?");
        if (gidPatternPoolNew == null)
            gidPatternPoolNew = Pattern.compile("^https?://(www\\.)?e621\\.net/pools/([\\d]+)(\\?[\\S]*)?");

        Matcher m = gidPattern.matcher(url.toExternalForm());
        if (m.matches()) {
@ -100,36 +157,48 @@ public class E621Ripper extends AbstractHTMLRipper {
            return m.group(2);
        }

        m = gidPatternNew.matcher(url.toExternalForm());
        if (m.matches()) {
            LOGGER.info(m.group(3));
            return m.group(3);
        }

        m = gidPatternPoolNew.matcher(url.toExternalForm());
        if (m.matches()) {
            LOGGER.info(m.group(2));
            return m.group(2);
        }

        throw new MalformedURLException(
                "Expected e621.net URL format: e621.net/post/index/1/searchterm - got " + url + " instead");
                "Expected e621.net URL format: e621.net/posts?tags=searchterm - got " + url + " instead");
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        String prefix = "";
        if (url.getPath().startsWith("/pool/show/")) {
        if (url.getPath().startsWith("/pool")) {
            prefix = "pool_";
        }
        return Utils.filesystemSafe(prefix + getTerm(url));
    }

    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
        if (gidPattern2 == null)
            gidPattern2 = Pattern.compile(
                    "^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'():,%-]+)(/.*)?(#.*)?$");

        Matcher m = gidPattern2.matcher(url.toExternalForm());
        if (m.matches())
            return new URL("https://e621.net/post/index/1/" + m.group(2).replace("+", "%20"));
            return new URI("https://e621.net/post/index/1/" + m.group(2).replace("+", "%20")).toURL();

        return url;
    }

    public class E621FileThread extends Thread {
    public class E621FileThread implements Runnable {

        private URL url;
        private String index;
        private final URL url;
        private final String index;

        public E621FileThread(URL url, String index) {
            this.url = url;
@ -141,16 +210,16 @@ public class E621Ripper extends AbstractHTMLRipper {
        try {
            String fullSizedImage = getFullSizedImage(url);
            if (fullSizedImage != null && !fullSizedImage.equals("")) {
                addURLToDownload(new URL(fullSizedImage), index);
                addURLToDownload(new URI(fullSizedImage).toURL(), index);
            }
        } catch (IOException e) {
        } catch (IOException | URISyntaxException e) {
            logger.error("Unable to get full sized image from " + url);
        }
    }

    private String getFullSizedImage(URL imageURL) throws IOException {
        Document page = Http.url(imageURL).retries(3).get();
        Elements video = page.select("video > source");
        Document page = getDocument(imageURL.toExternalForm(), 3);
        /*Elements video = page.select("video > source");
        Elements flash = page.select("embed");
        Elements image = page.select("a#highres");
        if (video.size() > 0) {
@ -161,8 +230,15 @@ public class E621Ripper extends AbstractHTMLRipper {
            return image.attr("href");
        } else {
            throw new IOException();
        }
        }*/

        if (!page.select("div#image-download-link > a").isEmpty()) {
            return page.select("div#image-download-link > a").attr("abs:href");
        } else {
            if (!page.select("#blacklist-box").isEmpty())
                sendUpdate(RipStatusMessage.STATUS.RIP_ERRORED, "Cannot download image - blocked by blacklist. Consider logging in. Search for \"e621\" in this wiki page: https://github.com/RipMeApp/ripme/wiki/Config-options");
            throw new IOException();
        }
    }

}
@ -1,9 +1,23 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.File;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@ -11,46 +25,33 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.RipUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;

public class EHentaiRipper extends AbstractHTMLRipper {
    // All sleep times are in milliseconds
    private static final int PAGE_SLEEP_TIME = 3000;
    private static final int IMAGE_SLEEP_TIME = 1500;
    private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000;
    private static final int PAGE_SLEEP_TIME = 3000;
    private static final int IMAGE_SLEEP_TIME = 1500;
    private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000;
    private static final Map<String, String> cookies = new HashMap<>();

    private String lastURL = null;

    // Thread pool for finding direct image links from "image" pages (html)
    private DownloadThreadPool ehentaiThreadPool = new DownloadThreadPool("ehentai");
    @Override
    public DownloadThreadPool getThreadPool() {
        return ehentaiThreadPool;
    }

    // Current HTML document
    private Document albumDoc = null;

    private static final Map<String,String> cookies = new HashMap<>();
    static {
        cookies.put("nw", "1");
        cookies.put("tip", "1");
    }

    private String lastURL = null;
    // Thread pool for finding direct image links from "image" pages (html)
    private final DownloadThreadPool ehentaiThreadPool = new DownloadThreadPool("ehentai");
    // Current HTML document
    private Document albumDoc = null;

    public EHentaiRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public DownloadThreadPool getThreadPool() {
        return ehentaiThreadPool;
    }

    @Override
    public String getHost() {
        return "e-hentai";
@ -61,7 +62,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
        return "e-hentai.org";
    }

    public String getAlbumTitle(URL url) throws MalformedURLException {
    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
        try {
            // Attempt to use album title as GID
            if (albumDoc == null) {
@ -93,12 +94,6 @@ public class EHentaiRipper extends AbstractHTMLRipper {
                        + " Got: " + url);
    }

    /**
     * Attempts to get page, checks for IP ban, waits.
     * @param url
     * @return Page document
     * @throws IOException If page loading errors, or if retries are exhausted
     */
    private Document getPageWithRetries(URL url) throws IOException {
        Document doc;
        int retries = 3;
@ -106,9 +101,9 @@ public class EHentaiRipper extends AbstractHTMLRipper {
            sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
            LOGGER.info("Retrieving " + url);
            doc = Http.url(url)
                    .referrer(this.url)
                    .cookies(cookies)
                    .get();
                    .referrer(this.url)
                    .cookies(cookies)
                    .get();
            if (doc.toString().contains("IP address will be automatically banned")) {
                if (retries == 0) {
                    throw new IOException("Hit rate limit and maximum number of retries, giving up");
@ -120,8 +115,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
                } catch (InterruptedException e) {
                    throw new IOException("Interrupted while waiting for rate limit to subside");
                }
            }
            else {
            } else {
                return doc;
            }
        }
@ -155,7 +149,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
    public Document getNextPage(Document doc) throws IOException, URISyntaxException {
        // Check if we've stopped
        if (isStopped()) {
            throw new IOException("Ripping interrupted");
@ -175,7 +169,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
        // Sleep before loading next page
        sleep(PAGE_SLEEP_TIME);
        // Load next page
        Document nextPage = getPageWithRetries(new URL(nextURL));
        Document nextPage = getPageWithRetries(new URI(nextURL).toURL());
        this.lastURL = nextURL;
        return nextPage;
    }
@ -183,7 +177,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
    @Override
    public List<String> getURLsFromPage(Document page) {
        List<String> imageURLs = new ArrayList<>();
        Elements thumbs = page.select("#gdt > .gdtm a");
        Elements thumbs = page.select("#gdt > a");
        // Iterate over images on page
        for (Element thumb : thumbs) {
            imageURLs.add(thumb.attr("href"));
@ -193,27 +187,26 @@ public class EHentaiRipper extends AbstractHTMLRipper {

    @Override
    public void downloadURL(URL url, int index) {
        EHentaiImageThread t = new EHentaiImageThread(url, index, this.workingDir);
        EHentaiImageThread t = new EHentaiImageThread(url, index, this.workingDir.toPath());
        ehentaiThreadPool.addThread(t);
        try {
            Thread.sleep(IMAGE_SLEEP_TIME);
        }
        catch (InterruptedException e) {
        } catch (InterruptedException e) {
            LOGGER.warn("Interrupted while waiting to load next image", e);
        }
    }

    /**
     * Helper class to find and download images found on "image" pages
     *
     * <p>
     * Handles case when site has IP-banned the user.
     */
    private class EHentaiImageThread extends Thread {
        private URL url;
        private int index;
        private File workingDir;
    private class EHentaiImageThread implements Runnable {
        private final URL url;
        private final int index;
        private final Path workingDir;

        EHentaiImageThread(URL url, int index, File workingDir) {
        EHentaiImageThread(URL url, int index, Path workingDir) {
            super();
            this.url = url;
            this.index = index;
@ -246,22 +239,21 @@ public class EHentaiRipper extends AbstractHTMLRipper {
            Matcher m = p.matcher(imgsrc);
            if (m.matches()) {
                // Manually discover filename from URL
                String savePath = this.workingDir + File.separator;
                String savePath = this.workingDir + "/";
                if (Utils.getConfigBoolean("download.save_order", true)) {
                    savePath += String.format("%03d_", index);
                }
                savePath += m.group(1);
                addURLToDownload(new URL(imgsrc), new File(savePath));
            }
            else {
                addURLToDownload(new URI(imgsrc).toURL(), Paths.get(savePath));
            } else {
                // Provide prefix and let the AbstractRipper "guess" the filename
                String prefix = "";
                if (Utils.getConfigBoolean("download.save_order", true)) {
                    prefix = String.format("%03d_", index);
                }
                addURLToDownload(new URL(imgsrc), prefix);
                addURLToDownload(new URI(imgsrc).toURL(), prefix);
            }
        } catch (IOException e) {
        } catch (IOException | URISyntaxException e) {
            LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
        }
    }
@ -1,8 +1,7 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@ -10,8 +9,6 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.utils.Utils;
import org.json.JSONObject;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
@ -23,13 +20,7 @@ import com.rarchives.ripme.utils.Http;

public class EightmusesRipper extends AbstractHTMLRipper {

    private Document albumDoc = null;
    private Map<String,String> cookies = new HashMap<>();
    // TODO put up a wiki page on using maps to store titles
    // the map for storing the title of each album when downloading sub albums
    private Map<URL,String> urlTitles = new HashMap<>();

    private Boolean rippingSubalbums = false;
    private Map<String, String> cookies = new HashMap<>();

    public EightmusesRipper(URL url) throws IOException {
        super(url);
@ -61,10 +52,10 @@ public class EightmusesRipper extends AbstractHTMLRipper {
    }

    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
        try {
            // Attempt to use album title as GID
            Element titleElement = getFirstPage().select("meta[name=description]").first();
            Element titleElement = getCachedFirstPage().select("meta[name=description]").first();
            String title = titleElement.attr("content");
            title = title.replace("A huge collection of free porn comics for adults. Read", "");
            title = title.replace("online for free at 8muses.com", "");
@ -78,21 +69,18 @@ public class EightmusesRipper extends AbstractHTMLRipper {

    @Override
    public Document getFirstPage() throws IOException {
        if (albumDoc == null) {
            Response resp = Http.url(url).response();
            cookies.putAll(resp.cookies());
            albumDoc = resp.parse();
        }
        return albumDoc;
        Response resp = Http.url(url).response();
        cookies.putAll(resp.cookies());
        return resp.parse();
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
        List<String> imageURLs = new ArrayList<>();
        int x = 1;
        // This contains the thumbnails of all images on the page
        Elements pageImages = page.getElementsByClass("c-tile");
        for (Element thumb : pageImages) {
        for (int i = 0; i < pageImages.size(); i++) {
            Element thumb = pageImages.get(i);
            // If true this link is a sub album
            if (thumb.attr("href").contains("/comics/album/")) {
                String subUrl = "https://www.8muses.com" + thumb.attr("href");
@ -116,24 +104,14 @@ public class EightmusesRipper extends AbstractHTMLRipper {
                if (thumb.hasAttr("data-cfsrc")) {
                    image = thumb.attr("data-cfsrc");
                } else {
                    // Deobfuscate the JSON data
                    String rawJson = deobfuscateJSON(page.select("script#ractive-public").html()
                            .replaceAll("&gt;", ">").replaceAll("&lt;", "<").replace("&amp;", "&"));
                    JSONObject json = new JSONObject(rawJson);
                    Element imageElement = thumb.select("img").first();
                    image = "https://comics.8muses.com" + imageElement.attr("data-src").replace("/th/", "/fl/");
                    try {
                        for (int i = 0; i != json.getJSONArray("pictures").length(); i++) {
                            image = "https://www.8muses.com/image/fl/" + json.getJSONArray("pictures").getJSONObject(i).getString("publicUri");
                            URL imageUrl = new URL(image);
                            addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, "", null, true);
                            // X is our page index
                            x++;
                            if (isThisATest()) {
                                break;
                            }
                        }
                        return imageURLs;
                    } catch (MalformedURLException e) {
                        URL imageUrl = new URI(image).toURL();
                        addURLToDownload(imageUrl, getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, getPrefixShort(i), "", null, true);
                    } catch (MalformedURLException | URISyntaxException e) {
                        LOGGER.error("\"" + image + "\" is malformed");
                        LOGGER.error(e.getMessage());
                    }
                }
                if (!image.contains("8muses.com")) {
@ -173,25 +151,4 @@ public class EightmusesRipper extends AbstractHTMLRipper {
    public String getPrefixShort(int index) {
        return String.format("%03d", index);
    }

    private String deobfuscateJSON(String obfuscatedString) {
        StringBuilder deobfuscatedString = new StringBuilder();
        // The first char in one of 8muses obfuscated strings is always ! so we replace it
        for (char ch : obfuscatedString.replaceFirst("!", "").toCharArray()) {
            deobfuscatedString.append(deobfuscateChar(ch));
        }
        return deobfuscatedString.toString();
    }

    private String deobfuscateChar(char c) {
        if ((int) c == 32) {
            return fromCharCode(32);
        }
        return fromCharCode(33 + (c + 14) % 94);
    }
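    // The shift above is a ROT47-style rotation over the 94 printable ASCII
    // characters (33..126). Worked example for c = 'a' (97):
    //   33 + (97 + 14) % 94 = 33 + 17 = 50, i.e. the character '2'.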

    private static String fromCharCode(int... codePoints) {
        return new String(codePoints, 0, codePoints.length);
    }
}
@ -7,6 +7,8 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@ -93,11 +95,11 @@ public class EroShareRipper extends AbstractHTMLRipper {
    }

    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
        if (!is_profile(url)) {
            try {
                // Attempt to use album title as GID
                Element titleElement = getFirstPage().select("meta[property=og:title]").first();
                Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
                String title = titleElement.attr("content");
                title = title.substring(title.lastIndexOf('/') + 1);
                return getHost() + "_" + getGID(url) + "_" + title.trim();
@ -119,7 +121,6 @@ public class EroShareRipper extends AbstractHTMLRipper {
        for (Element img : imgs) {
            if (img.hasClass("album-image")) {
                String imageURL = img.attr("src");
                imageURL = imageURL;
                URLs.add(imageURL);
            }
        }
@ -195,7 +196,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
            throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album or eroshae.com/album");
    }

    public static List<URL> getURLs(URL url) throws IOException {
    public static List<URL> getURLs(URL url) throws IOException, URISyntaxException {

        Response resp = Http.url(url)
                .ignoreContentType()
@ -209,7 +210,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
        for (Element img : imgs) {
            if (img.hasClass("album-image")) {
                String imageURL = img.attr("src");
                URLs.add(new URL(imageURL));
                URLs.add(new URI(imageURL).toURL());
            }
        }
        // Videos
@ -218,7 +219,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
            if (vid.hasClass("album-video")) {
                Elements source = vid.getElementsByTag("source");
                String videoURL = source.first().attr("src");
                URLs.add(new URL(videoURL));
                URLs.add(new URI(videoURL).toURL());
            }
        }
@ -9,6 +9,8 @@ import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
@ -48,11 +50,6 @@ public class ErofusRipper extends AbstractHTMLRipper {
        return m.group(m.groupCount());
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
        LOGGER.info(page);
@ -94,8 +91,8 @@ public class ErofusRipper extends AbstractHTMLRipper {
                Map<String, String> opts = new HashMap<String, String>();
                opts.put("subdirectory", page.title().replaceAll(" \\| Erofus - Sex and Porn Comics", "").replaceAll(" ", "_"));
                opts.put("prefix", getPrefix(x));
                addURLToDownload(new URL(image), opts);
            } catch (MalformedURLException e) {
                addURLToDownload(new URI(image).toURL(), opts);
            } catch (MalformedURLException | URISyntaxException e) {
                LOGGER.info(e.getMessage());
            }
            x++;
@ -2,16 +2,19 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.utils.Utils;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
@ -23,7 +26,7 @@ import com.rarchives.ripme.utils.Http;
public class EromeRipper extends AbstractHTMLRipper {

    boolean rippingProfile;

    private HashMap<String, String> cookies = new HashMap<>();

    public EromeRipper(URL url) throws IOException {
        super(url);
@ -31,17 +34,17 @@ public class EromeRipper extends AbstractHTMLRipper {

    @Override
    public String getDomain() {
        return "erome.com";
        return "erome.com";
    }

    @Override
    public String getHost() {
        return "erome";
        return "erome";
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
        addURLToDownload(url, getPrefix(index), "", "erome.com", this.cookies);
    }

    @Override
@ -66,39 +69,40 @@ public class EromeRipper extends AbstractHTMLRipper {
    }

    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
        try {
            // Attempt to use album title as GID
            Element titleElement = getFirstPage().select("meta[property=og:title]").first();
            String title = titleElement.attr("content");
            title = title.substring(title.lastIndexOf('/') + 1);
            return getHost() + "_" + getGID(url) + "_" + title.trim();
        } catch (IOException e) {
            // Fall back to default album naming convention
            LOGGER.info("Unable to find title at " + url);
        } catch (NullPointerException e) {
            return getHost() + "_" + getGID(url);
        }
        return super.getAlbumTitle(url);
    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
        try {
            // Attempt to use album title as GID
            Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
            String title = titleElement.attr("content");
            title = title.substring(title.lastIndexOf('/') + 1);
            return getHost() + "_" + getGID(url) + "_" + title.trim();
        } catch (IOException e) {
            // Fall back to default album naming convention
            LOGGER.info("Unable to find title at " + url);
        } catch (NullPointerException e) {
            return getHost() + "_" + getGID(url);
        }
        return super.getAlbumTitle(url);
    }

    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        return new URL(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com"));
    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
        return new URI(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com")).toURL();
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> URLs = new ArrayList<>();
        return getMediaFromPage(doc);
    }

    @Override
    public Document getFirstPage() throws IOException {
        this.setAuthCookie();
        Response resp = Http.url(this.url)
                .ignoreContentType()
                .response();
                .cookies(cookies)
                .ignoreContentType()
                .response();

        return resp.parse();
    }
@ -124,18 +128,17 @@ public class EromeRipper extends AbstractHTMLRipper {
    private List<String> getMediaFromPage(Document doc) {
        List<String> results = new ArrayList<>();
        for (Element el : doc.select("img.img-front")) {
            if (el.hasAttr("src")) {
                if (el.attr("src").startsWith("https:")) {
                    results.add(el.attr("src"));
                } else {
                    results.add("https:" + el.attr("src"));
                }
            } else if (el.hasAttr("data-src")) {
                // also add images that are not yet loaded (all images are lazy-loaded as we scroll)
                results.add(el.attr("data-src"));
            }
        }
            if (el.hasAttr("data-src")) {
                // also add images that are not yet loaded (all images are lazy-loaded as we scroll)
                results.add(el.attr("data-src"));
            } else if (el.hasAttr("src")) {
                if (el.attr("src").startsWith("https:")) {
                    results.add(el.attr("src"));
                } else {
                    results.add("https:" + el.attr("src"));
                }
            }
        }
        for (Element el : doc.select("source[label=HD]")) {
            if (el.attr("src").startsWith("https:")) {
                results.add(el.attr("src"));
@ -152,7 +155,22 @@ public class EromeRipper extends AbstractHTMLRipper {
                results.add("https:" + el.attr("src"));
            }
        }

        if (results.size() == 0) {
            if (cookies.isEmpty()) {
                LOGGER.warn("You might try setting erome.laravel_session manually " +
                        "if you think this page definitely contains media.");
            }
        }

        return results;
    }

    private void setAuthCookie() {
        String sessionId = Utils.getConfigString("erome.laravel_session", null);
        if (sessionId != null) {
            cookies.put("laravel_session", sessionId);
        }
    }
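    // Usage sketch (rip.properties) for the key read above; the value is the
    // laravel_session cookie copied from a logged-in browser session:
    //   erome.laravel_session = <cookie value>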

}
@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@ -11,7 +13,6 @@ import java.util.regex.Pattern;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
@ -57,8 +58,8 @@ public class ErotivRipper extends AbstractHTMLRipper {
    }

    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        return new URL(url.toExternalForm().replaceAll("https?://www.erotiv.io", "https://erotiv.io"));
    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
        return new URI(url.toExternalForm().replaceAll("https?://www.erotiv.io", "https://erotiv.io")).toURL();
    }

    @Override
@ -12,7 +12,6 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class FemjoyhunterRipper extends AbstractHTMLRipper {

@ -41,12 +40,6 @@ public class FemjoyhunterRipper extends AbstractHTMLRipper {
                "femjoyhunter.com/ID - got " + url + " instead");
    }

    @Override
    public Document getFirstPage() throws IOException {
        // "url" is an instance field of the superclass
        return Http.url(url).get();
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<>();
@ -1,72 +1,66 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class FitnakedgirlsRipper extends AbstractHTMLRipper {

    public FitnakedgirlsRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "fitnakedgirls";
    }

    @Override
    public String getDomain() {
        return "fitnakedgirls.com";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p;
        Matcher m;

        p = Pattern.compile("^.*fitnakedgirls\\.com/gallery/(.+)$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }

        throw new MalformedURLException(
                "Expected fitnakedgirls.com gallery format: " + "fitnakedgirls.com/gallery/####" + " Got: " + url);
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> imageURLs = new ArrayList<>();

        Elements imgs = doc.select("div[class*=wp-tiles-tile-bg] > img");
        for (Element img : imgs) {
            String imgSrc = img.attr("src");
            imageURLs.add(imgSrc);
        }

        return imageURLs;
    }

    @Override
    public void downloadURL(URL url, int index) {
        // Send referrer when downloading images
        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
    }
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;

public class FitnakedgirlsRipper extends AbstractHTMLRipper {

    public FitnakedgirlsRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "fitnakedgirls";
    }

    @Override
    public String getDomain() {
        return "fitnakedgirls.com";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p;
        Matcher m;

        p = Pattern.compile("^.*fitnakedgirls\\.com/gallery/(.+)$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }

        throw new MalformedURLException(
                "Expected fitnakedgirls.com gallery format: " + "fitnakedgirls.com/gallery/####" + " Got: " + url);
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> imageURLs = new ArrayList<>();

        Elements imgs = doc.select("div[class*=wp-tiles-tile-bg] > img");
        for (Element img : imgs) {
            String imgSrc = img.attr("src");
            imageURLs.add(imgSrc);
        }

        return imageURLs;
    }

    @Override
    public void downloadURL(URL url, int index) {
        // Send referrer when downloading images
        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
    }
}
@@ -1,10 +1,9 @@
 package com.rarchives.ripme.ripper.rippers;

-import java.io.File;
 import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
-import java.net.URL;
+import java.net.*;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
@@ -163,8 +162,8 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
     }

     @Override
-    public JSONObject getFirstPage() throws IOException {
-        URL apiURL = new URL(baseURL + "&consumer_key=" + CONSUMER_KEY);
+    public JSONObject getFirstPage() throws IOException, URISyntaxException {
+        URL apiURL = new URI(baseURL + "&consumer_key=" + CONSUMER_KEY).toURL();
         LOGGER.debug("apiURL: " + apiURL);
         JSONObject json = Http.url(apiURL).getJSON();

@@ -231,7 +230,7 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
     }

     @Override
-    public JSONObject getNextPage(JSONObject json) throws IOException {
+    public JSONObject getNextPage(JSONObject json) throws IOException, URISyntaxException {
         if (isThisATest()) {
             return null;
         }
@@ -248,9 +247,9 @@ public class FivehundredpxRipper extends AbstractJSONRipper {

         sleep(500);
         ++page;
-        URL apiURL = new URL(baseURL
+        URL apiURL = new URI(baseURL
                 + "&page=" + page
-                + "&consumer_key=" + CONSUMER_KEY);
+                + "&consumer_key=" + CONSUMER_KEY).toURL();
         return Http.url(apiURL).getJSON();
     }

@@ -295,14 +294,9 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
                 }
             }
         }
         if (imageURL == null) {
             LOGGER.error("Failed to find image for photo " + photo.toString());
         }
-        else {
-            imageURLs.add(imageURL);
-            if (isThisATest()) {
-                break;
-            }
-        }
+        imageURLs.add(imageURL);
+        if (isThisATest()) {
+            break;
+        }
     }
     return imageURLs;
@@ -310,13 +304,13 @@ public class FivehundredpxRipper extends AbstractJSONRipper {

     private boolean urlExists(String url) {
         try {
-            HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
+            HttpURLConnection connection = (HttpURLConnection) new URI(url).toURL().openConnection();
             connection.setRequestMethod("HEAD");
             if (connection.getResponseCode() != 200) {
                 throw new IOException("Couldn't find full-size image at " + url);
             }
             return true;
-        } catch (IOException e) {
+        } catch (IOException | URISyntaxException e) {
             return false;
         }
     }
@@ -330,8 +324,8 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
     public void downloadURL(URL url, int index) {
         String u = url.toExternalForm();
         String[] fields = u.split("/");
-        String prefix = getPrefix(index) + fields[fields.length - 3];
-        File saveAs = new File(getWorkingDir() + File.separator + prefix + ".jpg");
+        String prefix = "/" + getPrefix(index) + fields[fields.length - 3];
+        Path saveAs = Paths.get(getWorkingDir() + prefix + ".jpg");
         addURLToDownload(url, saveAs, "", null, false);
     }

@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;

 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.*;
 import java.util.regex.Matcher;
@@ -10,6 +12,7 @@ import java.util.regex.Pattern;
 import com.rarchives.ripme.ui.RipStatusMessage;
 import org.json.JSONArray;
 import org.json.JSONObject;
+import org.json.JSONException;
 import org.jsoup.nodes.Document;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@@ -19,8 +22,23 @@ import org.jsoup.nodes.Element;

 public class FlickrRipper extends AbstractHTMLRipper {

-    private Document albumDoc = null;
     private final DownloadThreadPool flickrThreadPool;
+
+    private enum UrlType {
+        USER,
+        PHOTOSET
+    }
+
+    private class Album {
+        final UrlType type;
+        final String id;
+
+        Album(UrlType type, String id) {
+            this.type = type;
+            this.id = id;
+        }
+    }
+
     @Override
     public DownloadThreadPool getThreadPool() {
         return flickrThreadPool;
@@ -46,7 +64,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
     }

     @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
+    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
         String sUrl = url.toExternalForm();
         // Strip out https
         sUrl = sUrl.replace("https://secure.flickr.com", "http://www.flickr.com");
@@ -57,7 +75,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
             }
             sUrl += "pool";
         }
-        return new URL(sUrl);
+        return new URI(sUrl).toURL();
     }
     // FLickr is one of those sites what includes a api key in sites javascript
     // TODO let the user provide their own api key
@@ -81,40 +99,44 @@ public class FlickrRipper extends AbstractHTMLRipper {
     }

     // The flickr api is a monster of weird settings so we just request everything that the webview does
-    private String apiURLBuilder(String photoset, String pageNumber, String apiKey) {
-        LOGGER.info("https://api.flickr.com/services/rest?extras=can_addmeta," +
-                "can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
-                "date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
-                "owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
-                "url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
-                "is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
-                "get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
-                ",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
-                "photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" +
-                "api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1");
+    private String apiURLBuilder(Album album, String pageNumber, String apiKey) {
+        String method = null;
+        String idField = null;
+        switch (album.type) {
+            case PHOTOSET:
+                method = "flickr.photosets.getPhotos";
+                idField = "photoset_id=" + album.id;
+                break;
+            case USER:
+                method = "flickr.people.getPhotos";
+                idField = "user_id=" + album.id;
+                break;
+        }
+
         return "https://api.flickr.com/services/rest?extras=can_addmeta," +
-                "can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
-                "date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
-                "owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
-                "url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
-                "is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
-                "get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
-                ",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
-                "photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" +
-                "api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1";
+                "can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
+                "date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
+                "owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
+                "url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
+                "is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
+                "get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
+                ",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
+                idField + "&viewerNSID=&method=" + method + "&csrf=&" +
+                "api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1";
     }

     private JSONObject getJSON(String page, String apiKey) {
         URL pageURL = null;
         String apiURL = null;
         try {
-            apiURL = apiURLBuilder(getPhotosetID(url.toExternalForm()), page, apiKey);
-            pageURL = new URL(apiURL);
-        } catch (MalformedURLException e) {
+            apiURL = apiURLBuilder(getAlbum(url.toExternalForm()), page, apiKey);
+            pageURL = new URI(apiURL).toURL();
+        } catch (MalformedURLException | URISyntaxException e) {
             LOGGER.error("Unable to get api link " + apiURL + " is malformed");
         }
         try {
-            LOGGER.info(Http.url(pageURL).ignoreContentType().get().text());
+            LOGGER.info("Fetching: " + apiURL);
+            LOGGER.info("Response: " + Http.url(pageURL).ignoreContentType().get().text());
             return new JSONObject(Http.url(pageURL).ignoreContentType().get().text());
         } catch (IOException e) {
             LOGGER.error("Unable to get api link " + apiURL + " is malformed");
@@ -122,31 +144,42 @@ public class FlickrRipper extends AbstractHTMLRipper {
         }
     }

-    private String getPhotosetID(String url) {
+    private Album getAlbum(String url) throws MalformedURLException {
         Pattern p; Matcher m;

-        // Root: https://www.flickr.com/photos/115858035@N04/
+        // User photostream: https://www.flickr.com/photos/115858035@N04/
         // Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/

         final String domainRegex = "https?://[wm.]*flickr.com";
         final String userRegex = "[a-zA-Z0-9@_-]+";
         // Album
-        p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/(sets|albums)/([0-9]+)/?.*$");
+        p = Pattern.compile("^" + domainRegex + "/photos/" + userRegex + "/(sets|albums)/([0-9]+)/?.*$");
         m = p.matcher(url);
         if (m.matches()) {
-            return m.group(3);
+            return new Album(UrlType.PHOTOSET, m.group(2));
         }
-        return null;
+
+        // User photostream
+        p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/?$");
+        m = p.matcher(url);
+        if (m.matches()) {
+            return new Album(UrlType.USER, m.group(1));
+        }
+
+        String errorMessage = "Failed to extract photoset ID from url: " + url;
+
+        LOGGER.error(errorMessage);
+        throw new MalformedURLException(errorMessage);
     }

     @Override
-    public String getAlbumTitle(URL url) throws MalformedURLException {
+    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
         if (!url.toExternalForm().contains("/sets/")) {
             return super.getAlbumTitle(url);
         }
         try {
             // Attempt to use album title as GID
-            Document doc = getFirstPage();
+            Document doc = getCachedFirstPage();
             String user = url.toExternalForm();
             user = user.substring(user.indexOf("/photos/") + "/photos/".length());
             user = user.substring(0, user.indexOf("/"));
@@ -196,13 +229,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
                 + " Got: " + url);
     }

-    @Override
-    public Document getFirstPage() throws IOException {
-        if (albumDoc == null) {
-            albumDoc = Http.url(url).get();
-        }
-        return albumDoc;
-    }
-
     @Override
     public List<String> getURLsFromPage(Document doc) {
@@ -214,15 +240,29 @@ public class FlickrRipper extends AbstractHTMLRipper {
             if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
                 break;
             } else {
-                int totalPages = jsonData.getJSONObject("photoset").getInt("pages");
+                // Determine root key
+                JSONObject rootData;
+
+                try {
+                    rootData = jsonData.getJSONObject("photoset");
+                } catch (JSONException e) {
+                    try {
+                        rootData = jsonData.getJSONObject("photos");
+                    } catch (JSONException innerE) {
+                        LOGGER.error("Unable to find photos in response");
+                        break;
+                    }
+                }
+
+                int totalPages = rootData.getInt("pages");
                 LOGGER.info(jsonData);
-                JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
+                JSONArray pictures = rootData.getJSONArray("photo");
                 for (int i = 0; i < pictures.length(); i++) {
                     LOGGER.info(i);
                     JSONObject data = (JSONObject) pictures.get(i);
                     try {
                         addURLToDownload(getLargestImageURL(data.getString("id"), apiKey));
-                    } catch (MalformedURLException e) {
+                    } catch (MalformedURLException | URISyntaxException e) {
                         LOGGER.error("Flickr MalformedURLException: " + e.getMessage());
                     }

@@ -245,11 +285,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
         addURLToDownload(url, getPrefix(index));
     }

-    private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException {
+    private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException, URISyntaxException {
         TreeMap<Integer, String> imageURLMap = new TreeMap<>();

         try {
-            URL imageAPIURL = new URL("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1");
+            URL imageAPIURL = new URI("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1").toURL();
             JSONArray imageSizes = new JSONObject(Http.url(imageAPIURL).ignoreContentType().get().text()).getJSONObject("sizes").getJSONArray("size");
             for (int i = 0; i < imageSizes.length(); i++) {
                 JSONObject imageInfo = imageSizes.getJSONObject(i);
@@ -264,6 +304,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
             LOGGER.error("IOException while looking at image sizes: " + e.getMessage());
         }

-        return new URL(imageURLMap.lastEntry().getValue());
+        return new URI(imageURLMap.lastEntry().getValue()).toURL();
     }
 }

@@ -10,17 +10,10 @@ import java.util.regex.Pattern;

 import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
 import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;

-import com.rarchives.ripme.ripper.VideoRipper;
 import com.rarchives.ripme.utils.Http;

-public class MulemaxRipper extends AbstractSingleFileRipper {
-
-    private static final String HOST = "mulemax";
-
-    public MulemaxRipper(URL url) throws IOException {
+public class FooktubeRipper extends AbstractSingleFileRipper {
+
+    public FooktubeRipper(URL url) throws IOException {
         super(url);
     }

@@ -34,14 +27,10 @@ public class MulemaxRipper extends AbstractSingleFileRipper {
         return "mulemax.com";
     }

-    @Override
-    public Document getFirstPage() throws IOException {
-        return Http.url(url).get();
-    }
-
     @Override
     public boolean canRip(URL url) {
-        Pattern p = Pattern.compile("^https?://.*mulemax\\.com/video/(.*)/.*$");
+        Pattern p = Pattern.compile("^https?://.*fooktube\\.com/video/(.*)/.*$");
         Matcher m = p.matcher(url.toExternalForm());
         return m.matches();
     }
@@ -53,15 +42,15 @@ public class MulemaxRipper extends AbstractSingleFileRipper {

     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://.*mulemax\\.com/video/(.*)/(.*)$");
+        Pattern p = Pattern.compile("^https?://.*fooktube\\.com/video/(.*)/(.*)$");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(2);
         }

         throw new MalformedURLException(
-                "Expected mulemax format:"
-                        + "mulemax.com/video/####"
+                "Expected fooktube format:"
+                        + "fooktube.com/video/####"
                         + " Got: " + url);
     }

@@ -0,0 +1,74 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class FreeComicOnlineRipper extends AbstractHTMLRipper {

    public FreeComicOnlineRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "freecomiconline";
    }

    @Override
    public String getDomain() {
        return "freecomiconline.me";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p = Pattern.compile("https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1) + "_" + m.group(2);
        }
        p = Pattern.compile("^https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/?$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        throw new MalformedURLException("Expected freecomiconline URL format: " +
                "freecomiconline.me/TITLE/CHAPTER - got " + url + " instead");
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
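        // Assumed markup (inferred, not documented by the site): "div.select-pagination"
        // holds two anchors (previous, next), so get(1) is read as the next-chapter link;
        // an href that does not match the chapter pattern below means no further chapter.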
        String nextPage = doc.select("div.select-pagination a").get(1).attr("href");
        String nextUrl = "";
        Pattern p = Pattern.compile("https://freecomiconline.me/comic/([a-zA-Z0-9_\\-]+)/([a-zA-Z0-9_\\-]+)/?$");
        Matcher m = p.matcher(nextPage);
        if(m.matches()){
            nextUrl = m.group(0);
        }
        if(nextUrl.equals("")) throw new IOException("No more pages");
        sleep(500);
        return Http.url(nextUrl).get();
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<>();
        for (Element el : doc.select(".wp-manga-chapter-img")) {
            result.add(el.attr("src"));
        }
        return result;
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }
}

@@ -1,10 +1,12 @@
 package com.rarchives.ripme.ripper.rippers;

 import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.OutputStream;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -15,11 +17,10 @@ import java.util.regex.Pattern;
 import com.rarchives.ripme.ui.RipStatusMessage;
 import com.rarchives.ripme.utils.Utils;
 import org.jsoup.Connection.Response;
-import org.jsoup.HttpStatusException;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import org.jsoup.safety.Whitelist;
+import org.jsoup.safety.Safelist;
 import org.jsoup.select.Elements;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@@ -91,14 +92,13 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
         String nextUrl = urlBase + nextPageUrl.first().attr("href");

         sleep(500);
-        Document nextPage = Http.url(nextUrl).cookies(cookies).get();

-        return nextPage;
+        return Http.url(nextUrl).cookies(cookies).get();
     }

     private String getImageFromPost(String url) {
         sleep(1000);
-        Document d = null;
+        Document d;
         try {
             d = Http.url(url).cookies(cookies).get();
             Elements links = d.getElementsByTag("a");
@@ -125,6 +125,9 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
                     urls.add(urlToAdd);
                 }
             }
+            if (isStopped() || isThisATest()) {
+                break;
+            }
         }
         return urls;
     }
@@ -164,7 +167,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
         ele.select("br").append("\\n");
         ele.select("p").prepend("\\n\\n");
         LOGGER.debug("Returning description at " + page);
-        String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
+        String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Safelist.none(), new Document.OutputSettings().prettyPrint(false));
         return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
     } catch (IOException ioe) {
         LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
@@ -181,24 +184,22 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
         }
         String newText = "";
         String saveAs = "";
-        File saveFileAs;
+        Path saveFileAs;
         saveAs = text.split("\n")[0];
         saveAs = saveAs.replaceAll("^(\\S+)\\s+by\\s+(.*)$", "$2_$1");
         for (int i = 1;i < text.split("\n").length; i++) {
             newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i];
         }
         try {
             if (!subdirectory.equals("")) {
                 subdirectory = File.separator + subdirectory;
             }
-            saveFileAs = new File(
-                    workingDir.getCanonicalPath()
+            saveFileAs = Paths.get(
+                    workingDir
                     + "/"
                     + subdirectory
-                    + File.separator
+                    + "/"
                     + saveAs
                     + ".txt");
             // Write the file
-            FileOutputStream out = (new FileOutputStream(saveFileAs));
+            OutputStream out = Files.newOutputStream(saveFileAs);
             out.write(text.getBytes());
             out.close();
         } catch (IOException e) {
@@ -206,9 +207,13 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
             return false;
         }
         LOGGER.debug("Downloading " + url + "'s description to " + saveFileAs);
-        if (!saveFileAs.getParentFile().exists()) {
+        if (!Files.exists(saveFileAs.getParent())) {
             LOGGER.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
-            saveFileAs.getParentFile().mkdirs();
+            try {
+                Files.createDirectory(saveFileAs.getParent());
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
         }
         return true;
     }

@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;

 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
@@ -40,7 +42,7 @@ public class FuskatorRipper extends AbstractHTMLRipper {
     }

     @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
+    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
         String u = url.toExternalForm();
         if (u.contains("/thumbs/")) {
             u = u.replace("/thumbs/", "/full/");
@@ -48,7 +50,7 @@ public class FuskatorRipper extends AbstractHTMLRipper {
         if (u.contains("/expanded/")) {
             u = u.replaceAll("/expanded/", "/full/");
         }
-        return new URL(u);
+        return new URI(u).toURL();
     }

     @Override

@@ -1,159 +0,0 @@
package com.rarchives.ripme.ripper.rippers;


import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.utils.Http;


public class GfycatRipper extends AbstractHTMLRipper {

    private static final String HOST = "gfycat.com";
    String username = "";
    String cursor = "";
    String count = "30";


    public GfycatRipper(URL url) throws IOException {
        super(new URL(url.toExternalForm().split("-")[0].replace("thumbs.", "")));
    }

    @Override
    public String getDomain() {
        return "gfycat.com";
    }

    @Override
    public String getHost() {
        return "gfycat";
    }

    @Override
    public boolean canRip(URL url) {
        return url.getHost().endsWith(HOST);
    }

    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        String sUrl = url.toExternalForm();
        sUrl = sUrl.replace("/gifs/detail", "");
        sUrl = sUrl.replace("/amp", "");
        return new URL(sUrl);
    }

    public boolean isProfile() {
        Pattern p = Pattern.compile("^https?://[wm.]*gfycat\\.com/@([a-zA-Z0-9]+).*$");
        Matcher m = p.matcher(url.toExternalForm());
        return m.matches();
    }

    @Override
    public Document getFirstPage() throws IOException {
        if (!isProfile()) {
            return Http.url(url).get();
        } else {
            username = getGID(url);
            return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats")).ignoreContentType().get();
        }
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p = Pattern.compile("^https?://(thumbs\\.|[wm\\.]*)gfycat\\.com/@?([a-zA-Z0-9]+).*$");
        Matcher m = p.matcher(url.toExternalForm());

        if (m.matches())
            return m.group(2);

        throw new MalformedURLException(
                "Expected gfycat.com format: "
                        + "gfycat.com/id or "
                        + "thumbs.gfycat.com/id.gif"
                        + " Got: " + url);
    }

    private String stripHTMLTags(String t) {
        t = t.replaceAll("<html>\n" +
                " <head></head>\n" +
                " <body>", "");
        t = t.replaceAll("</body>\n" +
                "</html>", "");
        t = t.replaceAll("\n", "");
        t = t.replaceAll("=\"\"", "");
        return t;
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
        if (cursor.equals("")) {
            throw new IOException("No more pages");
        }
        return Http.url(new URL("https://api.gfycat.com/v1/users/" + username + "/gfycats?count=" + count + "&cursor=" + cursor)).ignoreContentType().get();
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<>();
        if (isProfile()) {
            JSONObject page = new JSONObject(stripHTMLTags(doc.html()));
            JSONArray content = page.getJSONArray("gfycats");
            for (int i = 0; i < content.length(); i++) {
                result.add(content.getJSONObject(i).getString("mp4Url"));
            }
            cursor = page.getString("cursor");
        } else {
            Elements videos = doc.select("script");
            for (Element el : videos) {
                String json = el.html();
                if (json.startsWith("{")) {
                    JSONObject page = new JSONObject(json);
                    result.add(page.getJSONObject("video").getString("contentUrl"));
                }
            }
        }
        return result;
    }

    /**
     * Helper method for retrieving video URLs.
     * @param url URL to gfycat page
     * @return URL to video
     * @throws IOException
     */
    public static String getVideoURL(URL url) throws IOException {
        LOGGER.info("Retrieving " + url.toExternalForm());

        //Sanitize the URL first
        url = new URL(url.toExternalForm().replace("/gifs/detail", ""));

        Document doc = Http.url(url).get();
        Elements videos = doc.select("script");
        for (Element el : videos) {
            String json = el.html();
            if (json.startsWith("{")) {
                JSONObject page = new JSONObject(json);
                return page.getJSONObject("video").getString("contentUrl");
            }
        }
        throw new IOException();
    }
}

@@ -11,8 +11,6 @@ import java.util.regex.Pattern;
 import com.rarchives.ripme.ripper.AbstractSingleFileRipper;
 import org.jsoup.nodes.Document;

-import com.rarchives.ripme.utils.Http;
-
 public class GfycatporntubeRipper extends AbstractSingleFileRipper {

     public GfycatporntubeRipper(URL url) throws IOException {
@@ -40,12 +38,6 @@ public class GfycatporntubeRipper extends AbstractSingleFileRipper {
                 "gfycatporntube.com/NAME - got " + url + " instead");
     }

-    @Override
-    public Document getFirstPage() throws IOException {
-        // "url" is an instance field of the superclass
-        return Http.url(url).get();
-    }
-
     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> result = new ArrayList<>();

@@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;

 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
@@ -13,11 +14,8 @@ import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;

 public class GirlsOfDesireRipper extends AbstractHTMLRipper {
-    // Current HTML document
-    private Document albumDoc = null;

     public GirlsOfDesireRipper(URL url) throws IOException {
         super(url);
@@ -32,10 +30,10 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
         return "girlsofdesire.org";
     }

-    public String getAlbumTitle(URL url) throws MalformedURLException {
+    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
         try {
             // Attempt to use album title as GID
-            Document doc = getFirstPage();
+            Document doc = getCachedFirstPage();
             Elements elems = doc.select(".albumName");
             return getHost() + "_" + elems.first().text();
         } catch (Exception e) {
@@ -62,14 +60,6 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
                 + " Got: " + url);
     }

-    @Override
-    public Document getFirstPage() throws IOException {
-        if (albumDoc == null) {
-            albumDoc = Http.url(url).get();
-        }
-        return albumDoc;
-    }
-
     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> imageURLs = new ArrayList<>();

|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@ -49,9 +50,9 @@ public class HbrowseRipper extends AbstractHTMLRipper {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
|
||||
try {
|
||||
Document doc = getFirstPage();
|
||||
Document doc = getCachedFirstPage();
|
||||
String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
|
||||
return getHost() + "_" + title + "_" + getGID(url);
|
||||
} catch (Exception e) {
|
||||
|
@@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;

 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
@@ -90,7 +91,7 @@ public class Hentai2readRipper extends AbstractHTMLRipper {
     }

     @Override
-    public String getAlbumTitle(URL url) throws MalformedURLException {
+    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
         try {
             return getHost() + "_" + getGID(url);
         } catch (Exception e) {

@@ -0,0 +1,184 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import com.rarchives.ripme.ripper.AbstractJSONRipper;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class HentaiNexusRipper extends AbstractJSONRipper {

    public HentaiNexusRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "hentainexus";
    }
    @Override
    public String getDomain() {
        return "hentainexus.com";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        /*
         Valid URLs are /view/id, /read/id and those 2 with #pagenumber
         https://hentainexus.com/view/9202
         https://hentainexus.com/read/9202
         https://hentainexus.com/view/9202#001
         https://hentainexus.com/read/9202#001
        */

        Pattern p = Pattern.compile("^https?://hentainexus\\.com/(?:view|read)/([0-9]+)(?:\\#[0-9]+)*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        throw new MalformedURLException("Expected hentainexus.com URL format: " +
                "hentainexus.com/view/id OR hentainexus.com/read/id - got " + url + "instead");
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }


    @Override
    protected List<String> getURLsFromJSON(JSONObject json) throws JSONException {

        List<String> urlList = new ArrayList<>();

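        // Meaning of the single-letter keys, inferred from how they are used below
        // (not documented by the site): "f" = array of image entries, "b" = base URL,
        // "r" = resource folder, "i" = gallery id; per entry, "h" = hash, "p" = file name.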
        JSONArray imagesList = json.getJSONArray("f");
        String host = json.getString("b");
        String folder = json.getString("r");
        String id = json.getString("i");

        for (Object singleImage : imagesList) {
            String hashTMP = ((JSONObject) singleImage).getString("h");
            String fileNameTMP = ((JSONObject) singleImage).getString("p");
            String imageUrlTMP = String.format("%s%s%s/%s/%s",host,folder,hashTMP,id,fileNameTMP);
            urlList.add(imageUrlTMP);
        }

        return urlList;
    }

    @Override
    protected JSONObject getFirstPage() throws IOException, URISyntaxException {
        String jsonEncodedString = getJsonEncodedStringFromPage();
        String jsonDecodedString = decodeJsonString(jsonEncodedString);
        return new JSONObject(jsonDecodedString);
    }

    public String getJsonEncodedStringFromPage() throws MalformedURLException, IOException, URISyntaxException {
        // Image data only appears on the /read/ page and not on the /view/ one.
        URL readUrl = new URI(String.format("http://hentainexus.com/read/%s",getGID(url))).toURL();
        Document document = Http.url(readUrl).response().parse();

        for (Element scripts : document.getElementsByTag("script")) {
            for (DataNode dataNode : scripts.dataNodes()) {
                if (dataNode.getWholeData().contains("initReader")) {
                    // Extract JSON encoded string from the JavaScript initReader() call.
                    String data = dataNode.getWholeData().trim().replaceAll("\\r|\\n|\\t","");

                    Pattern p = Pattern.compile(".*?initReader\\(\"(.*?)\",.*?\\).*?");
                    Matcher m = p.matcher(data);
                    if (m.matches()) {
                        return m.group(1);
                    }
                }
            }
        }
        return "";
    }

    public String decodeJsonString(String jsonEncodedString)
    {
        /*
         The initReader() JavaScript function accepts 2 parameters: a weird string and the window title (we can ignore this).
         The weird string is a JSON string with some bytes shifted and swapped around and then encoded in base64.
         The following code is a Java adaptation of the initRender() JavaScript function after manual deobfuscation.
        */

        byte[] jsonBytes = Base64.getDecoder().decode(jsonEncodedString);

        ArrayList unknownArray = new ArrayList();
        ArrayList<Integer> indexesToUse = new ArrayList<>();

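        // A Sieve of Eratosthenes in disguise: unknownArray collects the first 16 primes
        // (2, 3, 5, ..., 53), while indexesToUse accumulates their multiples up to 0x100.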
        for (int i = 0x2; unknownArray.size() < 0x10; ++i) {
            if (!indexesToUse.contains(i)) {
                unknownArray.add(i);
                for (int j = i << 0x1; j <= 0x100; j += i) {
                    if (!indexesToUse.contains(j)) {
                        indexesToUse.add(j);
                    }
                }
            }
        }

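        // Fold the 64-byte key header into one byte with a small CRC-like feedback
        // shift; only its low 3 bits are kept below, picking one of the first 8 primes
        // as the index increment for the decryption loop further down.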
        byte magicByte = 0x0;
        for (int i = 0x0; i < 0x40; i++) {
            magicByte = (byte) (signedToUnsigned(magicByte) ^ signedToUnsigned(jsonBytes[i]));
            for (int j = 0x0; j < 0x8; j++) {
                long unsignedMagicByteTMP = signedToUnsigned(magicByte);
                magicByte = (byte) ((unsignedMagicByteTMP & 0x1) == 1 ? unsignedMagicByteTMP >>> 0x1 ^ 0xc : unsignedMagicByteTMP >>> 0x1);
            }
        }

        magicByte = (byte) (magicByte & 0x7);
        ArrayList<Integer> newArray = new ArrayList<>();

        for (int i = 0x0; i < 0x100; i++) {
            newArray.add(i);
        }

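        // RC4-style key scheduling: permute the 256-entry state table, keyed by the
        // first 64 bytes of the Base64-decoded payload.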
        int newIndex = 0, backup = 0;
        for (int i = 0x0; i < 0x100; i++) {
            newIndex = (newIndex + newArray.get(i) + (int) signedToUnsigned(jsonBytes[i % 0x40])) % 0x100;
            backup = newArray.get(i);
            newArray.set(i, newArray.get(newIndex));
            newArray.set(newIndex, backup);
        }

        int magicByteTranslated = (int) unknownArray.get(magicByte);
        int index1 = 0x0, index2 = 0x0, index3 = 0x0, swap1 = 0x0, xorNumber = 0x0;
        String decodedJsonString = "";

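        // What appears to be RC4-like keystream generation (a variant with a third
        // index and output feedback): every byte after the 64-byte header is XORed
        // against a keystream byte to recover the JSON text.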
        for (int i = 0x0; i + 0x40 < jsonBytes.length; i++) {
            index1 = (index1 + magicByteTranslated) % 0x100;
            index2 = (index3 + newArray.get((index2 + newArray.get(index1)) % 0x100)) % 0x100;
            index3 = (index3 + index1 + newArray.get(index1)) % 0x100;
            swap1 = newArray.get(index1);
            newArray.set(index1, newArray.get(index2));
            newArray.set(index2,swap1);
            xorNumber = newArray.get((index2 + newArray.get((index1 + newArray.get((xorNumber + index3) % 0x100)) % 0x100)) % 0x100);
            decodedJsonString += Character.toString((char) signedToUnsigned((jsonBytes[i + 0x40] ^ xorNumber)));
        }

        return decodedJsonString;
    }

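    // Interprets the low byte of the argument as an unsigned value in [0, 255].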
    private static long signedToUnsigned(int signed) {
        return (byte) signed & 0xFF;
    }

}

@@ -10,6 +10,7 @@ import org.jsoup.nodes.Document;

 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URI;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -50,12 +51,6 @@ public class HentaidudeRipper extends AbstractSingleFileRipper {
                 "Expected hqporner URL format: " + "hentaidude.com/VIDEO - got " + url + " instead");
     }

-    @Override
-    public Document getFirstPage() throws IOException {
-        // "url" is an instance field of the superclass
-        return Http.url(url).get();
-    }
-
     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> result = new ArrayList<>();
@@ -84,7 +79,7 @@ public class HentaidudeRipper extends AbstractSingleFileRipper {
         return hentaidudeThreadPool;
     }

-    private class HentaidudeDownloadThread extends Thread {
+    private class HentaidudeDownloadThread implements Runnable {

         private URL url;

@@ -97,7 +92,7 @@ public class HentaidudeRipper extends AbstractSingleFileRipper {
         public void run() {
             try {
                 Document doc = Http.url(url).get();
-                URL videoSourceUrl = new URL(getVideoUrl(doc));
+                URL videoSourceUrl = new URI(getVideoUrl(doc)).toURL();
                 addURLToDownload(videoSourceUrl, "", "", "", null, getVideoName(), "mp4");
             } catch (Exception e) {
                 LOGGER.error("Could not get video url for " + getVideoName(), e);

@@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers;

 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
@@ -12,7 +13,6 @@ import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;

 public class HentaifoxRipper extends AbstractHTMLRipper {

@@ -41,12 +41,6 @@ public class HentaifoxRipper extends AbstractHTMLRipper {
                 "https://hentaifox.com/gallery/ID - got " + url + " instead");
     }

-    @Override
-    public Document getFirstPage() throws IOException {
-        // "url" is an instance field of the superclass
-        return Http.url(url).get();
-    }
-
     @Override
     public List<String> getURLsFromPage(Document doc) {
         LOGGER.info(doc);
@@ -59,9 +53,9 @@ public class HentaifoxRipper extends AbstractHTMLRipper {
     }

     @Override
-    public String getAlbumTitle(URL url) throws MalformedURLException {
+    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
         try {
-            Document doc = getFirstPage();
+            Document doc = getCachedFirstPage();
             String title = doc.select("div.info > h1").first().text();
             return getHost() + "_" + title + "_" + getGID(url);
         } catch (Exception e) {

@@ -52,13 +52,6 @@ public class HentaiimageRipper extends AbstractHTMLRipper {
                 "https://hentai-image.com/image/ID - got " + url + " instead");
     }

-    @Override
-    public Document getFirstPage() throws IOException {
-        // "url" is an instance field of the superclass
-        return Http.url(url).get();
-    }
-

     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> result = new ArrayList<>();

@@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;

 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
@@ -13,7 +15,6 @@ import org.jsoup.nodes.Document;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
 import org.jsoup.nodes.Element;

 public class HitomiRipper extends AbstractHTMLRipper {

@@ -35,20 +36,20 @@ public class HitomiRipper extends AbstractHTMLRipper {

     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("https://hitomi.la/galleries/([\\d]+).html");
+        Pattern p = Pattern.compile("https://hitomi.la/(cg|doujinshi|gamecg|manga)/(.+).html");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             galleryId = m.group(1);
             return m.group(1);
         }
         throw new MalformedURLException("Expected hitomi URL format: " +
-                "https://hitomi.la/galleries/ID.html - got " + url + " instead");
+                "https://hitomi.la/(cg|doujinshi|gamecg|manga)/ID.html - got " + url + " instead");
     }

     @Override
-    public Document getFirstPage() throws IOException {
+    public Document getFirstPage() throws IOException, URISyntaxException {
         // if we go to /GALLERYID.js we get a nice json array of all images in the gallery
-        return Http.url(new URL(url.toExternalForm().replaceAll("hitomi", "ltn.hitomi").replaceAll(".html", ".js"))).ignoreContentType().get();
+        return Http.url(new URI(url.toExternalForm().replaceAll("hitomi", "ltn.hitomi").replaceAll(".html", ".js")).toURL()).ignoreContentType().get();
     }


@@ -65,7 +66,7 @@ public class HitomiRipper extends AbstractHTMLRipper {
     }

     @Override
-    public String getAlbumTitle(URL url) throws MalformedURLException {
+    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
         try {
             // Attempt to use album title and username as GID
             Document doc = Http.url(url).get();

@@ -11,6 +11,8 @@ import org.jsoup.select.Elements;

 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
@@ -63,9 +65,8 @@ public class HqpornerRipper extends AbstractHTMLRipper {
     }

     @Override
-    public Document getFirstPage() throws IOException {
-        // "url" is an instance field of the superclass
-        return Http.url(url).get();
+    public Document getFirstPage() throws IOException, URISyntaxException {
+        return super.getFirstPage();
     }

     @Override
@@ -130,7 +131,7 @@ public class HqpornerRipper extends AbstractHTMLRipper {
         return true;
     }

-    private class HqpornerDownloadThread extends Thread {
+    private class HqpornerDownloadThread implements Runnable {

         private URL hqpornerVideoPageUrl;
         //private int index;
@@ -164,10 +165,10 @@ public class HqpornerRipper extends AbstractHTMLRipper {
             }

             if (downloadUrl != null) {
-                addURLToDownload(new URL(downloadUrl), "", subdirectory, "", null, getVideoName(), "mp4");
+                addURLToDownload(new URI(downloadUrl).toURL(), "", subdirectory, "", null, getVideoName(), "mp4");
             }

-        } catch (IOException e) {
+        } catch (IOException | URISyntaxException e) {
             LOGGER.error("[!] Exception while downloading video.", e);
         }
     }
@@ -215,7 +216,7 @@ public class HqpornerRipper extends AbstractHTMLRipper {

             try {
                 logger.info("Trying to download from unknown video host " + videoPageurl);
-                URL url = new URL(videoPageurl);
+                URL url = new URI(videoPageurl).toURL();
                 Response response = Http.url(url).referrer(hqpornerVideoPageUrl).response();
                 Document doc = response.parse();

@@ -245,7 +246,7 @@ public class HqpornerRipper extends AbstractHTMLRipper {
                 }
             }

-        } catch (IOException e) {
+        } catch (IOException | URISyntaxException e) {
             logger.error("Unable to get video url using generic methods.");
         }

@@ -46,12 +46,6 @@ public class HypnohubRipper extends AbstractHTMLRipper {
                 "hypnohub.net/pool/show/ID - got " + url + " instead");
     }

-    @Override
-    public Document getFirstPage() throws IOException {
-        // "url" is an instance field of the superclass
-        return Http.url(url).get();
-    }
-
     private String ripPost(String url) throws IOException {
         LOGGER.info(url);
         Document doc = Http.url(url).get();

@@ -1,112 +0,0 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class ImagearnRipper extends AbstractHTMLRipper {

    public ImagearnRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "imagearn";
    }
    @Override
    public String getDomain() {
        return "imagearn.com";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p = Pattern.compile("^.*imagearn.com/+gallery.php\\?id=([0-9]+).*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        throw new MalformedURLException(
                "Expected imagearn.com gallery formats: "
                        + "imagearn.com/gallery.php?id=####..."
                        + " Got: " + url);
    }

    public URL sanitizeURL(URL url) throws MalformedURLException {
        Pattern p = Pattern.compile("^.*imagearn.com/+image.php\\?id=[0-9]+.*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            // URL points to imagearn *image*, not gallery
            try {
                url = getGalleryFromImage(url);
            } catch (Exception e) {
                LOGGER.error("[!] " + e.getMessage(), e);
            }
        }
        return url;
    }

    private URL getGalleryFromImage(URL url) throws IOException {
        Document doc = Http.url(url).get();
        for (Element link : doc.select("a[href~=^gallery\\.php.*$]")) {
            LOGGER.info("LINK: " + link.toString());
            if (link.hasAttr("href")
                    && link.attr("href").contains("gallery.php")) {
                url = new URL("http://imagearn.com/" + link.attr("href"));
                LOGGER.info("[!] Found gallery from given link: " + url);
                return url;
            }
        }
        throw new IOException("Failed to find gallery at URL " + url);
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
    }

    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
        try {
            Document doc = getFirstPage();
            String title = doc.select("h3 > strong").first().text(); // profile name
            return getHost() + "_" + title + "_" + getGID(url);
        } catch (Exception e) {
            // Fall back to default album naming convention
            LOGGER.warn("Failed to get album title from " + url, e);
        }
        return super.getAlbumTitle(url);
    }

    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> imageURLs = new ArrayList<>();
        for (Element thumb : doc.select("div#gallery > div > a")) {
            String imageURL = thumb.attr("href");
            try {
                Document imagedoc = new Http("http://imagearn.com/" + imageURL).get();
                String image = imagedoc.select("a.thickbox").first().attr("href");
                imageURLs.add(image);
            } catch (IOException e) {
                LOGGER.warn("Was unable to download page: " + imageURL);
            }
        }
        return imageURLs;
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
        sleep(1000);
    }
}

@@ -6,20 +6,24 @@ import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.Utils;
 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

+import org.apache.commons.lang.StringUtils;
+import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;

 public class ImagebamRipper extends AbstractHTMLRipper {

-    // Current HTML document
-    private Document albumDoc = null;
-
     // Thread pool for finding direct image links from "image" pages (html)
     private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam");
     @Override
@@ -45,7 +49,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
         Pattern p;
         Matcher m;

-        p = Pattern.compile("^https?://[wm.]*imagebam.com/gallery/([a-zA-Z0-9]+).*$");
+        p = Pattern.compile("^https?://[wm.]*imagebam.com/(gallery|view)/([a-zA-Z0-9]+).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
@@ -57,14 +61,6 @@ public class ImagebamRipper extends AbstractHTMLRipper {
                 + " Got: " + url);
     }

-    @Override
-    public Document getFirstPage() throws IOException {
-        if (albumDoc == null) {
-            albumDoc = Http.url(url).get();
-        }
-        return albumDoc;
-    }
-
     @Override
     public Document getNextPage(Document doc) throws IOException {
         // Find next page
@@ -80,7 +76,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> imageURLs = new ArrayList<>();
-        for (Element thumb : doc.select("div > a[target=_blank]:not(.footera)")) {
+        for (Element thumb : doc.select("div > a[class=thumbnail]:not(.footera)")) {
             imageURLs.add(thumb.attr("href"));
         }
         return imageURLs;
@@ -94,18 +90,15 @@ public class ImagebamRipper extends AbstractHTMLRipper {
     }

     @Override
-    public String getAlbumTitle(URL url) throws MalformedURLException {
+    public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
         try {
             // Attempt to use album title as GID
-            Elements elems = getFirstPage().select("legend");
+            Elements elems = getCachedFirstPage().select("[id=gallery-name]");
             String title = elems.first().text();
             LOGGER.info("Title text: '" + title + "'");
-            Pattern p = Pattern.compile("^(.*)\\s\\d* image.*$");
-            Matcher m = p.matcher(title);
-            if (m.matches()) {
-                return getHost() + "_" + getGID(url) + " (" + m.group(1).trim() + ")";
+            if (StringUtils.isNotBlank(title)) {
+                return getHost() + "_" + getGID(url) + " (" + title + ")";
             }
-            LOGGER.info("Doesn't match " + p.pattern());
         } catch (Exception e) {
             // Fall back to default album naming convention
             LOGGER.warn("Failed to get album title from " + url, e);
@@ -118,9 +111,9 @@ public class ImagebamRipper extends AbstractHTMLRipper {
      *
      * Handles case when site has IP-banned the user.
      */
-    private class ImagebamImageThread extends Thread {
-        private URL url; //link to "image page"
-        private int index; //index in album
+    private class ImagebamImageThread implements Runnable {
+        private final URL url; //link to "image page"
+        private final int index; //index in album

         ImagebamImageThread(URL url, int index) {
             super();
@ -138,19 +131,19 @@ public class ImagebamRipper extends AbstractHTMLRipper {
|
||||
*/
|
||||
private void fetchImage() {
|
||||
try {
|
||||
Document doc = Http.url(url).get();
|
||||
Map<String, String> cookies = new HashMap<>();
|
||||
cookies.put("nsfw_inter", "1");
|
||||
Document doc = Jsoup.connect(url.toString())
|
||||
.cookies(cookies)
|
||||
.get();
|
||||
|
||||
// Find image
|
||||
Elements metaTags = doc.getElementsByTag("meta");
|
||||
|
||||
String imgsrc = "";//initialize, so no NullPointerExceptions should ever happen.
|
||||
|
||||
for (Element metaTag: metaTags) {
|
||||
//the direct link to the image seems to always be linked in the <meta> part of the html.
|
||||
if (metaTag.attr("property").equals("og:image")) {
|
||||
imgsrc = metaTag.attr("content");
|
||||
LOGGER.info("Found URL " + imgsrc);
|
||||
break;//only one (useful) image possible for an "image page".
|
||||
}
|
||||
Elements elem = doc.select("img[class*=main-image]");
|
||||
if ((elem != null) && (elem.size() > 0)) {
|
||||
imgsrc = elem.first().attr("src");
|
||||
}
|
||||
|
||||
//for debug, or something goes wrong.
|
||||
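
The rewritten fetchImage above sends an nsfw_inter cookie so the image page is served without the interstitial, then reads the main-image element directly instead of scanning the page's meta tags. A condensed sketch of that request (the cookie name and selectors come from the diff; the URL is hypothetical):

    Map<String, String> cookies = new HashMap<>();
    cookies.put("nsfw_inter", "1"); // bypass the NSFW interstitial
    Document doc = Jsoup.connect("https://www.imagebam.com/view/SOMEID") // hypothetical URL
            .cookies(cookies)
            .get();
    Element img = doc.selectFirst("img[class*=main-image]");
    String imgsrc = (img == null) ? "" : img.attr("src");
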
@ -165,8 +158,8 @@ public class ImagebamRipper extends AbstractHTMLRipper {
prefix = String.format("%03d_", index);
}

addURLToDownload(new URL(imgsrc), prefix);
} catch (IOException e) {
addURLToDownload(new URI(imgsrc).toURL(), prefix);
} catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}

@ -1,8 +1,13 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@ -10,14 +15,26 @@ import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;

public class ImagefapRipper extends AbstractHTMLRipper {

private Document albumDoc = null;
private boolean isNewAlbumType = false;
private int callsMade = 0;
private long startTime = System.nanoTime();

private static final int RETRY_LIMIT = 10;
private static final int HTTP_RETRY_LIMIT = 3;
private static final int RATE_LIMIT_HOUR = 1000;

// All sleep times are in milliseconds
private static final int PAGE_SLEEP_TIME = 60 * 60 * 1000 / RATE_LIMIT_HOUR;
private static final int IMAGE_SLEEP_TIME = 60 * 60 * 1000 / RATE_LIMIT_HOUR;
// Timeout when blocked = 1 hour. Retry every retry within the hour mark + 1 time after the hour mark.
private static final int IP_BLOCK_SLEEP_TIME = (int) Math.round((double) 60 / (RETRY_LIMIT - 1) * 60 * 1000);

public ImagefapRipper(URL url) throws IOException {
super(url);
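
Worked numbers for the constants above: with RATE_LIMIT_HOUR = 1000, both sleep times evaluate to 60 * 60 * 1000 / 1000 = 3600 ms, i.e. one request every 3.6 seconds; IP_BLOCK_SLEEP_TIME rounds 60 / 9 * 60 * 1000 to 400000 ms, so the retries together span a little over the one-hour block window.

    // Plain arithmetic check, not part of the ripper:
    int pageSleep = 60 * 60 * 1000 / 1000;                            // 3600 ms
    int ipBlockSleep = (int) Math.round((double) 60 / 9 * 60 * 1000); // 400000 ms (~6.7 min)
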
@ -36,54 +53,40 @@ public class ImagefapRipper extends AbstractHTMLRipper {
* Reformat given URL into the desired format (all images on single page)
*/
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String gid = getGID(url);
String newURL = "http://www.imagefap.com/gallery.php?";
if (isNewAlbumType) {
newURL += "p";
}
newURL += "gid=" + gid + "&view=2";
String newURL = "https://www.imagefap.com/pictures/" + gid + "/random-string";
LOGGER.debug("Changed URL from " + url + " to " + newURL);
return new URL(newURL);
return new URI(newURL).toURL();
}

@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p; Matcher m;

// Old format (I suspect no longer supported)
p = Pattern.compile("^.*imagefap.com/gallery.php\\?pgid=([a-f0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
isNewAlbumType = true;
return m.group(1);
}

p = Pattern.compile("^.*imagefap.com/gallery.php\\?gid=([0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}

p = Pattern.compile("^.*imagefap.com/pictures/([0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^.*imagefap.com/pictures/([a-f0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
isNewAlbumType = true;
return m.group(1);
}

p = Pattern.compile("^.*imagefap.com/gallery/([0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^.*imagefap.com/gallery/([a-f0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
isNewAlbumType = true;
return m.group(1);
}

// most recent format
p = Pattern.compile("^.*imagefap.com/pictures/([a-f0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}

@ -96,41 +99,72 @@ public class ImagefapRipper extends AbstractHTMLRipper {

@Override
public Document getFirstPage() throws IOException {
if (albumDoc == null) {
albumDoc = Http.url(url).get();
}
return albumDoc;

Document firstPage = getPageWithRetries(url);

sendUpdate(STATUS.LOADING_RESOURCE, "Loading first page...");

return firstPage;
}

@Override
public Document getNextPage(Document doc) throws IOException {
public Document getNextPage(Document doc) throws IOException, URISyntaxException {
String nextURL = null;
for (Element a : doc.select("a.link3")) {
if (a.text().contains("next")) {
nextURL = "http://imagefap.com/gallery.php" + a.attr("href");
nextURL = this.sanitizeURL(this.url) + a.attr("href");
break;
}
}
if (nextURL == null) {
throw new IOException("No next page found");
}
sleep(1000);
return Http.url(nextURL).get();
// Sleep before fetching next page.
sleep(PAGE_SLEEP_TIME);

sendUpdate(STATUS.LOADING_RESOURCE, "Loading next page URL: " + nextURL);
LOGGER.info("Attempting to load next page URL: " + nextURL);

// Load next page
Document nextPage = getPageWithRetries(new URI(nextURL).toURL());

return nextPage;
}

@Override
public List<String> getURLsFromPage(Document doc) {

List<String> imageURLs = new ArrayList<>();

LOGGER.debug("Trying to get URLs from document... ");

for (Element thumb : doc.select("#gallery img")) {
if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) {
continue;
}
String image = getFullSizedImage("https://www.imagefap.com" + thumb.parent().attr("href"));

if (image == null) {
for (int i = 0; i < HTTP_RETRY_LIMIT; i++) {
image = getFullSizedImage("https://www.imagefap.com" + thumb.parent().attr("href"));
if (image != null) {
break;
}
sleep(PAGE_SLEEP_TIME);
}
if (image == null)
throw new RuntimeException("Unable to extract image URL from single image page! Unable to continue");
}

LOGGER.debug("Adding imageURL: '" + image + "'");

imageURLs.add(image);
if (isThisATest()) {
break;
}
}
LOGGER.debug("Adding " + imageURLs.size() + " URLs to download");

return imageURLs;
}

@ -141,10 +175,10 @@ public class ImagefapRipper extends AbstractHTMLRipper {
}

@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
public String getAlbumTitle(URL url) throws MalformedURLException, URISyntaxException {
try {
// Attempt to use album title as GID
String title = getFirstPage().title();
String title = getCachedFirstPage().title();
title = title.replace("Porn Pics & Porn GIFs", "");
title = title.replace(" ", "_");
String toReturn = getHost() + "_" + title + "_" + getGID(url);
@ -156,11 +190,128 @@ public class ImagefapRipper extends AbstractHTMLRipper {

private String getFullSizedImage(String pageURL) {
try {
Document doc = Http.url(pageURL).get();
return doc.select("img#mainPhoto").attr("src");
} catch (IOException e) {
// Sleep before fetching image.
sleep(IMAGE_SLEEP_TIME);

Document doc = getPageWithRetries(new URI(pageURL).toURL());

String framedPhotoUrl = doc.select("img#mainPhoto").attr("data-src");

// we use a no query param version of the URL to reduce failure rate because of some query params that change between the li elements and the mainPhotoURL
String noQueryPhotoUrl = framedPhotoUrl.split("\\?")[0];

LOGGER.debug("noQueryPhotoUrl: " + noQueryPhotoUrl);

// we look for a li > a element whose framed attribute starts with the noQueryPhotoUrl (only reference in the page to the full URL)
Elements selectedItem = doc.select("ul.thumbs > li > a[framed^='"+noQueryPhotoUrl+"']");

// the fullsize URL is in the href attribute
String fullSizedUrl = selectedItem.attr("href");

if("".equals(fullSizedUrl))
throw new IOException("JSoup full URL extraction failed from '" + selectedItem.html() + "'");

LOGGER.debug("fullSizedUrl: " + fullSizedUrl);

return fullSizedUrl;

} catch (IOException | URISyntaxException e) {
LOGGER.debug("Unable to get full size image URL from page: " + pageURL + " because: " + e.getMessage());
return null;
}
}

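
getFullSizedImage leans on Jsoup's attribute-prefix selector: [attr^=value] matches elements whose attribute value starts with the given string. Illustrative use against a tiny in-memory document (markup and host invented for the example):

    Document doc = Jsoup.parse(
            "<ul class=\"thumbs\"><li>"
            + "<a framed=\"https://cdn.example/photo123.jpg?x=1\" href=\"https://cdn.example/full/photo123.jpg\">t</a>"
            + "</li></ul>");
    String noQuery = "https://cdn.example/photo123.jpg";
    String full = doc.select("ul.thumbs > li > a[framed^='" + noQuery + "']").attr("href");
    // full -> "https://cdn.example/full/photo123.jpg"
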
/**
* Attempts to get page, checks for IP ban, waits.
* @param url
* @return Page document
* @throws IOException If page loading errors, or if retries are exhausted
*/
private Document getPageWithRetries(URL url) throws IOException {
Document doc = null;
int retries = RETRY_LIMIT;
while (true) {

sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());

// For debugging rate limit checker. Useful to track whether the timeout should be altered or not.
callsMade++;
checkRateLimit();

LOGGER.info("Retrieving " + url);

boolean httpCallThrottled = false;
int httpAttempts = 0;

// we attempt the http call, knowing it can fail for network reasons
while(true) {
httpAttempts++;
try {
doc = Http.url(url).get();
} catch(IOException e) {

LOGGER.info("Retrieving " + url + " error: " + e.getMessage());

if(e.getMessage().contains("404"))
throw new IOException("Gallery/Page not found!");

if(httpAttempts < HTTP_RETRY_LIMIT) {
sendUpdate(STATUS.DOWNLOAD_WARN, "HTTP call failed: " + e.getMessage() + " retrying " + httpAttempts + " / " + HTTP_RETRY_LIMIT);

// we sleep for a few seconds
sleep(PAGE_SLEEP_TIME);
continue;
} else {
sendUpdate(STATUS.DOWNLOAD_WARN, "HTTP call failed too many times: " + e.getMessage() + " treating this as a throttle");
httpCallThrottled = true;
}
}
// no errors, we exit
break;
}

if (httpCallThrottled || (doc != null && doc.toString().contains("Your IP made too many requests to our servers and we need to check that you are a real human being"))) {
if (retries == 0) {
throw new IOException("Hit rate limit and maximum number of retries, giving up");
}
String message = "Probably hit rate limit while loading " + url + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining";
LOGGER.warn(message);
sendUpdate(STATUS.DOWNLOAD_WARN, message);
retries--;
try {
Thread.sleep(IP_BLOCK_SLEEP_TIME);
} catch (InterruptedException e) {
throw new IOException("Interrupted while waiting for rate limit to subside");
}
} else {
return doc;
}
}
}

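
Given the constants above, the worst case in getPageWithRetries is bounded: each suspected block sleeps IP_BLOCK_SLEEP_TIME, so the RETRY_LIMIT of 10 adds up to roughly 4,000 seconds, just past the one-hour window the IP_BLOCK_SLEEP_TIME comment describes; the inner loop adds at most a few short PAGE_SLEEP_TIME naps per attempt.

    // Illustrative arithmetic only:
    long worstCaseBlockedMs = 10L * 400_000L; // RETRY_LIMIT * IP_BLOCK_SLEEP_TIME = 4,000,000 ms (~66.7 min)
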
/**
* Used for debugging the rate limit issue.
* This is in order to prevent hitting the rate limit altogether by remaining under the limit threshold.
* @return Long duration
*/
private long checkRateLimit() {
long endTime = System.nanoTime();
long duration = (endTime - startTime) / 1000000;

int rateLimitMinute = 100;
int rateLimitFiveMinutes = 200;
int rateLimitHour = RATE_LIMIT_HOUR; // Request allowed every 3.6 seconds.

if(duration / 1000 < 60){
LOGGER.debug("Rate limit: " + (rateLimitMinute - callsMade) + " calls remaining for first minute mark.");
} else if(duration / 1000 < 300){
LOGGER.debug("Rate limit: " + (rateLimitFiveMinutes - callsMade) + " calls remaining for first 5 minute mark.");
} else if(duration / 1000 < 3600){
LOGGER.debug("Rate limit: " + (RATE_LIMIT_HOUR - callsMade) + " calls remaining for first hour mark.");
}

return duration;
}

}

@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@ -56,11 +58,6 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
+ " Got: " + url);
}

@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}

public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();
for (Element thumb : doc.select("a[target=_blank]")) {
@ -79,9 +76,9 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
*
* Handles case when site has IP-banned the user.
*/
private class ImagevenueImageThread extends Thread {
private URL url;
private int index;
private class ImagevenueImageThread implements Runnable {
private final URL url;
private final int index;

ImagevenueImageThread(URL url, int index) {
super();
@ -113,8 +110,8 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(imgsrc), prefix);
} catch (IOException e) {
addURLToDownload(new URI(imgsrc).toURL(), prefix);
} catch (IOException | URISyntaxException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
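
Changing the image workers from extending Thread to implementing Runnable leaves scheduling to the pool instead of the task. A generic sketch of the submission side (the project routes these through its own DownloadThreadPool; a plain ExecutorService is shown here purely for illustration):

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    ExecutorService pool = Executors.newFixedThreadPool(4); // pool size is illustrative
    pool.submit(() -> fetchImage());                        // a Runnable task, no Thread subclass
    pool.shutdown();
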
@ -40,10 +40,6 @@ public class ImgboxRipper extends AbstractHTMLRipper {
"imgbox.com/g/albumid - got " + url + "instead");
}

@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>();

@ -1,10 +1,14 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -15,15 +19,15 @@ import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.safety.Safelist;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;

public class ImgurRipper extends AlbumRipper {
public class ImgurRipper extends AbstractHTMLRipper {

private static final String DOMAIN = "imgur.com",
HOST = "imgur";
@ -38,7 +42,6 @@ public class ImgurRipper extends AlbumRipper {
USER_ALBUM,
USER_IMAGES,
SINGLE_IMAGE,
SERIES_OF_IMAGES,
SUBREDDIT
}

@ -58,6 +61,7 @@ public class ImgurRipper extends AlbumRipper {
return albumType == ALBUM_TYPE.USER;
}

@Override
public boolean canRip(URL url) {
if (!url.getHost().endsWith(DOMAIN)) {
return false;
@ -71,7 +75,24 @@ public class ImgurRipper extends AlbumRipper {
return true;
}

public URL sanitizeURL(URL url) throws MalformedURLException {
@Override
protected String getDomain() {
return DOMAIN;
}

@Override
protected void downloadURL(URL url, int index) {
// No-op as we override rip() method
}

@Override
protected List<String> getURLsFromPage(Document page) {
// No-op as we override rip() method
return Arrays.asList();
}

@Override
public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String u = url.toExternalForm();
if (u.indexOf('#') >= 0) {
u = u.substring(0, u.indexOf('#'));
@ -79,11 +100,17 @@ public class ImgurRipper extends AlbumRipper {
u = u.replace("imgur.com/gallery/", "imgur.com/a/");
u = u.replace("https?://m\\.imgur\\.com", "http://imgur.com");
u = u.replace("https?://i\\.imgur\\.com", "http://imgur.com");
return new URL(u);
return new URI(u).toURL();
}

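
The new URI(u).toURL() construction in sanitizeURL validates the string before a URL is built, unlike the old new URL(u), which accepts some malformed input. Minimal illustration:

    URL ok = new URI("https://imgur.com/a/abc12").toURL();  // fine
    try {
        new URI("https://imgur.com/a/ab c12").toURL();      // space -> URISyntaxException
    } catch (URISyntaxException e) {
        // rejected up front, before any network I/O
    }
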
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
String gid = getGID(url);
String gid = null;
try {
gid = getGID(url);
} catch (URISyntaxException e) {
throw new MalformedURLException(e.getMessage());
}
if (this.albumType == ALBUM_TYPE.ALBUM) {
try {
// Attempt to use album title as GID
@ -91,7 +118,7 @@ public class ImgurRipper extends AlbumRipper {
albumDoc = Http.url(url).get();
}

Elements elems = null;
Elements elems;

/*
// TODO: Add config option for including username in album title.
@ -106,15 +133,13 @@ public class ImgurRipper extends AlbumRipper {
}
*/

String title = null;
String title;
final String defaultTitle1 = "Imgur: The most awesome images on the Internet";
final String defaultTitle2 = "Imgur: The magic of the Internet";
LOGGER.info("Trying to get album title");
elems = albumDoc.select("meta[property=og:title]");
if (elems != null) {
title = elems.attr("content");
LOGGER.debug("Title is " + title);
}
title = elems.attr("content");
LOGGER.debug("Title is " + title);
// This is here in case the album is unnamed, to prevent
// Imgur: The most awesome images on the Internet from being added onto the album name
if (title.contains(defaultTitle1) || title.contains(defaultTitle2)) {
@ -124,27 +149,17 @@ public class ImgurRipper extends AlbumRipper {
title = "";
LOGGER.debug("Trying to use title tag to get title");
elems = albumDoc.select("title");
if (elems != null) {
if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
LOGGER.debug("Was unable to get album title or album was untitled");
}
else {
title = elems.text();
}
if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
LOGGER.debug("Was unable to get album title or album was untitled");
}
else {
title = elems.text();
}
}

String albumTitle = "imgur_";
/*
// TODO: Add config option (see above)
if (user != null) {
albumTitle += "user_" + user;
}
*/
albumTitle += gid;
if (title != null) {
albumTitle += "_" + title;
}
albumTitle += "_" + title;

return albumTitle;
} catch (IOException e) {
@ -156,118 +171,83 @@ public class ImgurRipper extends AlbumRipper {

@Override
public void rip() throws IOException {
switch (albumType) {
case ALBUM:
// Fall-through
case USER_ALBUM:
LOGGER.info("Album type is USER_ALBUM");
// Don't call getAlbumTitle(this.url) with this
// as it seems to cause the album to be downloaded to a subdir.
ripAlbum(this.url);
break;
case SERIES_OF_IMAGES:
LOGGER.info("Album type is SERIES_OF_IMAGES");
ripAlbum(this.url);
break;
case SINGLE_IMAGE:
LOGGER.info("Album type is SINGLE_IMAGE");
ripSingleImage(this.url);
break;
case USER:
LOGGER.info("Album type is USER");
ripUserAccount(url);
break;
case SUBREDDIT:
LOGGER.info("Album type is SUBREDDIT");
ripSubreddit(url);
break;
case USER_IMAGES:
LOGGER.info("Album type is USER_IMAGES");
ripUserImages(url);
break;
try {
switch (albumType) {
case ALBUM:
// Fall-through
case USER_ALBUM:
LOGGER.info("Album type is USER_ALBUM");
// Don't call getAlbumTitle(this.url) with this
// as it seems to cause the album to be downloaded to a subdir.
ripAlbum(this.url);
break;
case SINGLE_IMAGE:
LOGGER.info("Album type is SINGLE_IMAGE");
ripSingleImage(this.url);
break;
case USER:
LOGGER.info("Album type is USER");
ripUserAccount(url);
break;
case SUBREDDIT:
LOGGER.info("Album type is SUBREDDIT");
ripSubreddit(url);
break;
case USER_IMAGES:
LOGGER.info("Album type is USER_IMAGES");
ripUserImages(url);
break;
}
} catch (URISyntaxException e) {
throw new IOException("Failed ripping " + this.url, e);
}
waitForThreads();
}

private void ripSingleImage(URL url) throws IOException {
private void ripSingleImage(URL url) throws IOException, URISyntaxException {
String strUrl = url.toExternalForm();
Document document = getDocument(strUrl);
Matcher m = getEmbeddedJsonMatcher(document);
if (m.matches()) {
JSONObject json = new JSONObject(m.group(1)).getJSONObject("image");
addURLToDownload(extractImageUrlFromJson(json), "");
var gid = getGID(url);
var json = getSingleImageData(String.format("https://api.imgur.com/post/v1/media/%s?include=media,adconfig,account", gid));
var media = json.getJSONArray("media");
if (media.length()==0) {
throw new IOException(String.format("Failed to fetch image for url %s", strUrl));
}
if (media.length()>1) {
LOGGER.warn(String.format("Got multiple images for url %s", strUrl));
}
addURLToDownload(extractImageUrlFromJson((JSONObject)media.get(0)), "");
}

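
ripSingleImage now goes through Imgur's v1 media endpoint. The code relies only on the response carrying a media array whose entries expose id and ext, which extractImageUrlFromJson below consumes; a hypothetical response shape consistent with those reads (not official API documentation):

    // { "media": [ { "id": "AbCd123", "ext": "jpg" } ], ... }   <- assumed shape
    JSONObject json = getSingleImageData("https://api.imgur.com/post/v1/media/AbCd123?include=media"); // made-up ID
    JSONObject first = json.getJSONArray("media").getJSONObject(0);
    URL direct = extractImageUrlFromJson(first); // -> https://i.imgur.com/AbCd123.jpg
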
private void ripAlbum(URL url) throws IOException {
private void ripAlbum(URL url) throws IOException, URISyntaxException {
ripAlbum(url, "");
}

private void ripAlbum(URL url, String subdirectory) throws IOException {
int index = 0;
private void ripAlbum(URL url, String subdirectory) throws IOException, URISyntaxException {
int index;
this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
index = 0;
ImgurAlbum album = getImgurAlbum(url);
for (ImgurImage imgurImage : album.images) {
stopCheck();
String saveAs = workingDir.getCanonicalPath();
if (!saveAs.endsWith(File.separator)) {
saveAs += File.separator;
}
Path saveAs = workingDir.toPath();
if (subdirectory != null && !subdirectory.equals("")) {
saveAs += subdirectory;
saveAs = saveAs.resolve(subdirectory);
}
if (!saveAs.endsWith(File.separator)) {
saveAs += File.separator;
}
File subdirFile = new File(saveAs);
if (!subdirFile.exists()) {
subdirFile.mkdirs();
if (!Files.exists(saveAs)) {
Files.createDirectory(saveAs);
}
index += 1;
var imgPath = imgurImage.getSaveAs().replaceAll("\\?\\d", "");
if (Utils.getConfigBoolean("download.save_order", true)) {
saveAs += String.format("%03d_", index);
saveAs = saveAs.resolve(String.format("%03d_%s", index, imgPath));
} else {
saveAs = saveAs.resolve(imgPath);
}
saveAs += imgurImage.getSaveAs();
saveAs = saveAs.replaceAll("\\?\\d", "");
addURLToDownload(imgurImage.url, new File(saveAs));
addURLToDownload(imgurImage.url, saveAs);
}
}

public static ImgurAlbum getImgurSeries(URL url) throws IOException {
Pattern p = Pattern.compile("^.*imgur\\.com/([a-zA-Z0-9,]*).*$");
Matcher m = p.matcher(url.toExternalForm());
ImgurAlbum album = new ImgurAlbum(url);
if (m.matches()) {
String[] imageIds = m.group(1).split(",");
for (String imageId : imageIds) {
// TODO: Fetch image with ID imageId
LOGGER.debug("Fetching image info for ID " + imageId);
try {
JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
if (!json.has("image")) {
continue;
}
JSONObject image = json.getJSONObject("image");
if (!image.has("links")) {
continue;
}
JSONObject links = image.getJSONObject("links");
if (!links.has("original")) {
continue;
}
String original = links.getString("original");
ImgurImage theImage = new ImgurImage(new URL(original));
album.addImage(theImage);
} catch (Exception e) {
LOGGER.error("Got exception while fetching imgur ID " + imageId, e);
}
}
}
return album;
}

public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
public static ImgurAlbum getImgurAlbum(URL url) throws IOException, URISyntaxException {
String strUrl = url.toExternalForm();
if (!strUrl.contains(",")) {
strUrl += "/all";
@ -275,13 +255,11 @@ public class ImgurRipper extends AlbumRipper {
LOGGER.info(" Retrieving " + strUrl);
Document doc = getAlbumData("https://api.imgur.com/3/album/" + strUrl.split("/a/")[1]);
// Try to use embedded JSON to retrieve images
LOGGER.info(Jsoup.clean(doc.body().toString(), Whitelist.none()));

try {
JSONObject json = new JSONObject(Jsoup.clean(doc.body().toString(), Whitelist.none()));
JSONObject json = new JSONObject(Jsoup.clean(doc.body().toString(), Safelist.none()));
JSONArray jsonImages = json.getJSONObject("data").getJSONArray("images");
return createImgurAlbumFromJsonArray(url, jsonImages);
} catch (JSONException e) {
} catch (JSONException | URISyntaxException e) {
LOGGER.debug("Error while parsing JSON at " + url + ", continuing", e);
}

@ -309,54 +287,48 @@ public class ImgurRipper extends AlbumRipper {
image = "http:" + thumb.select("img").attr("src");
} else {
// Unable to find image in this div
LOGGER.error("[!] Unable to find image in div: " + thumb.toString());
LOGGER.error("[!] Unable to find image in div: " + thumb);
continue;
}
if (image.endsWith(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) {
image = image.replace(".gif", ".mp4");
}
ImgurImage imgurImage = new ImgurImage(new URL(image));
ImgurImage imgurImage = new ImgurImage(new URI(image).toURL());
imgurAlbum.addImage(imgurImage);
}
return imgurAlbum;
}

private static Matcher getEmbeddedJsonMatcher(Document doc) {
Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL);
return p.matcher(doc.body().html());
}

private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException {
private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException, URISyntaxException {
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
int imagesLength = jsonImages.length();
for (int i = 0; i < imagesLength; i++) {
JSONObject ob = jsonImages.getJSONObject(i);
imgurAlbum.addImage(new ImgurImage( new URL(ob.getString("link"))));
imgurAlbum.addImage(new ImgurImage( new URI(ob.getString("link")).toURL()));
}
return imgurAlbum;
}

private static ImgurImage createImgurImageFromJson(JSONObject json) throws MalformedURLException {
return new ImgurImage(extractImageUrlFromJson(json));
}

private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException {
private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException, URISyntaxException {
String ext = json.getString("ext");
if (!ext.startsWith(".")) {
ext = "." + ext;
}
if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) {
ext = ".mp4";
}
return new URL(
"http://i.imgur.com/"
+ json.getString("hash")
+ ext);
return new URI(
"https://i.imgur.com/"
+ json.getString("id")
+ ext).toURL();
}

private static Document getDocument(String strUrl) throws IOException {
return Jsoup.connect(strUrl)
private static JSONObject getSingleImageData(String strUrl) throws IOException {
return Http.url(strUrl)
.userAgent(USER_AGENT)
.timeout(10 * 1000)
.maxBodySize(0)
.get();
.header("Authorization", "Client-ID " + Utils.getConfigString("imgur.client_id", "546c25a59c58ad7"))
.getJSON();
}

private static Document getAlbumData(String strUrl) throws IOException {
@ -369,35 +341,71 @@ public class ImgurRipper extends AlbumRipper {
.get();
}

private static JSONObject getUserData(String userUrl) throws IOException {
return Http.url(userUrl)
.userAgent(USER_AGENT)
.timeout(10 * 1000)
.header("Authorization", "Client-ID " + Utils.getConfigString("imgur.client_id", "546c25a59c58ad7"))
.getJSON();
}


/**
* Rips all albums in an imgur user's account.
* @param url
* URL to imgur user account (http://username.imgur.com)
* @throws IOException
* URL to imgur user account (http://username.imgur.com | https://imgur.com/user/username)
*/
private void ripUserAccount(URL url) throws IOException {
private void ripUserAccount(URL url) throws IOException, URISyntaxException {
int cPage = -1, cImage = 0;
String apiUrl = "https://api.imgur.com/3/account/%s/submissions/%d/newest?album_previews=1";
// Strip 'user_' from username
var username = getGID(url).replace("user_", "");
LOGGER.info("Retrieving " + url);
sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
Document doc = Http.url(url).get();
for (Element album : doc.select("div.cover a")) {
stopCheck();
if (!album.hasAttr("href")
|| !album.attr("href").contains("imgur.com/a/")) {
continue;

while (true) {
cPage += 1;
var pageUrl = String.format(apiUrl, username, cPage);
var json = getUserData(pageUrl);
var success = json.getBoolean("success");
var status = json.getInt("status");
if (!success || status!=200) {
throw new IOException(String.format("Unexpected status code %d for url %s and page %d", status, url, cPage));
}
String albumID = album.attr("href").substring(album.attr("href").lastIndexOf('/') + 1);
URL albumURL = new URL("http:" + album.attr("href") + "/noscript");
try {
ripAlbum(albumURL, albumID);
Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000);
} catch (Exception e) {
LOGGER.error("Error while ripping album: " + e.getMessage(), e);
var data = json.getJSONArray("data");
if (data.isEmpty()) {
// Data array is empty for pages beyond the last page
break;
}
for (int i = 0; i < data.length(); i++) {
cImage += 1;
String prefixOrSubdir = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefixOrSubdir = String.format("%03d_", cImage);
}
var d = (JSONObject)data.get(i);
var l = d.getString("link");
if (d.getBoolean("is_album")) {
// For album links with multiple images create a prefixed folder with album id
prefixOrSubdir += d.getString("id");
ripAlbum(new URI(l).toURL(), prefixOrSubdir);
try {
Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000L);
} catch (InterruptedException e) {
LOGGER.error(String.format("Error! Interrupted ripping album %s for user account %s", l, username), e);
}
} else {
// For direct links
if (d.has("mp4") && Utils.getConfigBoolean("prefer.mp4", false)) {
l = d.getString("mp4");
}
addURLToDownload(new URI(l).toURL(), prefixOrSubdir);
}
}
}
}

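
The account rip is now a plain page walk over the submissions API: bump cPage until the data array comes back empty. Skeleton of that loop contract (names taken from the code above):

    int cPage = -1;
    while (true) {
        cPage += 1;
        JSONObject json = getUserData(String.format(apiUrl, username, cPage));
        JSONArray data = json.getJSONArray("data");
        if (data.isEmpty()) {
            break; // pages past the end return an empty array
        }
        // ... rip albums / direct links as shown above ...
    }
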
private void ripUserImages(URL url) throws IOException {
private void ripUserImages(URL url) {
int page = 0; int imagesFound = 0; int imagesTotal = 0;
String jsonUrl = url.toExternalForm().replace("/all", "/ajax/images");
if (jsonUrl.contains("#")) {
@ -417,12 +425,12 @@ public class ImgurRipper extends AlbumRipper {
for (int i = 0; i < images.length(); i++) {
imagesFound++;
JSONObject image = images.getJSONObject(i);
String imageUrl = "http://i.imgur.com/" + image.getString("hash") + image.getString("ext");
String imageUrl = "https://i.imgur.com/" + image.getString("hash") + image.getString("ext");
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", imagesFound);
}
addURLToDownload(new URL(imageUrl), prefix);
addURLToDownload(new URI(imageUrl).toURL(), prefix);
}
if (imagesFound >= imagesTotal) {
break;
@ -435,7 +443,7 @@ public class ImgurRipper extends AlbumRipper {
}
}

private void ripSubreddit(URL url) throws IOException {
private void ripSubreddit(URL url) throws IOException, URISyntaxException {
int page = 0;
while (true) {
stopCheck();
@ -455,7 +463,7 @@ public class ImgurRipper extends AlbumRipper {
if (image.contains("b.")) {
image = image.replace("b.", ".");
}
URL imageURL = new URL(image);
URL imageURL = new URI(image).toURL();
addURLToDownload(imageURL);
}
if (imgs.isEmpty()) {
@ -477,29 +485,30 @@ public class ImgurRipper extends AlbumRipper {
}

@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = null;
Matcher m = null;
public String getGID(URL url) throws MalformedURLException, URISyntaxException {
Pattern p;
Matcher m;

p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery)/([a-zA-Z0-9]{5,}).*$");
p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/gallery/(?:(?:[a-zA-Z0-9]*/)?.*-)?([a-zA-Z0-9]+)$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album or gallery
albumType = ALBUM_TYPE.ALBUM;
String gid = m.group(m.groupCount());
this.url = new URL("http://imgur.com/a/" + gid);
this.url = new URI("https://imgur.com/a/" + gid).toURL();
return gid;
}
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
// Match urls with path /a
p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/(?:a|t)/(?:(?:[a-zA-Z0-9]*/)?.*-)?([a-zA-Z0-9]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album or gallery
albumType = ALBUM_TYPE.ALBUM;
String gid = m.group(m.groupCount());
this.url = new URL("http://imgur.com/a/" + gid);
this.url = new URI("https://imgur.com/a/" + gid).toURL();
return gid;
}
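
The reworked gallery pattern tolerates the newer slug-style gallery URLs; whatever trails the last hyphen (or the whole segment, if there is no slug) is taken as the ID. Illustrative matches, with made-up IDs:

    Pattern p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/gallery/(?:(?:[a-zA-Z0-9]*/)?.*-)?([a-zA-Z0-9]+)$");
    // "https://imgur.com/gallery/AbCd123"            -> group(1) = "AbCd123"
    // "https://imgur.com/gallery/funny-cats-AbCd123" -> group(1) = "AbCd123"
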
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/?$");
|
||||
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{4,})\\.imgur\\.com/?$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
// Root imgur account
|
||||
@ -510,6 +519,14 @@ public class ImgurRipper extends AlbumRipper {
|
||||
albumType = ALBUM_TYPE.USER;
|
||||
return "user_" + gid;
|
||||
}
|
||||
// Pattern for new imgur user url https://imgur.com/user/username
|
||||
p = Pattern.compile("^https?://(?:www\\.|m\\.)?imgur\\.com/user/([a-zA-Z0-9]+).*$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
String gid = m.group(1);
|
||||
albumType = ALBUM_TYPE.USER;
|
||||
return "user_" + gid;
|
||||
}
|
||||
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/all.*$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
@ -529,13 +546,13 @@ public class ImgurRipper extends AlbumRipper {
|
||||
if (m.matches()) {
|
||||
// Imgur subreddit aggregator
|
||||
albumType = ALBUM_TYPE.SUBREDDIT;
|
||||
String album = m.group(2);
|
||||
StringBuilder album = new StringBuilder(m.group(2));
|
||||
for (int i = 3; i <= m.groupCount(); i++) {
|
||||
if (m.group(i) != null) {
|
||||
album += "_" + m.group(i).replace("/", "");
|
||||
album.append("_").append(m.group(i).replace("/", ""));
|
||||
}
|
||||
}
|
||||
return album;
|
||||
return album.toString();
|
||||
}
|
||||
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/r/(\\w+)/([a-zA-Z0-9,]{5,}).*$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
@ -544,7 +561,7 @@ public class ImgurRipper extends AlbumRipper {
|
||||
albumType = ALBUM_TYPE.ALBUM;
|
||||
String subreddit = m.group(m.groupCount() - 1);
|
||||
String gid = m.group(m.groupCount());
|
||||
this.url = new URL("http://imgur.com/r/" + subreddit + "/" + gid);
|
||||
this.url = new URI("https://imgur.com/r/" + subreddit + "/" + gid).toURL();
|
||||
return "r_" + subreddit + "_" + gid;
|
||||
}
|
||||
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9]{5,})$");
|
||||
@ -554,29 +571,14 @@ public class ImgurRipper extends AlbumRipper {
|
||||
albumType = ALBUM_TYPE.SINGLE_IMAGE;
|
||||
return m.group(m.groupCount());
|
||||
}
|
||||
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
// Series of imgur images
|
||||
albumType = ALBUM_TYPE.SERIES_OF_IMAGES;
|
||||
String gid = m.group(m.groupCount());
|
||||
if (!gid.contains(",")) {
|
||||
throw new MalformedURLException("Imgur image doesn't contain commas");
|
||||
}
|
||||
return gid.replaceAll(",", "-");
|
||||
}
|
||||
throw new MalformedURLException("Unsupported imgur URL format: " + url.toExternalForm());
|
||||
}
|
||||
|
||||
public ALBUM_TYPE getAlbumType() {
|
||||
return albumType;
|
||||
}
|
||||
|
||||
public static class ImgurImage {
|
||||
String title = "";
|
||||
String description = "";
|
||||
String extension = "";
|
||||
public URL url = null;
|
||||
String extension;
|
||||
public URL url;
|
||||
|
||||
ImgurImage(URL url) {
|
||||
this.url = url;
|
||||
@ -586,14 +588,7 @@ public class ImgurRipper extends AlbumRipper {
|
||||
this.extension = this.extension.substring(0, this.extension.indexOf("?"));
|
||||
}
|
||||
}
|
||||
ImgurImage(URL url, String title) {
|
||||
this(url);
|
||||
this.title = title;
|
||||
}
|
||||
public ImgurImage(URL url, String title, String description) {
|
||||
this(url, title);
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
String getSaveAs() {
|
||||
String saveAs = this.title;
|
||||
String u = url.toExternalForm();
|
||||
@ -613,7 +608,7 @@ public class ImgurRipper extends AlbumRipper {
|
||||
|
||||
public static class ImgurAlbum {
|
||||
String title = null;
|
||||
public URL url = null;
|
||||
public URL url;
|
||||
public List<ImgurImage> images = new ArrayList<>();
|
||||
ImgurAlbum(URL url) {
|
||||
this.url = url;
|
||||
|
@ -1,18 +1,13 @@
package com.rarchives.ripme.ripper.rippers;

import com.oracle.js.parser.ErrorManager;
import com.oracle.js.parser.Parser;
import com.oracle.js.parser.ScriptEnvironment;
import com.oracle.js.parser.Source;
import com.oracle.js.parser.ir.*;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import jdk.nashorn.internal.ir.Block;
import jdk.nashorn.internal.ir.CallNode;
import jdk.nashorn.internal.ir.ExpressionStatement;
import jdk.nashorn.internal.ir.FunctionNode;
import jdk.nashorn.internal.ir.Statement;
import jdk.nashorn.internal.parser.Parser;
import jdk.nashorn.internal.runtime.Context;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.Source;
import jdk.nashorn.internal.runtime.options.Options;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
@ -26,12 +21,7 @@ import java.time.Instant;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Spliterators;
import java.util.*;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
@ -176,13 +166,17 @@ public class InstagramRipper extends AbstractJSONRipper {
if (postRip) {
return null;
}
Predicate<String> hrefFilter = (storiesRip || pinnedReelRip) ? href -> href.contains("Consumer.js") :
href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");

Predicate<String> hrefFilter = href -> href.contains("Consumer.js");
if (taggedRip) {
hrefFilter = href -> href.contains("ProfilePageContainer.js") || href.contains("TagPageContainer.js");
}

String href = doc.select("link[rel=preload]").stream()
.map(link -> link.attr("href"))
.filter(hrefFilter)
.findFirst().orElse("");
.map(link -> link.attr("href"))
.filter(hrefFilter)
.findFirst().orElse("");

String body = Http.url("https://www.instagram.com" + href).cookies(cookies).response().body();

Function<String, String> hashExtractor =
@ -198,7 +192,8 @@ public class InstagramRipper extends AbstractJSONRipper {
}

private String getProfileHash(String jsData) {
return getHashValue(jsData, "loadProfilePageExtras", -1);
return getHashValue(jsData, "loadProfilePageExtras", -1,
s -> s.replaceAll(".*queryId\\s?:\\s?\"([0-9a-f]*)\".*", "$1"));
}

private String getPinnedHash(String jsData) {
@ -386,7 +381,7 @@ public class InstagramRipper extends AbstractJSONRipper {
case "GraphSidecar":
JSONArray sideCar = getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges");
return getStreamOfJsonArray(sideCar).map(object -> object.getJSONObject("node"))
.flatMap(this::parseRootForUrls);
.flatMap(this::parseRootForUrls);
default:
return Stream.empty();
}
@ -413,26 +408,35 @@ public class InstagramRipper extends AbstractJSONRipper {

// Javascript parsing
/* ------------------------------------------------------------------------------------------------------- */
private String getHashValue(String javaScriptData, String keyword, int offset) {
private String getHashValue(String javaScriptData, String keyword, int offset,
Function<String, String> extractHash) {
List<Statement> statements = getJsBodyBlock(javaScriptData).getStatements();

return statements.stream()
.flatMap(statement -> filterItems(statement, ExpressionStatement.class))
.map(ExpressionStatement::getExpression)
.flatMap(expression -> filterItems(expression, CallNode.class))
.map(CallNode::getArgs)
.map(expressions -> expressions.get(0))
.flatMap(expression -> filterItems(expression, FunctionNode.class))
.map(FunctionNode::getBody)
.map(Block::getStatements)
.map(statementList -> lookForHash(statementList, keyword, offset))
.filter(Objects::nonNull)
.findFirst().orElse(null);
.flatMap(statement -> filterItems(statement, ExpressionStatement.class))
.map(ExpressionStatement::getExpression)
.flatMap(expression -> filterItems(expression, CallNode.class))
.map(CallNode::getArgs)
.map(expressions -> expressions.get(0))
.flatMap(expression -> filterItems(expression, FunctionNode.class))
.map(FunctionNode::getBody)
.map(Block::getStatements)
.map(statementList -> lookForHash(statementList, keyword, offset, extractHash))
.filter(Objects::nonNull)
.findFirst().orElse(null);
}

private String lookForHash(List<Statement> list, String keyword, int offset) {
private String getHashValue(String javaScriptData, String keyword, int offset) {
return getHashValue(javaScriptData, keyword, offset, null);
}

private String lookForHash(List<Statement> list, String keyword, int offset, Function<String, String> extractHash) {
for (int i = 0; i < list.size(); i++) {
Statement st = list.get(i);
if (st.toString().contains(keyword)) {
if (extractHash != null) {
return extractHash.apply(list.get(i + offset).toString());
}
return list.get(i + offset).toString().replaceAll(".*\"([0-9a-f]*)\".*", "$1");
}
}
@ -444,9 +448,10 @@ public class InstagramRipper extends AbstractJSONRipper {
}

private Block getJsBodyBlock(String javaScriptData) {
ErrorManager errors = new ErrorManager();
Context context = new Context(new Options("nashorn"), errors, Thread.currentThread().getContextClassLoader());
return new Parser(context.getEnv(), Source.sourceFor("name", javaScriptData), errors).parse().getBody();
ScriptEnvironment env = ScriptEnvironment.builder().ecmaScriptVersion(10).constAsVar(true).build();
ErrorManager errorManager = new ErrorManager.ThrowErrorManager();
Source src = Source.sourceFor("name", javaScriptData);
return new Parser(env, src, errorManager).parse().getBody();
}

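
getJsBodyBlock now drives the standalone GraalVM JavaScript parser (com.oracle.js.parser) rather than the JDK-internal Nashorn classes, which are gone from recent JDKs. Usage stays compact; a sketch with an inline script (the JS source is invented):

    ScriptEnvironment env = ScriptEnvironment.builder().ecmaScriptVersion(10).constAsVar(true).build();
    Source src = Source.sourceFor("inline", "const q = { queryId: \"abc123\" };");
    Block body = new Parser(env, src, new ErrorManager.ThrowErrorManager()).parse().getBody();
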
// Some JSON helper methods below

@ -55,12 +55,6 @@ public class JabArchivesRipper extends AbstractHTMLRipper {
"jabarchives.com/main/view/albumname - got " + url + " instead");
}

@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}

@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page

@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@ -40,12 +42,6 @@ public class JagodibujaRipper extends AbstractHTMLRipper {
throw new MalformedURLException("Expected jagodibuja.com gallery formats hwww.jagodibuja.com/Comic name/ got " + url + " instead");
}

@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
}

@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
@ -62,8 +58,8 @@ public class JagodibujaRipper extends AbstractHTMLRipper {
Element elem = comicPage.select("span.full-size-link > a").first();
LOGGER.info("Got link " + elem.attr("href"));
try {
addURLToDownload(new URL(elem.attr("href")), "");
} catch (MalformedURLException e) {
addURLToDownload(new URI(elem.attr("href")).toURL(), "");
} catch (MalformedURLException | URISyntaxException e) {
LOGGER.warn("Malformed URL");
e.printStackTrace();
}

@ -0,0 +1,70 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

public class Jpg3Ripper extends AbstractHTMLRipper {

public Jpg3Ripper(URL url) throws IOException {
super(url);
}

@Override
public String getDomain() {
return "jpg3.su";
}

@Override
public String getHost() {
return "jpg3";
}

@Override
public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<>();

for (Element el : page.select(".image-container > img")) {
urls.add(el.attr("src").replaceAll("\\.md", ""));
}

return urls;
}

@Override
public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
String u = url.toExternalForm();
u = u.replaceAll("https?://jpg3.su/a/([^/]+)/?.*", "https://jpg3.su/a/$1");
LOGGER.debug("Changed URL from " + url + " to " + u);
return new URI(u).toURL();
}

@Override
public Document getNextPage(Document page) throws IOException, URISyntaxException {
String href = page.select("[data-pagination='next']").attr("href");
if (!href.isEmpty()) {
return Http.url(href).get();
} else {
return null;
}
}

@Override
public String getGID(URL url) throws MalformedURLException {
return url.toString().split("/")[url.toString().split("/").length - 1];
}

@Override
protected void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
}
}
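
Jpg3Ripper's sanitizeURL trims any album URL down to its canonical form with a single regex replace; for instance (album name made up):

    String u = "https://jpg3.su/a/my-album/?page=2";
    u = u.replaceAll("https?://jpg3.su/a/([^/]+)/?.*", "https://jpg3.su/a/$1");
    // u -> "https://jpg3.su/a/my-album"
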
@ -14,49 +14,38 @@ import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class PorncomixDotOneRipper extends AbstractHTMLRipper {
public class KingcomixRipper extends AbstractHTMLRipper {

public PorncomixDotOneRipper(URL url) throws IOException {
public KingcomixRipper(URL url) throws IOException {
super(url);
}

@Override
public String getHost() {
return "porncomix";
return "kingcomix";
}

@Override
public String getDomain() {
return "porncomix.one";
return "kingcomix.com";
}

@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://www.porncomix.one/gallery/([a-zA-Z0-9_\\-]*)/?$");
Pattern p = Pattern.compile("https://kingcomix.com/([a-zA-Z1-9_-]*)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected proncomix URL format: " +
"porncomix.one/gallery/comic - got " + url + " instead");
}

@Override
public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass
return Http.url(url).get();
throw new MalformedURLException("Expected kingcomix URL format: " +
"kingcomix.com/COMIX - got " + url + " instead");
}

@Override
public List<String> getURLsFromPage(Document doc) {
List<String> result = new ArrayList<>();
// We have 2 loops here to cover all the different album types
for (Element el : doc.select(".dgwt-jg-item > a")) {
result.add(el.attr("href"));
}
for (Element el : doc.select(".unite-gallery > img")) {
result.add(el.attr("data-image"));

for (Element el : doc.select("div.entry-content > p > img")) {
result.add(el.attr("src"));
}
return result;
}
@ -65,4 +54,4 @@ public class PorncomixDotOneRipper extends AbstractHTMLRipper {
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
}

@ -1,234 +1,236 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;



/**
 * @author Tushar
 *
 */
public class ListalRipper extends AbstractHTMLRipper {

    private Pattern p1 = Pattern.compile("https:\\/\\/www.listal.com\\/list\\/([a-zA-Z0-9-]+)");
    private Pattern p2 =
            Pattern.compile("https:\\/\\/www.listal.com\\/((?:(?:[a-zA-Z0-9-]+)\\/?)+)");
    private String listId = null; // listId to get more images via POST.
    private String postUrl = "https://www.listal.com/item-list/"; //to load more images.
    private UrlType urlType = UrlType.UNKNOWN;

    private DownloadThreadPool listalThreadPool = new DownloadThreadPool("listalThreadPool");

    public ListalRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getDomain() {
        return "listal.com";
    }

    @Override
    public String getHost() {
        return "listal";
    }

    @Override
    public Document getFirstPage() throws IOException {
        Document doc = Http.url(url).get();
        if (urlType == UrlType.LIST) {
            listId = doc.select("#customlistitems").first().attr("data-listid"); // Used for list types.
        }
        return doc;
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
        if (urlType == UrlType.LIST) {
            // for url of type LIST, https://www.listal.com/list/my-list
            return getURLsForListType(page);
        } else if (urlType == UrlType.FOLDER) {
            // for url of type FOLDER, https://www.listal.com/jim-carrey/pictures
            return getURLsForFolderType(page);
        }
        return null;
    }

    @Override
    public void downloadURL(URL url, int index) {
        listalThreadPool.addThread(new ListalImageDownloadThread(url, index));
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m1 = p1.matcher(url.toExternalForm());
        if (m1.matches()) {
            // Return the text contained between () in the regex
            urlType = UrlType.LIST;
            return m1.group(1);
        }

        Matcher m2 = p2.matcher(url.toExternalForm());
        if (m2.matches()) {
            // Return only gid from capturing group of type listal.com/tvOrSomething/dexter/pictures
            urlType = UrlType.FOLDER;
            return getFolderTypeGid(m2.group(1));
        }

        throw new MalformedURLException("Expected listal.com URL format: "
                + "listal.com/list/my-list-name - got " + url + " instead.");
    }

    @Override
    public Document getNextPage(Document page) throws IOException {
        Document nextPage = super.getNextPage(page);
        switch (urlType) {
            case LIST:
                if (!page.select(".loadmoreitems").isEmpty()) {
                    // All items are not loaded.
                    // Load remaining items using postUrl.

                    String offSet = page.select(".loadmoreitems").last().attr("data-offset");
                    Map<String, String> postParams = new HashMap<>();
                    postParams.put("listid", listId);
                    postParams.put("offset", offSet);
                    try {
                        nextPage = Http.url(postUrl).data(postParams).retries(3).post();
                    } catch (IOException e1) {
                        LOGGER.error("Failed to load more images after " + offSet, e1);
                        throw e1;
                    }
                }
                break;

            case FOLDER:
                Elements pageLinks = page.select(".pages a");
                if (!pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next")) {
                    String nextUrl = pageLinks.last().attr("abs:href");
                    nextPage = Http.url(nextUrl).retries(3).get();
                }
                break;

            case UNKNOWN:
            default:
        }
        return nextPage;
    }


    @Override
    public DownloadThreadPool getThreadPool() {
        return listalThreadPool;
    }

    /**
     * Returns the image urls for UrlType LIST.
     */
    private List<String> getURLsForListType(Document page) {
        List<String> list = new ArrayList<>();
        for (Element e : page.select(".pure-g a[href*=viewimage]")) {
            //list.add("https://www.listal.com" + e.attr("href") + "h");
            list.add(e.attr("abs:href") + "h");
        }

        return list;
    }

    /**
     * Returns the image urls for UrlType FOLDER.
     */
    private List<String> getURLsForFolderType(Document page) {
        List<String> list = new ArrayList<>();
        for (Element e : page.select("#browseimagescontainer .imagewrap-outer a")) {
            list.add(e.attr("abs:href") + "h");
        }
        return list;
    }

    /**
     * Returns the gid for url type listal.com/tvOrSomething/dexter/pictures
     */
    public String getFolderTypeGid(String group) throws MalformedURLException {
        String[] folders = group.split("/");
        try {
            if (folders.length == 2 && folders[1].equals("pictures")) {
                // Url is probably for an actor.
                return folders[0];
            }

            if (folders.length == 3 && folders[2].equals("pictures")) {
                // Url is for a folder (like movies, tv etc).
                Document doc = Http.url(url).get();
                return doc.select(".itemheadingmedium").first().text();
            }

        } catch (Exception e) {
            LOGGER.error(e);
        }
        throw new MalformedURLException("Unable to fetch the gid for given url.");
    }

    private class ListalImageDownloadThread extends Thread {

        private URL url;
        private int index;

        public ListalImageDownloadThread(URL url, int index) {
            super();
            this.url = url;
            this.index = index;
        }

        @Override
        public void run() {
            getImage();
        }

        public void getImage() {
            try {
                Document doc = Http.url(url).get();

                String imageUrl = doc.getElementsByClass("pure-img").attr("src");
                if (imageUrl != "") {
                    addURLToDownload(new URL(imageUrl), getPrefix(index), "", null, null,
                            getImageName());
                } else {
                    LOGGER.error("Couldn't find image from url: " + url);
                }
            } catch (IOException e) {
                LOGGER.error("[!] Exception while downloading image: " + url, e);
            }
        }

        public String getImageName() {
            // Returns the image number of the link if possible.
            String name = this.url.toExternalForm();
            try {
                name = name.substring(name.lastIndexOf("/") + 1);
            } catch (Exception e) {
                LOGGER.info("Failed to get name for the image.");
                name = null;
            }
            // Listal stores images as .jpg
            return name + ".jpg";
        }
    }

    private static enum UrlType {
        LIST, FOLDER, UNKNOWN
    }
}
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;



/**
 * @author Tushar
 *
 */
public class ListalRipper extends AbstractHTMLRipper {

    private Pattern p1 = Pattern.compile("https:\\/\\/www.listal.com\\/list\\/([a-zA-Z0-9-]+)");
    private Pattern p2 =
            Pattern.compile("https:\\/\\/www.listal.com\\/((?:(?:[a-zA-Z0-9-_%]+)\\/?)+)");
    private String listId = null; // listId to get more images via POST.
    private String postUrl = "https://www.listal.com/item-list/"; //to load more images.
    private UrlType urlType = UrlType.UNKNOWN;

    private DownloadThreadPool listalThreadPool = new DownloadThreadPool("listalThreadPool");

    public ListalRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getDomain() {
        return "listal.com";
    }

    @Override
    public String getHost() {
        return "listal";
    }

    @Override
    public Document getFirstPage() throws IOException {
        Document doc = Http.url(url).get();
        if (urlType == UrlType.LIST) {
            listId = doc.select("#customlistitems").first().attr("data-listid"); // Used for list types.
        }
        return doc;
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
        if (urlType == UrlType.LIST) {
            // for url of type LIST, https://www.listal.com/list/my-list
            return getURLsForListType(page);
        } else if (urlType == UrlType.FOLDER) {
            // for url of type FOLDER, https://www.listal.com/jim-carrey/pictures
            return getURLsForFolderType(page);
        }
        return null;
    }

    @Override
    public void downloadURL(URL url, int index) {
        listalThreadPool.addThread(new ListalImageDownloadThread(url, index));
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m1 = p1.matcher(url.toExternalForm());
        if (m1.matches()) {
            // Return the text contained between () in the regex
            urlType = UrlType.LIST;
            return m1.group(1);
        }

        Matcher m2 = p2.matcher(url.toExternalForm());
        if (m2.matches()) {
            // Return only gid from capturing group of type listal.com/tvOrSomething/dexter/pictures
            urlType = UrlType.FOLDER;
            return getFolderTypeGid(m2.group(1));
        }

        throw new MalformedURLException("Expected listal.com URL format: "
                + "listal.com/list/my-list-name - got " + url + " instead.");
    }

    @Override
    public Document getNextPage(Document page) throws IOException, URISyntaxException {
        Document nextPage = super.getNextPage(page);
        switch (urlType) {
            case LIST:
                if (!page.select(".loadmoreitems").isEmpty()) {
                    // All items are not loaded.
                    // Load remaining items using postUrl.

                    String offSet = page.select(".loadmoreitems").last().attr("data-offset");
                    Map<String, String> postParams = new HashMap<>();
                    postParams.put("listid", listId);
                    postParams.put("offset", offSet);
                    try {
                        nextPage = Http.url(postUrl).data(postParams).retries(3).post();
                    } catch (IOException e1) {
                        LOGGER.error("Failed to load more images after " + offSet, e1);
                        throw e1;
                    }
                }
                break;

            case FOLDER:
                Elements pageLinks = page.select(".pages a");
                if (!pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next")) {
                    String nextUrl = pageLinks.last().attr("abs:href");
                    nextPage = Http.url(nextUrl).retries(3).get();
                }
                break;

            case UNKNOWN:
            default:
        }
        return nextPage;
    }


    @Override
    public DownloadThreadPool getThreadPool() {
        return listalThreadPool;
    }

    /**
     * Returns the image urls for UrlType LIST.
     */
    private List<String> getURLsForListType(Document page) {
        List<String> list = new ArrayList<>();
        for (Element e : page.select(".pure-g a[href*=viewimage]")) {
            //list.add("https://www.listal.com" + e.attr("href") + "h");
            list.add(e.attr("abs:href") + "h");
        }

        return list;
    }

    /**
     * Returns the image urls for UrlType FOLDER.
     */
    private List<String> getURLsForFolderType(Document page) {
        List<String> list = new ArrayList<>();
        for (Element e : page.select("#browseimagescontainer .imagewrap-outer a")) {
            list.add(e.attr("abs:href") + "h");
        }
        return list;
    }

    /**
     * Returns the gid for url type listal.com/tvOrSomething/dexter/pictures
     */
    public String getFolderTypeGid(String group) throws MalformedURLException {
        String[] folders = group.split("/");
        try {
            if (folders.length == 2 && folders[1].equals("pictures")) {
                // Url is probably for an actor.
                return folders[0];
            }

            if (folders.length == 3 && folders[2].equals("pictures")) {
                // Url is for a folder (like movies, tv etc).
                Document doc = Http.url(url).get();
                return doc.select(".itemheadingmedium").first().text();
            }

        } catch (Exception e) {
            LOGGER.error(e);
        }
        throw new MalformedURLException("Unable to fetch the gid for given url.");
    }

    private class ListalImageDownloadThread implements Runnable {

        private final URL url;
        private final int index;

        public ListalImageDownloadThread(URL url, int index) {
            super();
            this.url = url;
            this.index = index;
        }

        @Override
        public void run() {
            getImage();
        }

        public void getImage() {
            try {
                Document doc = Http.url(url).get();

                String imageUrl = doc.getElementsByClass("pure-img").attr("src");
                if (imageUrl != "") {
                    addURLToDownload(new URI(imageUrl).toURL(), getPrefix(index), "", null, null,
                            getImageName());
                } else {
                    LOGGER.error("Couldn't find image from url: " + url);
                }
            } catch (IOException | URISyntaxException e) {
                LOGGER.error("[!] Exception while downloading image: " + url, e);
            }
        }

        public String getImageName() {
            // Returns the image number of the link if possible.
            String name = this.url.toExternalForm();
            try {
                name = name.substring(name.lastIndexOf("/") + 1);
            } catch (Exception e) {
                LOGGER.info("Failed to get name for the image.");
                name = null;
            }
            // Listal stores images as .jpg
            return name + ".jpg";
        }
    }

    private static enum UrlType {
        LIST, FOLDER, UNKNOWN
    }
}

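The LIST branch of getNextPage() above drives listal's "load more" endpoint directly rather than following a link. A standalone sketch of that POST, using plain Jsoup and hypothetical listid/offset values (in the ripper these come from the #customlistitems and .loadmoreitems elements):

```java
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class ListalLoadMoreDemo {
    public static void main(String[] args) throws Exception {
        Map<String, String> postParams = new HashMap<>();
        postParams.put("listid", "123456"); // hypothetical data-listid of the list
        postParams.put("offset", "40");     // hypothetical data-offset of .loadmoreitems

        // Same request the LIST branch issues; the response is an HTML fragment
        // holding the next batch of viewimage links.
        Document more = Jsoup.connect("https://www.listal.com/item-list/")
                .data(postParams)
                .post();
        System.out.println(more.select("a[href*=viewimage]").size() + " more items");
    }
}
```
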
@ -1,26 +1,26 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;

public class LusciousRipper extends AbstractHTMLRipper {
    private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
    private static String albumid;

    private Pattern p = Pattern.compile("^https?://(?:www\\.)?(?:members\\.||legacy\\.||old\\.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
    private DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");
    private static final Pattern P = Pattern.compile("^https?://(?:www\\.)?(?:members\\.||legacy\\.||old\\.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");

    public LusciousRipper(URL url) throws IOException {
        super(url);
@ -46,40 +46,48 @@ public class LusciousRipper extends AbstractHTMLRipper {
    }

    @Override
    public Document getFirstPage() throws IOException {
        // "url" is an instance field of the superclass
        Document page = Http.url(url).get();
        LOGGER.info("First page is " + url);
        return page;
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
    public List<String> getURLsFromPage(Document page) { // gets urls for all pages through the api
        List<String> urls = new ArrayList<>();
        Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
        for (Element e : urlElements) {
            urls.add(e.attr("abs:href"));
        int totalPages = 1;

        for (int i = 1; i <= totalPages; i++) {
            String APIStringWOVariables = "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=%2520query%2520PictureListInsideAlbum%28%2524input%253A%2520PictureListInput%21%29%2520%257B%2520picture%2520%257B%2520list%28input%253A%2520%2524input%29%2520%257B%2520info%2520%257B%2520...FacetCollectionInfo%2520%257D%2520items%2520%257B%2520__typename%2520id%2520title%2520description%2520created%2520like_status%2520number_of_comments%2520number_of_favorites%2520moderation_status%2520width%2520height%2520resolution%2520aspect_ratio%2520url_to_original%2520url_to_video%2520is_animated%2520position%2520permissions%2520url%2520tags%2520%257B%2520category%2520text%2520url%2520%257D%2520thumbnails%2520%257B%2520width%2520height%2520size%2520url%2520%257D%2520%257D%2520%257D%2520%257D%2520%257D%2520fragment%2520FacetCollectionInfo%2520on%2520FacetCollectionInfo%2520%257B%2520page%2520has_next_page%2520has_previous_page%2520total_items%2520total_pages%2520items_per_page%2520url_complete%2520%257D%2520&variables=";
            Connection con = Http.url(APIStringWOVariables + encodeVariablesPartOfURL(i, albumid)).method(Connection.Method.GET).retries(5).connection();
            con.ignoreHttpErrors(true);
            con.ignoreContentType(true);
            con.userAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0");
            Connection.Response res;
            try {
                res = con.execute();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            String body = res.body();

            JSONObject jsonObject = new JSONObject(body);

            JSONObject data = jsonObject.getJSONObject("data");
            JSONObject picture = data.getJSONObject("picture");
            JSONObject list = picture.getJSONObject("list");
            JSONArray items = list.getJSONArray("items");
            JSONObject info = list.getJSONObject("info");
            totalPages = info.getInt("total_pages");

            for (int j = 0; j < items.length(); j++) {
                JSONObject item = items.getJSONObject(j);
                String urlToOriginal = item.getString("url_to_original");
                urls.add(urlToOriginal);
            }
        }

        return urls;
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
        // luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
        // Simply GET the nextPageUrl also works. Therefore, we do this...
        Element nextPageElement = doc.select("div#next_page > div > a").first();
        if (nextPageElement == null) {
            throw new IOException("No next page found.");
        }

        return Http.url(nextPageElement.attr("abs:href")).get();
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m = p.matcher(url.toExternalForm());
        Matcher m = P.matcher(url.toExternalForm());
        if (m.matches()) {
            albumid = m.group(1).split("_")[m.group(1).split("_").length - 1];
            return m.group(1);
        }
        throw new MalformedURLException("Expected luscious.net URL format: "
@ -87,45 +95,17 @@ public class LusciousRipper extends AbstractHTMLRipper {
    }

    @Override
    public void downloadURL(URL url, int index) {
        lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
    protected void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
    }

    @Override
    public DownloadThreadPool getThreadPool() {
        return lusciousThreadPool;
    }
    public static String encodeVariablesPartOfURL(int page, String albumId) {
        try {
            String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}";

    public class LusciousDownloadThread extends Thread {
        private URL url;
        private int index;

        public LusciousDownloadThread(URL url, int index) {
            this.url = url;
            this.index = index;
            return URLEncoder.encode(json, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("Could not encode variables");
        }

        @Override
        public void run() {
            try {
                Document page = Http.url(url).retries(RETRY_COUNT).get();

                String downloadUrl = page.select(".icon-download").attr("abs:href");
                if (downloadUrl.equals("")) {
                    // This is here for pages with mp4s instead of images.
                    downloadUrl = page.select("div > video > source").attr("src");
                    if (!downloadUrl.equals("")) {
                        throw new IOException("Could not find download url for image or video.");
                    }
                }

                //If a valid download url was found.
                addURLToDownload(new URL(downloadUrl), getPrefix(index));

            } catch (IOException e) {
                LOGGER.error("Error downloading url " + url, e);
            }
        }

    }
}

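The refactored getURLsFromPage() pages through luscious' GraphQL endpoint by appending a URL-encoded JSON "variables" object per page. What encodeVariablesPartOfURL() emits can be seen in isolation; this sketch duplicates that method with a placeholder album id:

```java
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

public class LusciousVariablesDemo {
    // Same logic as encodeVariablesPartOfURL() above.
    static String encodeVariablesPartOfURL(int page, String albumId) {
        try {
            String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\""
                    + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":"
                    + page + "}}";
            return URLEncoder.encode(json, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("Could not encode variables");
        }
    }

    public static void main(String[] args) {
        // "4444" is a placeholder; real ids come from the trailing "_<id>" of the album slug.
        System.out.println(encodeVariablesPartOfURL(1, "4444"));
        // The encoded JSON is appended to the nobatch GraphQL URL's "&variables=" suffix.
    }
}
```
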
@ -1,36 +1,42 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MangadexRipper extends AbstractJSONRipper {
    private String chapterApiEndPoint = "https://mangadex.org/api/chapter/";

    private String getImageUrl(String chapterHash, String imageName, String server) {
        return server + chapterHash + "/" + imageName;
    }
    private final String chapterApiEndPoint = "https://mangadex.org/api/chapter/";
    private final String mangaApiEndPoint = "https://mangadex.org/api/manga/";
    private boolean isSingleChapter;

    public MangadexRipper(URL url) throws IOException {
        super(url);
    }

    private String getImageUrl(String chapterHash, String imageName, String server) {
        return server + chapterHash + "/" + imageName;
    }

    @Override
    public String getHost() {
        return "mangadex";
    }

    @Override
    public String getDomain() {
        return "mangadex.org";
@ -44,14 +50,19 @@ public class MangadexRipper extends AbstractJSONRipper {
    @Override
    public String getGID(URL url) throws MalformedURLException {
        String capID = getChapterID(url.toExternalForm());
        String mangaID = getMangaID(url.toExternalForm());
        if (capID != null) {
            isSingleChapter = true;
            return capID;
        } else if (mangaID != null) {
            isSingleChapter = false;
            return mangaID;
        }
        throw new MalformedURLException("Unable to get chapter ID from " + url);
    }

    private String getChapterID(String url) {
        Pattern p = Pattern.compile("https://mangadex.org/chapter/([\\d]+)/?");
        Pattern p = Pattern.compile("https://mangadex.org/chapter/([\\d]+)/([\\d+]?)");
        Matcher m = p.matcher(url);
        if (m.matches()) {
            return m.group(1);
@ -59,26 +70,79 @@ public class MangadexRipper extends AbstractJSONRipper {
        return null;
    }

    private String getMangaID(String url) {
        Pattern p = Pattern.compile("https://mangadex.org/title/([\\d]+)/(.+)");
        Matcher m = p.matcher(url);
        if (m.matches()) {
            return m.group(1);
        }
        return null;
    }


    @Override
    public JSONObject getFirstPage() throws IOException {
    public JSONObject getFirstPage() throws IOException, URISyntaxException {
        // Get the chapter ID
        String chapterID = getChapterID(url.toExternalForm());
        return Http.url(new URL(chapterApiEndPoint + chapterID)).getJSON();
        String mangaID = getMangaID(url.toExternalForm());
        if (mangaID != null) {
            return Http.url(new URI(mangaApiEndPoint + mangaID).toURL()).getJSON();
        } else
            return Http.url(new URI(chapterApiEndPoint + chapterID).toURL()).getJSON();
    }

    @Override
    protected List<String> getURLsFromJSON(JSONObject json) {
        if (isSingleChapter) {
            List<String> assetURLs = new ArrayList<>();
            JSONArray currentObject;
            String chapterHash;
            // Server is the cdn hosting the images.
            String server;
            chapterHash = json.getString("hash");
            server = json.getString("server");
            for (int i = 0; i < json.getJSONArray("page_array").length(); i++) {
                currentObject = json.getJSONArray("page_array");

                assetURLs.add(getImageUrl(chapterHash, currentObject.getString(i), server));
            }
            return assetURLs;
        }
        JSONObject chaptersJSON = (JSONObject) json.get("chapter");
        JSONObject temp;
        Iterator<String> keys = chaptersJSON.keys();
        HashMap<Double, String> chapterIDs = new HashMap<>();
        while (keys.hasNext()) {
            String keyValue = keys.next();
            temp = (JSONObject) chaptersJSON.get(keyValue);
            if (temp.getString("lang_name").equals("English")) {
                chapterIDs.put(temp.getDouble("chapter"), keyValue);
            }

        }

        List<String> assetURLs = new ArrayList<>();
        JSONArray currentObject;

        String chapterHash = json.getString("hash");
        String chapterHash;
        // Server is the cdn hosting the images.
        String server = json.getString("server");
        String server;
        JSONObject chapterJSON = null;
        TreeMap<Double, String> treeMap = new TreeMap<>(chapterIDs);
        for (Double aDouble : treeMap.keySet()) {
            double key = (double) aDouble;
            try {
                chapterJSON = Http.url(new URI(chapterApiEndPoint + treeMap.get(key)).toURL()).getJSON();
            } catch (IOException | URISyntaxException e) {
                e.printStackTrace();
            }
            sendUpdate(RipStatusMessage.STATUS.LOADING_RESOURCE, "chapter " + key);
            chapterHash = chapterJSON.getString("hash");
            server = chapterJSON.getString("server");
            for (int i = 0; i < chapterJSON.getJSONArray("page_array").length(); i++) {
                currentObject = chapterJSON.getJSONArray("page_array");

        for (int i = 0; i < json.getJSONArray("page_array").length(); i++) {
            currentObject = json.getJSONArray("page_array");

            assetURLs.add(getImageUrl(chapterHash, currentObject.getString(i), server));
                assetURLs.add(getImageUrl(chapterHash, currentObject.getString(i), server));
            }
        }

        return assetURLs;
@ -91,4 +155,5 @@ public class MangadexRipper extends AbstractJSONRipper {
        addURLToDownload(url, getPrefix(index));
    }

}

}
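
Both the single-chapter and whole-manga branches build page URLs the same way: the chapter API response supplies a CDN "server" prefix, a chapter "hash", and a "page_array" of file names, which getImageUrl() concatenates. A sketch with placeholder values:

```java
public class MangadexImageUrlDemo {
    // Same concatenation as getImageUrl() above.
    static String getImageUrl(String chapterHash, String imageName, String server) {
        return server + chapterHash + "/" + imageName;
    }

    public static void main(String[] args) {
        // Placeholder values for the "server", "hash", and "page_array" fields
        // of the /api/chapter/<id> response.
        String server = "https://s5.mangadex.org/data/";
        String hash = "0abc123def";
        String page = "x1.png";
        System.out.println(getImageUrl(hash, page, server));
        // -> https://s5.mangadex.org/data/0abc123def/x1.png
    }
}
```
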
@ -48,12 +48,6 @@ public class ManganeloRipper extends AbstractHTMLRipper {
                "/manganelo.com/manga/ID - got " + url + " instead");
    }

    @Override
    public Document getFirstPage() throws IOException {
        // "url" is an instance field of the superclass
        return Http.url(url).get();
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
        Element elem = doc.select("div.btn-navigation-chap > a.back").first();
@ -21,12 +21,12 @@ public class MeituriRipper extends AbstractHTMLRipper {

    @Override
    public String getHost() {
        return "meituri";
        return "tujigu";
    }

    @Override
    public String getDomain() {
        return "meituri.com";
        return "tujigu.com";
    }

    // To use in getting URLs
@ -35,23 +35,18 @@ public class MeituriRipper extends AbstractHTMLRipper {
    @Override
    public String getGID(URL url) throws MalformedURLException {
        // without escape
        // ^https?://[w.]*meituri\.com/a/([0-9]+)/([0-9]+\.html)*$
        // https://www.meituri.com/a/14449/
        // also matches https://www.meituri.com/a/14449/3.html etc.
        // ^https?://[w.]*tujigu\.com/a/([0-9]+)/([0-9]+\.html)*$
        // https://www.tujigu.com/a/14449/
        // also matches https://www.tujigu.com/a/14449/3.html etc.
        // group 1 is 14449
        Pattern p = Pattern.compile("^https?://[w.]*meituri\\.com/a/([0-9]+)/([0-9]+\\.html)*$");
        Pattern p = Pattern.compile("^https?://[w.]*tujigu\\.com/a/([0-9]+)/([0-9]+\\.html)*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            albumID = m.group(1);
            return m.group(1);
        }
        throw new MalformedURLException(
                "Expected meituri.com URL format: " + "meituri.com/a/albumid/ - got " + url + " instead");
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
                "Expected tujigu.com URL format: " + "tujigu.com/a/albumid/ - got " + url + " instead");
    }

    @Override
@ -71,7 +66,7 @@ public class MeituriRipper extends AbstractHTMLRipper {
        }

        // Base URL: http://ii.hywly.com/a/1/albumid/imgnum.jpg
        String baseURL = "http://ii.hywly.com/a/1/" + albumID + "/";
        String baseURL = "https://tjg.hywly.com/a/1/" + albumID + "/";

        // Loop through and add images to the URL list
        for (int i = 1; i <= numOfImages; i++) {
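
The image URLs themselves are never scraped from the page; they are generated from the album id captured by getGID() and an image count, now against the tjg.hywly.com host. A sketch with a hypothetical count:

```java
import java.util.ArrayList;
import java.util.List;

public class TujiguUrlDemo {
    public static void main(String[] args) {
        String albumID = "14449"; // group 1 of the tujigu.com URL pattern
        int numOfImages = 3;      // in the ripper this count is parsed from the album page
        String baseURL = "https://tjg.hywly.com/a/1/" + albumID + "/";

        List<String> urls = new ArrayList<>();
        for (int i = 1; i <= numOfImages; i++) {
            urls.add(baseURL + i + ".jpg");
        }
        System.out.println(urls);
        // -> [https://tjg.hywly.com/a/1/14449/1.jpg, ..., https://tjg.hywly.com/a/1/14449/3.jpg]
    }
}
```
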
@ -41,11 +41,6 @@ public class ModelxRipper extends AbstractHTMLRipper {
        throw new MalformedURLException("Expected URL format: http://www.modelx.org/[category (one or more)]/xxxxx got: " + url);
    }

    @Override
    public Document getFirstPage() throws IOException {
        return Http.url(url).get();
    }

    @Override
    public List<String> getURLsFromPage(Document page) {
        List<String> result = new ArrayList<>();
@ -2,6 +2,8 @@ package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@ -13,7 +15,6 @@ import org.jsoup.nodes.Element;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.select.Elements;
@ -59,20 +60,21 @@ public class MotherlessRipper extends AbstractHTMLRipper {
        if (!notHome) {
            StringBuilder newPath = new StringBuilder(path);
            newPath.insert(2, "M");
            firstURL = new URL(this.url, "https://" + DOMAIN + newPath);
            firstURL = URI.create("https://" + DOMAIN + newPath).toURL();
            LOGGER.info("Changed URL to " + firstURL);
        }
        return Http.url(firstURL).referrer("https://motherless.com").get();
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
    public Document getNextPage(Document doc) throws IOException, URISyntaxException {

        Elements nextPageLink = doc.head().select("link[rel=next]");
        if (nextPageLink.isEmpty()) {
            throw new IOException("Last page reached");
        } else {
            String referrerLink = doc.head().select("link[rel=canonical]").first().attr("href");
            URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
            URL nextURL = this.url.toURI().resolve(nextPageLink.first().attr("href")).toURL();
            return Http.url(nextURL).referrer(referrerLink).get();
        }
    }
@ -81,7 +83,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
    protected List<String> getURLsFromPage(Document page) {
        List<String> pageURLs = new ArrayList<>();

        for (Element thumb : page.select("div.thumb a.img-container")) {
        for (Element thumb : page.select("div.thumb-container a.img-container")) {
            if (isStopped()) {
                break;
            }
@ -109,7 +111,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
    @Override
    protected void downloadURL(URL url, int index) {
        // Create thread for finding image at "url" page
        MotherlessImageThread mit = new MotherlessImageThread(url, index);
        MotherlessImageRunnable mit = new MotherlessImageRunnable(url, index);
        motherlessThreadPool.addThread(mit);
        try {
            Thread.sleep(IMAGE_SLEEP_TIME);
@ -148,15 +150,19 @@ public class MotherlessRipper extends AbstractHTMLRipper {
        throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url);
    }


    @Override
    protected DownloadThreadPool getThreadPool() {
        return motherlessThreadPool;
    }

    /**
     * Helper class to find and download images found on "image" pages
     */
    private class MotherlessImageThread extends Thread {
        private URL url;
        private int index;
    private class MotherlessImageRunnable implements Runnable {
        private final URL url;
        private final int index;

        MotherlessImageThread(URL url, int index) {
        MotherlessImageRunnable(URL url, int index) {
            super();
            this.url = url;
            this.index = index;
@ -180,11 +186,11 @@ public class MotherlessRipper extends AbstractHTMLRipper {
                if (Utils.getConfigBoolean("download.save_order", true)) {
                    prefix = String.format("%03d_", index);
                }
                addURLToDownload(new URL(file), prefix);
                addURLToDownload(new URI(file).toURL(), prefix);
            } else {
                LOGGER.warn("[!] could not find '__fileurl' at " + url);
            }
        } catch (IOException e) {
        } catch (IOException | URISyntaxException e) {
            LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
        }
    }

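The getFirstPage() change above keeps the existing path rewrite (inserting "M" after the leading "/G" segment) but builds the result with URI.create() instead of the URL(URL, String) constructor. The string manipulation on its own, with a hypothetical gallery path:

```java
import java.net.URI;
import java.net.URL;

public class MotherlessPathDemo {
    public static void main(String[] args) throws Exception {
        String path = "/G1A2B3C4";              // hypothetical gallery path
        StringBuilder newPath = new StringBuilder(path);
        newPath.insert(2, "M");                 // the same edit getFirstPage() applies
        URL firstURL = URI.create("https://motherless.com" + newPath).toURL();
        System.out.println(firstURL);           // -> https://motherless.com/GM1A2B3C4
    }
}
```
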
@ -0,0 +1,223 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;


public class MrCongRipper extends AbstractHTMLRipper {

    private Document currDoc;
    private int lastPageNum;
    private int currPageNum;
    private boolean tagPage = false;

    public MrCongRipper(URL url) throws IOException {
        super(url);
        currPageNum = 1;
    }

    @Override
    public String getHost() {
        return "mrcong";
    }

    @Override
    public String getDomain() {
        return "mrcong.com";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        System.out.println(url.toExternalForm());
        Pattern p = Pattern.compile("^https?://mrcong\\.com/(\\S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$");
        Pattern p2 = Pattern.compile("^https?://mrcong\\.com/tag/(\\S*)/$"); //Added 6-10-21
        Matcher m = p.matcher(url.toExternalForm());
        Matcher m2 = p2.matcher(url.toExternalForm()); //6-10-21
        if (m.matches()) {
            return m.group(1);
        }
        else if(m2.matches()) { //Added 6-10-21
            tagPage = true;
            System.out.println("tagPage = TRUE");
            return m2.group(1);
        }

        throw new MalformedURLException("Expected mrcong.com URL format: "
                + "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead");
    }

    @Override
    public Document getFirstPage() throws IOException { //returns the root gallery page regardless of actual page number
        // "url" is an instance field of the superclass
        String rootUrlStr;
        URL rootUrl;

        if(!tagPage) {
            rootUrlStr = url.toExternalForm().replaceAll("(|/|/[0-9]+/?)$", "/");
        } else { //6-10-21
            rootUrlStr = url.toExternalForm().replaceAll("(page/[0-9]+/)$", "page/1/");
        }

        rootUrl = URI.create(rootUrlStr).toURL();
        url = rootUrl;
        currPageNum = 1;
        currDoc = Http.url(url).get();
        getMaxPageNumber(currDoc);
        return currDoc;
    }

    @Override
    public Document getNextPage(Document doc) throws IOException {
        int pageNum = currPageNum;
        String urlStr;
        if(!tagPage) {
            if (pageNum == 1 && lastPageNum > 1) {
                urlStr = url.toExternalForm().concat((pageNum + 1) + "");
                System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
            } else if (pageNum < lastPageNum) {
                urlStr = url.toExternalForm().replaceAll("(/([0-9]*)/?)$", ("/" + (pageNum + 1) + "/"));
                System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
            } else {
                //System.out.printf("Error: Page number provided goes past last valid page number\n");
                throw (new IOException("Error: Page number provided goes past last valid page number\n"));
            }
        } else { //6-10-21
            //if (pageNum == 1 && lastPageNum >= 1) {
            if (pageNum == 1 && lastPageNum > 1) { //6-10-21
                urlStr = url.toExternalForm().concat("page/" + (pageNum + 1) + "");
                System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
            } else if (pageNum < lastPageNum) {
                urlStr = url.toExternalForm().replaceAll("(page/([0-9]*)/?)$", ("page/" + (pageNum + 1) + "/"));
                System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
            } else {
                //System.out.printf("Error: Page number provided goes past last valid page number\n");
                System.out.print("Error: There is no next page!\n");
                return null;
                //throw (new IOException("Error: Page number provided goes past last valid page number\n"));
            }
        }

        url = URI.create(urlStr).toURL();
        currDoc = Http.url(url).get();
        currPageNum++;
        return currDoc;
    }

    private int getMaxPageNumber(Document doc) {
        if(!tagPage) {
            try {
                lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); //gets the last possible page for the gallery
            } catch(Exception e) {
                return 1;
            }
        } else {
            try {
                lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); //gets the last possible page for the gallery
                System.out.println("The last page found for " + url + " was " + lastPageNum);
            } catch(Exception e) {
                return 1;
            }
        }

        return lastPageNum;
    }

    private int getCurrentPageNum(Document doc) {
        int currPage; //6-10-21

        if(!tagPage) {
            currPage = Integer.parseInt(doc.select("div.page-link > span").first().text());
        } else {
            currPage = Integer.parseInt(doc.select("div.pagination > span").first().text());
        }

        System.out.println("The current page was found to be: " + currPage);

        return currPage;
    }

    @Override
    public List<String> getURLsFromPage(Document doc) { //gets the urls of the images
        List<String> result = new ArrayList<>();

        if(!tagPage) {
            for (Element el : doc.select("p > img")) {
                String imageSource = el.attr("src");
                result.add(imageSource);
            }

            System.out.println("\n1.)Printing List: " + result + "\n");
        } else { //6-10-21
            //List<String> gallery_set_list = new ArrayList<>();

            for (Element el : doc.select("h2 > a")) {
                String pageSource = el.attr("href");
                if(!pageSource.equals("https://mrcong.com/")) {
                    result.add(pageSource);
                    System.out.println("\n" + pageSource + " has been added to the list.");
                }
            }

            /*for (String el2 : gallery_set_list) {
                try {
                    URL temp_urL = URI.create(el2).toURL();
                    MrCongRipper mcr = new MrCongRipper(temp_urL);
                    System.out.println("URL being ripped: " + mcr.url.toString());
                    result.addAll(mcr.getURLsFromPage(mcr.getFirstPage()));

                    Document nextPg = mcr.getNextPage(mcr.currDoc);
                    while(nextPg != null) {
                        result.addAll(mcr.getURLsFromPage(nextPg));
                        nextPg = mcr.getNextPage(mcr.currDoc);
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }

            }*/

            System.out.println("\n2.)Printing List: " + result + "\n");
        }

        return result;
    }

    @Override
    public void downloadURL(URL url, int index) {
        //addURLToDownload(url, getPrefix(index));

        if(!tagPage) {
            addURLToDownload(url, getPrefix(index));
        } else {
            try {
                List<String> ls = this.getURLsFromPage(this.currDoc);
                Document np = this.getNextPage(this.currDoc);

                while(np != null) { //Creates a list of all sets to download
                    ls.addAll(this.getURLsFromPage(np));
                    np = this.getNextPage(np);
                }

                for(String urlStr : ls) {
                    MrCongRipper mcr = new MrCongRipper(URI.create(urlStr).toURL());
                    mcr.setup();
                    mcr.rip();
                }

            } catch (IOException | URISyntaxException e) {
                e.printStackTrace();
            }
        }
    }
}
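
The next-page logic is plain string surgery on the current URL: for a non-tag gallery the page number is appended on page 1 and the trailing "/N/" segment is swapped afterwards (tag pages do the same with a "page/N/" suffix). A sketch with a hypothetical gallery slug:

```java
public class MrCongPagingDemo {
    public static void main(String[] args) {
        String root = "https://mrcong.com/some-set-20-anh/"; // hypothetical gallery root

        // Page 1 -> page 2: the number is simply appended to the root URL.
        String page2 = root + 2;
        System.out.println(page2); // -> https://mrcong.com/some-set-20-anh/2

        // Later pages: the trailing "/N" (or "/N/") segment is replaced.
        String page3 = page2.replaceAll("(/([0-9]*)/?)$", "/" + 3 + "/");
        System.out.println(page3); // -> https://mrcong.com/some-set-20-anh/3/
    }
}
```
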
@ -0,0 +1,71 @@
package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MultpornRipper extends AbstractHTMLRipper {

    public MultpornRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    protected String getDomain() {
        return "multporn.net";
    }

    @Override
    public String getHost() {
        return "multporn";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException, URISyntaxException {
        Pattern p = Pattern.compile("^https?://multporn\\.net/node/(\\d+)/.*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }

        try {
            String nodeHref = Http.url(url).get().select(".simple-mode-switcher").attr("href");
            p = Pattern.compile("/node/(\\d+)/.*");
            m = p.matcher(nodeHref);
            if (m.matches()) {
                this.url = new URI("https://multporn.net" + nodeHref).toURL();
                return m.group(1);
            }
        } catch (Exception ignored) {}

        throw new MalformedURLException("Expected multporn.net URL format: " +
                "multporn.net/comics/comicid / multporn.net/node/id/* - got " + url + " instead");
    }

    @Override
    protected List<String> getURLsFromPage(Document page) {
        List<String> imageURLs = new ArrayList<>();
        Elements thumbs = page.select(".mfp-gallery-image .mfp-item");
        for (Element el : thumbs) {
            imageURLs.add(el.attr("href"));
        }
        return imageURLs;
    }

    @Override
    protected void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
    }
}
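
When the URL is not already in /node/&lt;id&gt; form, getGID() falls back to reading the "simple mode" switcher link from the page and re-points this.url at the node URL. The regex step on its own, with a hypothetical href:

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MultpornNodeDemo {
    public static void main(String[] args) {
        // Hypothetical href read from the .simple-mode-switcher element.
        String nodeHref = "/node/123456/1";
        Matcher m = Pattern.compile("/node/(\\d+)/.*").matcher(nodeHref);
        if (m.matches()) {
            System.out.println("gid = " + m.group(1));                   // -> 123456
            System.out.println("url = https://multporn.net" + nodeHref); // the rewritten ripper URL
        }
    }
}
```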