mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-01-18 21:17:59 +01:00
Merge branch 'master' into m
This commit is contained in:
commit
f089e804fd
3
.gitignore
vendored
3
.gitignore
vendored
@ -12,3 +12,6 @@ history.json
|
||||
*.iml
|
||||
.settings/
|
||||
.classpath
|
||||
*.txt
|
||||
bin/
|
||||
.vscode/
|
||||
|
@ -24,7 +24,7 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/
|
||||
* Quickly downloads all images in an online album (see supported sites below)
|
||||
* Easily re-rip albums to fetch new content
|
||||
|
||||
## Supported sites:
|
||||
## [List of Supported Sites](https://github.com/4pr0n/ripme/wiki/Supported-Sites)
|
||||
|
||||
* imgur
|
||||
* twitter
|
||||
@ -44,11 +44,9 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/
|
||||
* xhamster
|
||||
* (more)
|
||||
|
||||
### [Full updated list](https://github.com/4pr0n/ripme/issues/8)
|
||||
|
||||
## Not Supported?
|
||||
|
||||
Request support for more sites by adding a comment to [this Github issue](https://github.com/4pr0n/ripme/issues/8).
|
||||
Request support for more sites by adding a comment to [this Github issue](https://github.com/4pr0n/ripme/issues/502).
|
||||
|
||||
If you're a developer, you can add your own by following the wiki guide
|
||||
[How To Create A Ripper for HTML Websites](https://github.com/4pr0n/ripme/wiki/How-To-Create-A-Ripper-for-HTML-websites).
|
||||
|
2
pom.xml
2
pom.xml
@ -4,7 +4,7 @@
|
||||
<groupId>com.rarchives.ripme</groupId>
|
||||
<artifactId>ripme</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<version>1.4.2</version>
|
||||
<version>1.4.7</version>
|
||||
<name>ripme</name>
|
||||
<url>http://rip.rarchives.com</url>
|
||||
<properties>
|
||||
|
@ -1,6 +1,11 @@
|
||||
{
|
||||
"latestVersion" : "1.4.2",
|
||||
"latestVersion" : "1.4.7",
|
||||
"changeList" : [
|
||||
"1.4.7: Fixed NewsFilter, XHamster; added TheChiveRipper",
|
||||
"1.4.6: Eroshare: get album names; Imgur: improve grabbing album name.",
|
||||
"1.4.5: SinnerComics: Added work around for naming bug",
|
||||
"1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.",
|
||||
"1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.",
|
||||
"1.4.2: Added nhentai ripper.",
|
||||
"1.4.1: Fixed Imgbox: correctly downloads full-size images.",
|
||||
"1.4.0: Fixed update mechanism. Some improvements to Imgur, etc.",
|
||||
|
@ -19,7 +19,7 @@ import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class ChanRipper extends AbstractHTMLRipper {
|
||||
public static List<ChanSite> explicit_domains = Arrays.asList(
|
||||
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org")),
|
||||
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org")),
|
||||
new ChanSite(Arrays.asList("archive.moe"), Arrays.asList("data.archive.moe")),
|
||||
new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")),
|
||||
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")),
|
||||
|
@ -6,10 +6,12 @@ import com.rarchives.ripme.ripper.DownloadThreadPool;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
@ -24,13 +26,11 @@ import org.jsoup.select.Elements;
|
||||
*
|
||||
* @author
|
||||
*/
|
||||
public class E621Ripper extends AbstractHTMLRipper{
|
||||
private static Pattern gidPattern=null;
|
||||
private static Pattern gidPattern2=null;
|
||||
private static Pattern gidPatternPool=null;
|
||||
|
||||
private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621");
|
||||
|
||||
public class E621Ripper extends AbstractHTMLRipper {
|
||||
public static final int POOL_IMAGES_PER_PAGE = 24;
|
||||
|
||||
private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
|
||||
|
||||
public E621Ripper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
@ -52,31 +52,50 @@ public class E621Ripper extends AbstractHTMLRipper{
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if(url.getPath().startsWith("/pool/show/"))
|
||||
return Http.url("https://e621.net/pool/show/"+getTerm(url)).get();
|
||||
else
|
||||
return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get();
|
||||
if (url.getPath().startsWith("/pool/show/")) {
|
||||
return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
|
||||
} else {
|
||||
return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
Elements elements=page.select("#post-list .thumb a,#pool-show .thumb a");
|
||||
List<String> res=new ArrayList<String>(elements.size());
|
||||
|
||||
for(Element e:elements){
|
||||
res.add(e.absUrl("href")+"#"+e.child(0).attr("id").substring(1));
|
||||
Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a");
|
||||
List<String> res = new ArrayList<String>(elements.size());
|
||||
|
||||
if (page.getElementById("pool-show") != null) {
|
||||
int index = 0;
|
||||
|
||||
Element e = page.getElementById("paginator");
|
||||
if (e != null) {
|
||||
e = e.getElementsByClass("current").first();
|
||||
if (e != null) {
|
||||
index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE;
|
||||
}
|
||||
}
|
||||
|
||||
for (Element e_ : elements) {
|
||||
res.add(e_.absUrl("href") + "#" + ++index);
|
||||
}
|
||||
|
||||
} else {
|
||||
for (Element e : elements) {
|
||||
res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document page) throws IOException {
|
||||
for(Element e:page.select("#paginator a")){
|
||||
if(e.attr("rel").equals("next"))
|
||||
for (Element e : page.select("#paginator a")) {
|
||||
if (e.attr("rel").equals("next")) {
|
||||
return Http.url(e.absUrl("href")).get();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -85,58 +104,78 @@ public class E621Ripper extends AbstractHTMLRipper{
|
||||
e621ThreadPool.addThread(new Thread(new Runnable() {
|
||||
public void run() {
|
||||
try {
|
||||
Document page=Http.url(url).get();
|
||||
|
||||
addURLToDownload(new URL(page.getElementById("image").absUrl("src")),Utils.getConfigBoolean("download.save_order",true)?url.getRef()+"-":"");
|
||||
Document page = Http.url(url).get();
|
||||
Element e = page.getElementById("image");
|
||||
|
||||
if (e != null) {
|
||||
addURLToDownload(new URL(e.absUrl("src")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
|
||||
} else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) {
|
||||
addURLToDownload(new URL(e.absUrl("value")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
|
||||
} else {
|
||||
Logger.getLogger(E621Ripper.class.getName()).log(Level.WARNING, "Unsupported media type - please report to program author: " + url.toString());
|
||||
}
|
||||
|
||||
} catch (IOException ex) {
|
||||
Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex);
|
||||
}
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
private String getTerm(URL url) throws MalformedURLException{
|
||||
if(gidPattern==null)
|
||||
gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
|
||||
if(gidPatternPool==null)
|
||||
gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$");
|
||||
|
||||
Matcher m = gidPattern.matcher(url.toExternalForm());
|
||||
if(m.matches())
|
||||
return m.group(2);
|
||||
|
||||
m = gidPatternPool.matcher(url.toExternalForm());
|
||||
if(m.matches())
|
||||
return m.group(2);
|
||||
|
||||
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
|
||||
private String getTerm(URL url) throws MalformedURLException {
|
||||
String query = url.getQuery();
|
||||
|
||||
if (query != null) {
|
||||
return Utils.parseUrlQuery(query, "tags");
|
||||
}
|
||||
|
||||
if (query == null) {
|
||||
if ((query = url.getPath()).startsWith("/post/index/")) {
|
||||
query = query.substring(12);
|
||||
|
||||
int pos = query.indexOf('/');
|
||||
if (pos == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// skip page number
|
||||
query = query.substring(pos + 1);
|
||||
|
||||
if (query.endsWith("/")) {
|
||||
query = query.substring(0, query.length() - 1);
|
||||
}
|
||||
|
||||
try {
|
||||
return URLDecoder.decode(query, "UTF-8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// Shouldn't happen since UTF-8 is required to be supported
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
} else if (query.startsWith("/pool/show/")) {
|
||||
query = query.substring(11);
|
||||
|
||||
if (query.endsWith("/")) {
|
||||
query = query.substring(0, query.length() - 1);
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
try {
|
||||
String prefix="";
|
||||
if(url.getPath().startsWith("/pool/show/"))
|
||||
prefix="pool_";
|
||||
|
||||
return Utils.filesystemSafe(prefix+new URI(getTerm(url)).getPath());
|
||||
} catch (URISyntaxException ex) {
|
||||
Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
|
||||
String prefix = "";
|
||||
if (url.getPath().startsWith("/pool/show/")) {
|
||||
prefix = "pool_";
|
||||
} else {
|
||||
prefix = "term_";
|
||||
}
|
||||
|
||||
throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
|
||||
|
||||
return Utils.filesystemSafe(prefix + getTerm(url));
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
if(gidPattern2==null)
|
||||
gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
|
||||
|
||||
Matcher m = gidPattern2.matcher(url.toExternalForm());
|
||||
if(m.matches())
|
||||
return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20"));
|
||||
|
||||
return url;
|
||||
}
|
||||
|
||||
}
|
@ -38,7 +38,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
||||
|
||||
// Current HTML document
|
||||
private Document albumDoc = null;
|
||||
|
||||
|
||||
private static final Map<String,String> cookies = new HashMap<String,String>();
|
||||
static {
|
||||
cookies.put("nw", "1");
|
||||
@ -53,10 +53,10 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
||||
public String getHost() {
|
||||
return "e-hentai";
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "g.e-hentai.org";
|
||||
return "e-hentai.org";
|
||||
}
|
||||
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
@ -79,18 +79,18 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
||||
Pattern p;
|
||||
Matcher m;
|
||||
|
||||
p = Pattern.compile("^.*g\\.e-hentai\\.org/g/([0-9]+)/([a-fA-F0-9]+)/$");
|
||||
p = Pattern.compile("^https?://e-hentai\\.org/g/([0-9]+)/([a-fA-F0-9]+)/$");
|
||||
m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1) + "-" + m.group(2);
|
||||
}
|
||||
|
||||
throw new MalformedURLException(
|
||||
"Expected g.e-hentai.org gallery format: "
|
||||
+ "http://g.e-hentai.org/g/####/####/"
|
||||
"Expected e-hentai.org gallery format: "
|
||||
+ "http://e-hentai.org/g/####/####/"
|
||||
+ " Got: " + url);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Attempts to get page, checks for IP ban, waits.
|
||||
* @param url
|
||||
@ -185,7 +185,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
||||
|
||||
/**
|
||||
* Helper class to find and download images found on "image" pages
|
||||
*
|
||||
*
|
||||
* Handles case when site has IP-banned the user.
|
||||
*/
|
||||
private class EHentaiImageThread extends Thread {
|
||||
@ -204,7 +204,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
||||
public void run() {
|
||||
fetchImage();
|
||||
}
|
||||
|
||||
|
||||
private void fetchImage() {
|
||||
try {
|
||||
Document doc = getPageWithRetries(this.url);
|
||||
@ -246,4 +246,4 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/index/category/([a-zA-Z0-9\\-_]+).*$");
|
||||
Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (!m.matches()) {
|
||||
throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
|
||||
@ -54,7 +54,8 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
// Attempt to use album title as GID
|
||||
Element titleElement = getFirstPage().select("meta[name=description]").first();
|
||||
String title = titleElement.attr("content");
|
||||
title = title.substring(title.lastIndexOf('/') + 1);
|
||||
title = title.replace("A huge collection of free porn comics for adults. Read", "");
|
||||
title = title.replace("online for free at 8muses.com", "");
|
||||
return getHost() + "_" + title.trim();
|
||||
} catch (IOException e) {
|
||||
// Fall back to default album naming convention
|
||||
@ -122,14 +123,10 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
}
|
||||
try {
|
||||
logger.info("Retrieving full-size image location from " + parentHref);
|
||||
Thread.sleep(1000);
|
||||
image = getFullSizeImage(parentHref);
|
||||
} catch (IOException e) {
|
||||
logger.error("Failed to get full-size image from " + parentHref);
|
||||
continue;
|
||||
} catch (InterruptedException e) {
|
||||
logger.error("Interrupted while getting full-size image from " + parentHref);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!image.contains("8muses.com")) {
|
||||
|
@ -29,11 +29,11 @@ import com.rarchives.ripme.utils.Http;
|
||||
* @author losipher
|
||||
*/
|
||||
public class EroShareRipper extends AbstractHTMLRipper {
|
||||
|
||||
|
||||
public EroShareRipper (URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "eroshare.com";
|
||||
@ -43,12 +43,28 @@ public class EroShareRipper extends AbstractHTMLRipper {
|
||||
public String getHost() {
|
||||
return "eroshare";
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index){
|
||||
addURLToDownload(url);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
|
||||
String title = titleElement.attr("content");
|
||||
title = title.substring(title.lastIndexOf('/') + 1);
|
||||
return getHost() + "_" + getGID(url) + "_" + title.trim();
|
||||
} catch (IOException e) {
|
||||
// Fall back to default album naming convention
|
||||
logger.info("Unable to find title at " + url);
|
||||
}
|
||||
return super.getAlbumTitle(url);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc){
|
||||
List<String> URLs = new ArrayList<String>();
|
||||
@ -70,10 +86,10 @@ public class EroShareRipper extends AbstractHTMLRipper {
|
||||
URLs.add(videoURL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return URLs;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
Response resp = Http.url(this.url)
|
||||
@ -81,10 +97,10 @@ public class EroShareRipper extends AbstractHTMLRipper {
|
||||
.response();
|
||||
|
||||
Document doc = resp.parse();
|
||||
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$");
|
||||
@ -94,9 +110,9 @@ public class EroShareRipper extends AbstractHTMLRipper {
|
||||
}
|
||||
throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album");
|
||||
}
|
||||
|
||||
|
||||
public static List<URL> getURLs(URL url) throws IOException{
|
||||
|
||||
|
||||
Response resp = Http.url(url)
|
||||
.ignoreContentType()
|
||||
.response();
|
||||
@ -122,7 +138,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
|
||||
URLs.add(new URL(videoURL));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return URLs;
|
||||
}
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ public class ImgurRipper extends AlbumRipper {
|
||||
HOST = "imgur";
|
||||
|
||||
private final int SLEEP_BETWEEN_ALBUMS;
|
||||
|
||||
|
||||
private Document albumDoc;
|
||||
|
||||
static enum ALBUM_TYPE {
|
||||
@ -104,12 +104,10 @@ public class ImgurRipper extends AlbumRipper {
|
||||
*/
|
||||
|
||||
String title = null;
|
||||
elems = albumDoc.select(".post-title");
|
||||
if (elems.size() > 0) {
|
||||
Element postTitle = elems.get(0);
|
||||
if (postTitle != null) {
|
||||
title = postTitle.text();
|
||||
}
|
||||
logger.info("Trying to get album title");
|
||||
elems = albumDoc.select("meta[property=og:title]");
|
||||
if (elems!=null) {
|
||||
title = elems.attr("content");
|
||||
}
|
||||
|
||||
String albumTitle = "imgur_";
|
||||
@ -138,18 +136,25 @@ public class ImgurRipper extends AlbumRipper {
|
||||
case ALBUM:
|
||||
// Fall-through
|
||||
case USER_ALBUM:
|
||||
logger.info("Album type is USER_ALBUM");
|
||||
// Don't call getAlbumTitle(this.url) with this
|
||||
// as it seems to cause the album to be downloaded to a subdir.
|
||||
ripAlbum(this.url);
|
||||
break;
|
||||
case SERIES_OF_IMAGES:
|
||||
logger.info("Album type is SERIES_OF_IMAGES");
|
||||
ripAlbum(this.url);
|
||||
break;
|
||||
case USER:
|
||||
logger.info("Album type is USER");
|
||||
ripUserAccount(url);
|
||||
break;
|
||||
case SUBREDDIT:
|
||||
logger.info("Album type is SUBREDDIT");
|
||||
ripSubreddit(url);
|
||||
break;
|
||||
case USER_IMAGES:
|
||||
logger.info("Album type is USER_IMAGES");
|
||||
ripUserImages(url);
|
||||
break;
|
||||
}
|
||||
@ -338,7 +343,7 @@ public class ImgurRipper extends AlbumRipper {
|
||||
}
|
||||
return imgurAlbum;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Rips all albums in an imgur user's account.
|
||||
* @param url
|
||||
@ -366,7 +371,7 @@ public class ImgurRipper extends AlbumRipper {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void ripUserImages(URL url) throws IOException {
|
||||
int page = 0; int imagesFound = 0; int imagesTotal = 0;
|
||||
String jsonUrl = url.toExternalForm().replace("/all", "/ajax/images");
|
||||
@ -404,7 +409,7 @@ public class ImgurRipper extends AlbumRipper {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void ripSubreddit(URL url) throws IOException {
|
||||
int page = 0;
|
||||
while (true) {
|
||||
|
@ -1,187 +0,0 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public class MinusRipper extends AlbumRipper {
|
||||
|
||||
private static final String DOMAIN = "minus.com",
|
||||
HOST = "minus";
|
||||
|
||||
private Document albumDoc = null;
|
||||
|
||||
private static enum ALBUM_TYPE {
|
||||
GUEST,
|
||||
ACCOUNT_ALBUM,
|
||||
ACCOUNT
|
||||
}
|
||||
private ALBUM_TYPE albumType;
|
||||
|
||||
public MinusRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return HOST;
|
||||
}
|
||||
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
getGID(url);
|
||||
return url;
|
||||
}
|
||||
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
Elements titles = albumDoc.select("meta[property=og:title]");
|
||||
if (titles.size() > 0) {
|
||||
return HOST + "_" + titles.get(0).attr("content");
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// Fall back to default album naming convention
|
||||
}
|
||||
return super.getAlbumTitle(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
// http://vampyr3.minus.com/
|
||||
// http://vampyr3.minus.com/uploads
|
||||
// http://minus.com/mw7ztQ6xzP7ae
|
||||
// http://vampyr3.minus.com/mw7ztQ6xzP7ae
|
||||
String u = url.toExternalForm();
|
||||
u = u.replace("/www.minus.com", "/minus.com");
|
||||
u = u.replace("/i.minus.com", "/minus.com");
|
||||
Pattern p; Matcher m;
|
||||
|
||||
p = Pattern.compile("^https?://minus\\.com/m([a-zA-Z0-9]+).*$");
|
||||
m = p.matcher(u);
|
||||
if (m.matches()) {
|
||||
albumType = ALBUM_TYPE.GUEST;
|
||||
return "guest_" + m.group(1);
|
||||
}
|
||||
|
||||
p = Pattern.compile("^https?://([a-zA-Z0-9\\-_]+)\\.minus\\.com/m([a-zA-Z0-9]+).*$");
|
||||
m = p.matcher(u);
|
||||
if (m.matches()) {
|
||||
albumType = ALBUM_TYPE.ACCOUNT_ALBUM;
|
||||
return m.group(1) + "_" + m.group(2);
|
||||
}
|
||||
|
||||
p = Pattern.compile("^https?://([a-zA-Z0-9]+)\\.minus\\.com/?(uploads)?$");
|
||||
m = p.matcher(u);
|
||||
if (m.matches()) {
|
||||
albumType = ALBUM_TYPE.ACCOUNT;
|
||||
return m.group(1);
|
||||
}
|
||||
|
||||
throw new MalformedURLException(
|
||||
"Expected minus.com album URL formats: "
|
||||
+ "username.minus.com or "
|
||||
+ "username.minus.com/m... or "
|
||||
+ "minus.com/m..."
|
||||
+ " Got: " + url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
switch (albumType) {
|
||||
case ACCOUNT:
|
||||
ripAccount(this.url);
|
||||
break;
|
||||
case ACCOUNT_ALBUM:
|
||||
ripAlbum(this.url);
|
||||
break;
|
||||
case GUEST:
|
||||
ripAlbum(this.url);
|
||||
break;
|
||||
}
|
||||
waitForThreads();
|
||||
}
|
||||
|
||||
private void ripAccount(URL url) throws IOException {
|
||||
Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-_]+)\\.minus\\.com.*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (!m.matches()) {
|
||||
throw new IOException("Could not find username from URL " + url);
|
||||
}
|
||||
String user = m.group(1);
|
||||
int page = 1;
|
||||
while (true) {
|
||||
String jsonUrl = "http://" + user
|
||||
+ ".minus.com/api/pane/user/"
|
||||
+ user + "/shares.json/"
|
||||
+ page;
|
||||
logger.info(" Retrieving " + jsonUrl);
|
||||
JSONObject json = Http.url(jsonUrl).getJSON();
|
||||
JSONArray galleries = json.getJSONArray("galleries");
|
||||
for (int i = 0; i < galleries.length(); i++) {
|
||||
JSONObject gallery = galleries.getJSONObject(i);
|
||||
String title = gallery.getString("name");
|
||||
String albumUrl = "http://" + user + ".minus.com/m" + gallery.getString("reader_id");
|
||||
ripAlbum(new URL(albumUrl), Utils.filesystemSafe(title));
|
||||
|
||||
if (isThisATest()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (page >= json.getInt("total_pages") || isThisATest()) {
|
||||
break;
|
||||
}
|
||||
page++;
|
||||
}
|
||||
}
|
||||
|
||||
private void ripAlbum(URL url) throws IOException {
|
||||
ripAlbum(url, "");
|
||||
}
|
||||
private void ripAlbum(URL url, String subdir) throws IOException {
|
||||
logger.info(" Retrieving " + url.toExternalForm());
|
||||
if (albumDoc == null || !subdir.equals("")) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
Pattern p = Pattern.compile("^.*var gallerydata = (\\{.*\\});.*$", Pattern.DOTALL);
|
||||
Matcher m = p.matcher(albumDoc.data());
|
||||
if (m.matches()) {
|
||||
JSONObject json = new JSONObject(m.group(1));
|
||||
JSONArray items = json.getJSONArray("items");
|
||||
for (int i = 0; i < items.length(); i++) {
|
||||
JSONObject item = items.getJSONObject(i);
|
||||
String extension = item.getString("name");
|
||||
extension = extension.substring(extension.lastIndexOf('.'));
|
||||
String image = "http://i.minus.com/i"
|
||||
+ item.getString("id")
|
||||
+ extension;
|
||||
String prefix = "";
|
||||
if (Utils.getConfigBoolean("download.save_order", true)) {
|
||||
prefix = String.format("%03d_", i + 1);
|
||||
}
|
||||
addURLToDownload(new URL(image), prefix, subdir);
|
||||
if (isThisATest()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean canRip(URL url) {
|
||||
return url.getHost().endsWith(DOMAIN);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,200 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
public class MyhentaicomicsRipper extends AbstractHTMLRipper {
|
||||
public static boolean isTag;
|
||||
|
||||
public MyhentaicomicsRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "myhentaicomics";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "myhentaicomics.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://myhentaicomics.com/index.php/([a-zA-Z0-9-]*)/?$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
isTag = false;
|
||||
return m.group(1);
|
||||
}
|
||||
|
||||
Pattern pa = Pattern.compile("^https?://myhentaicomics.com/index.php/search\\?q=([a-zA-Z0-9-]*)([a-zA-Z0-9=&]*)?$");
|
||||
Matcher ma = pa.matcher(url.toExternalForm());
|
||||
if (ma.matches()) {
|
||||
isTag = true;
|
||||
return ma.group(1);
|
||||
}
|
||||
|
||||
Pattern pat = Pattern.compile("^http://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+\\?=:]*)?$");
|
||||
Matcher mat = pat.matcher(url.toExternalForm());
|
||||
if (mat.matches()) {
|
||||
isTag = true;
|
||||
return mat.group(1);
|
||||
}
|
||||
|
||||
throw new MalformedURLException("Expected myhentaicomics.com URL format: " +
|
||||
"myhentaicomics.com/index.php/albumName - got " + url + " instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
// "url" is an instance field of the superclass
|
||||
return Http.url(url).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
// Find next page
|
||||
String nextUrl = "";
|
||||
Element elem = doc.select("a.ui-icon-right").first();
|
||||
String nextPage = elem.attr("href");
|
||||
Pattern p = Pattern.compile("/index.php/[a-zA-Z0-9_-]*\\?page=\\d");
|
||||
Matcher m = p.matcher(nextPage);
|
||||
if (m.matches()) {
|
||||
nextUrl = "http://myhentaicomics.com" + m.group(0);
|
||||
}
|
||||
if (nextUrl == "") {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
// Sleep for half a sec to avoid getting IP banned
|
||||
sleep(500);
|
||||
return Http.url(nextUrl).get();
|
||||
}
|
||||
|
||||
// This replaces getNextPage when downloading from searchs and tags
|
||||
public List<String> getNextAlbumPage(String pageUrl) {
|
||||
List<String> albumPagesList = new ArrayList<String>();
|
||||
int pageNumber = 1;
|
||||
albumPagesList.add("http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber));
|
||||
while(true) {
|
||||
String urlToGet = "http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber);
|
||||
Document nextAlbumPage;
|
||||
try {
|
||||
logger.info("Grabbing " + urlToGet);
|
||||
nextAlbumPage = Http.url(urlToGet).get();
|
||||
} catch(IOException e){
|
||||
logger.warn("Failed to log link in Jsoup");
|
||||
nextAlbumPage = null;
|
||||
e.printStackTrace();
|
||||
}
|
||||
Element elem = nextAlbumPage.select("a.ui-icon-right").first();
|
||||
String nextPage = elem.attr("href");
|
||||
pageNumber = pageNumber + 1;
|
||||
if(nextPage == ""){
|
||||
logger.info("Got " + pageNumber + " pages");
|
||||
break;
|
||||
}
|
||||
else {
|
||||
logger.info(nextPage);
|
||||
albumPagesList.add(nextPage);
|
||||
logger.info("Adding " + nextPage);
|
||||
}
|
||||
}
|
||||
return albumPagesList;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> result = new ArrayList<String>();
|
||||
List<String> pagesToRip;
|
||||
// Checks if this is a comic page or a page of albums
|
||||
if (doc.toString().contains("class=\"g-item g-album\"")) {
|
||||
for (Element elem : doc.select("li.g-album > a")) {
|
||||
String link = elem.attr("href");
|
||||
logger.info("Grabbing album " + link);
|
||||
pagesToRip = getNextAlbumPage(link);
|
||||
logger.info(pagesToRip);
|
||||
for (String element : pagesToRip) {
|
||||
Document album_doc;
|
||||
try {
|
||||
logger.info("grabbing " + element + " with jsoup");
|
||||
boolean startsWithhttp = element.startsWith("http");
|
||||
if (startsWithhttp == false) {
|
||||
album_doc = Http.url("http://myhentaicomics.com/" + element).get();
|
||||
}
|
||||
else {
|
||||
album_doc = Http.url(element).get();
|
||||
}
|
||||
} catch(IOException e){
|
||||
logger.warn("Failed to log link in Jsoup");
|
||||
album_doc = null;
|
||||
e.printStackTrace();
|
||||
}
|
||||
for (Element el :album_doc.select("img")) {
|
||||
String imageSource = el.attr("src");
|
||||
// This bool is here so we don't try and download the site logo
|
||||
boolean b = imageSource.startsWith("http");
|
||||
if (b == false) {
|
||||
// We replace thumbs with resizes so we can the full sized images
|
||||
imageSource = imageSource.replace("thumbs", "resizes");
|
||||
result.add("http://myhentaicomics.com/" + imageSource);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (Element el : doc.select("img")) {
|
||||
String imageSource = el.attr("src");
|
||||
// This bool is here so we don't try and download the site logo
|
||||
boolean b = imageSource.startsWith("http");
|
||||
if (b == false) {
|
||||
// We replace thumbs with resizes so we can the full sized images
|
||||
imageSource = imageSource.replace("thumbs", "resizes");
|
||||
result.add("http://myhentaicomics.com/" + imageSource);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
String url_string = url.toExternalForm();
|
||||
url_string = url_string.replace("%20", "_");
|
||||
url_string = url_string.replace("%27", "");
|
||||
url_string = url_string.replace("%28", "_");
|
||||
url_string = url_string.replace("%29", "_");
|
||||
url_string = url_string.replace("%2C", "_");
|
||||
if (isTag == true) {
|
||||
logger.info("Downloading from a tag or search");
|
||||
addURLToDownload(url, getPrefix(index), url_string.split("/")[6]);
|
||||
}
|
||||
else {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,19 +1,19 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
import org.jsoup.Connection;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.jsoup.Connection;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
|
||||
public class NewsfilterRipper extends AlbumRipper {
|
||||
private static final String HOST = "newsfilter";
|
||||
|
||||
@ -33,7 +33,7 @@ public class NewsfilterRipper extends AlbumRipper {
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
String u = url.toExternalForm();
|
||||
if (u.indexOf('#') >= 0) {
|
||||
u = u.substring(0, u.indexOf('#'));
|
||||
u = u.substring(0, u.indexOf('#'));
|
||||
}
|
||||
u = u.replace("https?://m\\.newsfilter\\.org", "http://newsfilter.org");
|
||||
return new URL(u);
|
||||
@ -41,39 +41,28 @@ public class NewsfilterRipper extends AlbumRipper {
|
||||
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
String gid = getGID(this.url),
|
||||
theurl = "http://newsfilter.org/gallery/" + gid;
|
||||
|
||||
Connection.Response resp = null;
|
||||
String gid = getGID(this.url);
|
||||
String theurl = "http://newsfilter.org/gallery/" + gid;
|
||||
logger.info("Loading " + theurl);
|
||||
resp = Jsoup.connect(theurl)
|
||||
.timeout(5000)
|
||||
.referrer("")
|
||||
.userAgent(USER_AGENT)
|
||||
.method(Connection.Method.GET)
|
||||
.execute();
|
||||
|
||||
Connection.Response resp = Jsoup.connect(theurl)
|
||||
.timeout(5000)
|
||||
.referrer("")
|
||||
.userAgent(USER_AGENT)
|
||||
.method(Connection.Method.GET)
|
||||
.execute();
|
||||
Document doc = resp.parse();
|
||||
//Element gallery = doc.getElementById("thegalmain");
|
||||
//Elements piclinks = gallery.getElementsByAttributeValue("itemprop","contentURL");
|
||||
Pattern pat = Pattern.compile(gid+"/\\d+");
|
||||
Elements piclinks = doc.getElementsByAttributeValueMatching("href", pat);
|
||||
for (Element picelem : piclinks) {
|
||||
String picurl = "http://newsfilter.org"+picelem.attr("href");
|
||||
logger.info("Getting to picture page: "+picurl);
|
||||
resp = Jsoup.connect(picurl)
|
||||
.timeout(5000)
|
||||
.referrer(theurl)
|
||||
.userAgent(USER_AGENT)
|
||||
.method(Connection.Method.GET)
|
||||
.execute();
|
||||
Document picdoc = resp.parse();
|
||||
String dlurl = picdoc.getElementsByAttributeValue("itemprop","contentURL").first().attr("src");
|
||||
addURLToDownload(new URL(dlurl));
|
||||
|
||||
Elements thumbnails = doc.select("#galleryImages .inner-block img");
|
||||
for (Element thumb : thumbnails) {
|
||||
String thumbUrl = thumb.attr("src");
|
||||
String picUrl = thumbUrl.replace("thumbs/", "");
|
||||
addURLToDownload(new URL(picUrl));
|
||||
}
|
||||
|
||||
waitForThreads();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return HOST;
|
||||
@ -86,9 +75,8 @@ public class NewsfilterRipper extends AlbumRipper {
|
||||
if (m.matches()) {
|
||||
return m.group(2);
|
||||
}
|
||||
throw new MalformedURLException("Expected newsfilter gallery format: "
|
||||
+ "http://newsfilter.org/gallery/galleryid"
|
||||
+ " Got: " + url);
|
||||
throw new MalformedURLException(
|
||||
"Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" +
|
||||
" Got: " + url);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,89 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
public class SinnercomicsRipper extends AbstractHTMLRipper {
|
||||
|
||||
public SinnercomicsRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "sinnercomics";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "sinnercomics.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://sinnercomics.com/comic/([a-zA-Z0-9-]*)/?$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
}
|
||||
throw new MalformedURLException("Expected sinnercomics.com URL format: " +
|
||||
"sinnercomics.com/comic/albumName - got " + url + " instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
// "url" is an instance field of the superclass
|
||||
return Http.url(url).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
// Find next page
|
||||
String nextUrl = "";
|
||||
// We use comic-nav-next to the find the next page
|
||||
Element elem = doc.select("a.comic-nav-next").first();
|
||||
if (elem == null) {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
String nextPage = elem.attr("href");
|
||||
// Wait half a sec to avoid IP bans
|
||||
sleep(500);
|
||||
return Http.url(nextPage).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> result = new ArrayList<String>();
|
||||
for (Element el : doc.select("meta[property=og:image]")) {
|
||||
String imageSource = el.attr("content");
|
||||
imageSource = imageSource.replace(" alt=", "");
|
||||
result.add(imageSource);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,78 @@
|
||||
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
public class ThechiveRipper extends AbstractHTMLRipper {
|
||||
public static boolean isTag;
|
||||
|
||||
public ThechiveRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "thechive";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "thechive.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
isTag = false;
|
||||
return m.group(1);
|
||||
}
|
||||
throw new MalformedURLException("Expected thechive.com URL format: " +
|
||||
"thechive.com/YEAR/MONTH/DAY/POSTTITLE/ - got " + url + " instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
// "url" is an instance field of the superclass
|
||||
return Http.url(url).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> result = new ArrayList<String>();
|
||||
for (Element el : doc.select("img.attachment-gallery-item-full")) {
|
||||
String imageSource = el.attr("src");
|
||||
// We replace thumbs with resizes so we can the full sized images
|
||||
imageSource = imageSource.replace("thumbs", "resizes");
|
||||
result.add(imageSource);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -48,6 +48,7 @@ public class XhamsterRipper extends AlbumRipper {
|
||||
image = image.replaceAll(
|
||||
"https://upt.xhcdn\\.",
|
||||
"http://up.xhamster.");
|
||||
image = image.replaceAll("ept\\.xhcdn", "ep.xhamster");
|
||||
image = image.replaceAll(
|
||||
"_160\\.",
|
||||
"_1000.");
|
||||
|
@ -0,0 +1,125 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.jsoup.Connection.Response;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class ZizkiRipper extends AbstractHTMLRipper {
|
||||
|
||||
private Document albumDoc = null;
|
||||
private Map<String,String> cookies = new HashMap<String,String>();
|
||||
|
||||
public ZizkiRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "zizki";
|
||||
}
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "zizki.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://(www\\.)?zizki\\.com/([a-zA-Z0-9\\-_]+).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (!m.matches()) {
|
||||
throw new MalformedURLException("Expected URL format: http://www.zizki.com/author/albumname, got: " + url);
|
||||
}
|
||||
return m.group(m.groupCount());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Element titleElement = getFirstPage().select("meta[name=description]").first();
|
||||
String title = titleElement.attr("content");
|
||||
title = title.substring(title.lastIndexOf('/') + 1);
|
||||
|
||||
Element authorSpan = getFirstPage().select("span[class=creator]").first();
|
||||
String author = authorSpan.select("a").first().text();
|
||||
logger.debug("Author: " + author);
|
||||
return getHost() + "_" + author + "_" + title.trim();
|
||||
} catch (IOException e) {
|
||||
// Fall back to default album naming convention
|
||||
logger.info("Unable to find title at " + url);
|
||||
}
|
||||
return super.getAlbumTitle(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
Response resp = Http.url(url).response();
|
||||
cookies.putAll(resp.cookies());
|
||||
albumDoc = resp.parse();
|
||||
}
|
||||
return albumDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
List<String> imageURLs = new ArrayList<String>();
|
||||
// Page contains images
|
||||
logger.info("Look for images.");
|
||||
for (Element thumb : page.select("img")) {
|
||||
logger.info("Img");
|
||||
if (super.isStopped()) break;
|
||||
// Find thumbnail image source
|
||||
String image = null;
|
||||
String img_type = null;
|
||||
String src = null;
|
||||
if (thumb.hasAttr("typeof")) {
|
||||
img_type = thumb.attr("typeof");
|
||||
if (img_type.equals("foaf:Image")) {
|
||||
logger.debug("Found image with " + img_type);
|
||||
if (thumb.parent() != null &&
|
||||
thumb.parent().parent() != null &&
|
||||
thumb.parent().parent().attr("class") != null &&
|
||||
thumb.parent().parent().attr("class").equals("aimage-center")
|
||||
)
|
||||
{
|
||||
src = thumb.attr("src");
|
||||
logger.debug("Found url with " + src);
|
||||
if (!src.contains("zizki.com")) {
|
||||
continue;
|
||||
} else {
|
||||
imageURLs.add(src.replace("/styles/medium/public/","/styles/large/public/"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPrefix(int index) {
|
||||
return String.format("%03d_", index);
|
||||
}
|
||||
}
|
@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
|
||||
public class UpdateUtils {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
|
||||
private static final String DEFAULT_VERSION = "1.4.2";
|
||||
private static final String DEFAULT_VERSION = "1.4.7";
|
||||
private static final String updateJsonURL = "https://raw.githubusercontent.com/4pr0n/ripme/master/ripme.json";
|
||||
private static final String mainFileName = "ripme.jar";
|
||||
private static final String updateFileName = "ripme.jar.update";
|
||||
|
@ -3,13 +3,16 @@ package com.rarchives.ripme.utils;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.jar.JarEntry;
|
||||
import java.util.jar.JarFile;
|
||||
|
||||
@ -387,4 +390,73 @@ public class Utils {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an URL query
|
||||
*
|
||||
* @param query
|
||||
* The query part of an URL
|
||||
* @return The map of all query parameters
|
||||
*/
|
||||
public static Map<String,String> parseUrlQuery(String query) {
|
||||
Map<String,String> res = new HashMap<String, String>();
|
||||
|
||||
if (query.equals("")){
|
||||
return res;
|
||||
}
|
||||
|
||||
String[] parts = query.split("&");
|
||||
int pos;
|
||||
|
||||
try {
|
||||
for (String part : parts) {
|
||||
if ((pos = part.indexOf('=')) >= 0){
|
||||
res.put(URLDecoder.decode(part.substring(0, pos), "UTF-8"), URLDecoder.decode(part.substring(pos + 1), "UTF-8"));
|
||||
}else{
|
||||
res.put(URLDecoder.decode(part, "UTF-8"), "");
|
||||
}
|
||||
}
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// Shouldn't happen since UTF-8 is required to be supported
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an URL query and returns the requested parameter's value
|
||||
*
|
||||
* @param query
|
||||
* The query part of an URL
|
||||
* @param key
|
||||
* The key whose value is requested
|
||||
* @return The associated value or null if key wasn't found
|
||||
*/
|
||||
public static String parseUrlQuery(String query, String key) {
|
||||
if (query.equals("")){
|
||||
return null;
|
||||
}
|
||||
|
||||
String[] parts = query.split("&");
|
||||
int pos;
|
||||
|
||||
try {
|
||||
for (String part : parts) {
|
||||
if ((pos = part.indexOf('=')) >= 0) {
|
||||
if (URLDecoder.decode(part.substring(0, pos), "UTF-8").equals(key)){
|
||||
return URLDecoder.decode(part.substring(pos + 1), "UTF-8");
|
||||
}
|
||||
|
||||
} else if (URLDecoder.decode(part, "UTF-8").equals(key)) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// Shouldn't happen since UTF-8 is required to be supported
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user