1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-01-19 05:28:03 +01:00

Fetch imgur series (comma-separated images) in Reddit ripper.

Closes #218
This commit is contained in:
4pr0n 2015-12-21 14:47:07 -08:00
parent 0ba092813c
commit 9fd7bf7663
3 changed files with 52 additions and 5 deletions

View File

@ -159,6 +159,39 @@ public class ImgurRipper extends AlbumRipper {
}
}
/**
 * Builds an album from an imgur "series" URL, i.e. a URL whose path is a
 * comma-separated list of image IDs (e.g. {@code imgur.com/abc123,def456}).
 *
 * Each ID is looked up individually at
 * {@code https://api.imgur.com/2/image/<id>.json}; the image is added to the
 * album when the response contains {@code image.links.original}. Lookups are
 * best-effort: an ID whose request fails, or whose response lacks one of
 * those fields, is skipped and the remaining IDs are still processed.
 *
 * @param url the series URL to parse
 * @return an album containing every image that could be resolved; empty when
 *         the URL does not match or no image could be resolved
 * @throws IOException declared for callers; per-image failures are logged
 *         and swallowed inside this method rather than propagated
 */
public static ImgurAlbum getImgurSeries(URL url) throws IOException {
    Pattern p = Pattern.compile("^.*imgur\\.com/([a-zA-Z0-9,]*).*$");
    Matcher m = p.matcher(url.toExternalForm());
    ImgurAlbum album = new ImgurAlbum(url);
    if (m.matches()) {
        String[] imageIds = m.group(1).split(",");
        for (String imageId : imageIds) {
            // split() yields empty strings for an empty path ("") and for
            // doubled commas ("a,,b"); there is nothing to fetch for those.
            if (imageId.isEmpty()) {
                continue;
            }
            logger.debug("Fetching image info for ID " + imageId);
            try {
                JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
                if (!json.has("image")) {
                    continue;
                }
                JSONObject image = json.getJSONObject("image");
                if (!image.has("links")) {
                    continue;
                }
                JSONObject links = image.getJSONObject("links");
                if (!links.has("original")) {
                    continue;
                }
                String original = links.getString("original");
                ImgurImage theImage = new ImgurImage(new URL(original));
                album.addImage(theImage);
            } catch (Exception e) {
                // Deliberately broad: one bad ID must not abort the rest of the series.
                logger.error("Got exception while fetching imgur ID " + imageId, e);
            }
        }
    }
    return album;
}
public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
logger.info(" Retrieving " + url.toExternalForm());
Document doc = Jsoup.connect(url.toExternalForm())
@ -362,7 +395,7 @@ public class ImgurRipper extends AlbumRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(m\\.)?imgur\\.com/a/([a-zA-Z0-9]{5,8}).*$");
Pattern p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/a/([a-zA-Z0-9]{5,8}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album
@ -371,7 +404,7 @@ public class ImgurRipper extends AlbumRipper {
this.url = new URL("http://imgur.com/a/" + gid);
return gid;
}
p = Pattern.compile("^https?://(m\\.)?imgur\\.com/gallery/([a-zA-Z0-9]{5,8}).*$");
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/gallery/([a-zA-Z0-9]{5,8}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur gallery
@ -405,7 +438,7 @@ public class ImgurRipper extends AlbumRipper {
albumType = ALBUM_TYPE.USER_ALBUM;
return m.group(1) + "-" + m.group(2);
}
p = Pattern.compile("^https?://(www\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur subreddit aggregator
@ -418,7 +451,7 @@ public class ImgurRipper extends AlbumRipper {
}
return album;
}
p = Pattern.compile("^https?://(i\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Series of imgur images

View File

@ -44,6 +44,19 @@ public class RipUtils {
}
return result;
}
else if (url.getHost().endsWith("imgur.com") && url.toExternalForm().contains(",")) {
// Imgur image series.
try {
logger.debug("Fetching imgur series at " + url);
ImgurAlbum imgurAlbum = ImgurRipper.getImgurSeries(url);
for (ImgurImage imgurImage : imgurAlbum.images) {
logger.debug("Got imgur image: " + imgurImage.url);
result.add(imgurImage.url);
}
} catch (IOException e) {
logger.error("[!] Exception while loading album " + url, e);
}
}
else if (url.getHost().endsWith("gfycat.com")) {
try {
logger.debug("Fetching gfycat page " + url);

View File

@ -39,9 +39,10 @@ public class ImgurRipperTest extends RippersTest {
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
contentURLs.add(new URL("http://imgur.com/gallery/FmP2o")); // Gallery URL
contentURLs.add(new URL("http://imgur.com/758qD43,C6iVJex,bP7flAu,J3l85Ri,1U7fhu5,MbuAUCM,JF4vOXQ"));
// Sometimes hangs up
//contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
contentURLs.add(new URL("http://imgur.com/a/bXQpH")); // Album with titles/descriptions
//contentURLs.add(new URL("http://imgur.com/a/bXQpH")); // Album with titles/descriptions
for (URL url : contentURLs) {
ImgurRipper ripper = new ImgurRipper(url);
testRipper(ripper);