1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-26 23:34:53 +02:00

Merge pull request #1072 from PeterSzakacs/issue/1067

Fix issue with Photobucket downloads containing a watermark.
This commit is contained in:
cyian-1756
2018-11-28 13:02:48 -05:00
committed by GitHub
2 changed files with 37 additions and 33 deletions

View File

@@ -5,11 +5,13 @@ import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONObject; import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
@@ -26,22 +28,25 @@ public class PhotobucketRipper extends AbstractHTMLRipper {
private static final int WAIT_BEFORE_NEXT_PAGE = 2000; private static final int WAIT_BEFORE_NEXT_PAGE = 2000;
private final class AlbumMetadata { private final class AlbumMetadata {
private final String url; private final String baseURL;
private final String location; private final String location;
private final int sortOrder; private final int sortOrder;
private int currPage = 1; // cookies for the current page of this album
private Map<String, String> cookies;
private Document currPage;
private int numPages; private int numPages;
private int pageIndex = 1;
private AlbumMetadata(JSONObject data) { private AlbumMetadata(JSONObject data) {
this.url = data.getString("url"); this.baseURL = data.getString("url");
this.location = data.getString("location") this.location = data.getString("location")
.replace(" ", "_"); .replace(" ", "_");
this.sortOrder = data.getInt("sortOrder"); this.sortOrder = data.getInt("sortOrder");
} }
private String getCurrPageURL(){ private String getCurrPageURL(){
return url + String.format("?sort=%d&page=%d", return baseURL + String.format("?sort=%d&page=%d",
sortOrder, currPage); sortOrder, pageIndex);
} }
} }
@@ -120,7 +125,7 @@ public class PhotobucketRipper extends AbstractHTMLRipper {
@Override @Override
protected Document getFirstPage() throws IOException { public Document getFirstPage() throws IOException {
if (this.currAlbum == null) { if (this.currAlbum == null) {
this.albums = getAlbumMetadata(this.url.toExternalForm()); this.albums = getAlbumMetadata(this.url.toExternalForm());
LOGGER.info("Detected " + albums.size() + " albums in total"); LOGGER.info("Detected " + albums.size() + " albums in total");
@@ -136,19 +141,21 @@ public class PhotobucketRipper extends AbstractHTMLRipper {
// http://s1255.photobucket.com/api/user/mimajki/album/Movie%20gifs/get?subAlbums=48&json=1 // http://s1255.photobucket.com/api/user/mimajki/album/Movie%20gifs/get?subAlbums=48&json=1
// Actual item count when looking at the album url: 131 items/6 pages // Actual item count when looking at the album url: 131 items/6 pages
// http://s1255.photobucket.com/user/mimajki/library/Movie%20gifs?sort=6&page=1 // http://s1255.photobucket.com/user/mimajki/library/Movie%20gifs?sort=6&page=1
Document page = Http.url(currAlbum.getCurrPageURL()).get(); Connection.Response resp = Http.url(currAlbum.getCurrPageURL()).response();
JSONObject collectionData = getCollectionData(page); this.currAlbum.cookies = resp.cookies();
this.currAlbum.currPage = resp.parse();
JSONObject collectionData = getCollectionData(currAlbum.currPage);
int totalNumItems = collectionData.getInt("total"); int totalNumItems = collectionData.getInt("total");
this.currAlbum.numPages = (int) Math.ceil( this.currAlbum.numPages = (int) Math.ceil(
(double)totalNumItems / (double) ITEMS_PER_PAGE); (double)totalNumItems / (double)ITEMS_PER_PAGE);
this.index = 0; this.index = 0;
return page; return currAlbum.currPage;
} }
@Override @Override
public Document getNextPage(Document page) throws IOException { public Document getNextPage(Document page) throws IOException {
currAlbum.currPage++; this.currAlbum.pageIndex++;
boolean endOfAlbum = currAlbum.currPage > currAlbum.numPages; boolean endOfAlbum = currAlbum.pageIndex > currAlbum.numPages;
boolean noMoreSubalbums = albums.isEmpty(); boolean noMoreSubalbums = albums.isEmpty();
if (endOfAlbum && noMoreSubalbums){ if (endOfAlbum && noMoreSubalbums){
throw new IOException("No more pages"); throw new IOException("No more pages");
@@ -159,12 +166,15 @@ public class PhotobucketRipper extends AbstractHTMLRipper {
LOGGER.info("Interrupted while waiting before getting next page"); LOGGER.info("Interrupted while waiting before getting next page");
} }
if (endOfAlbum){ if (endOfAlbum){
LOGGER.info("Turning to next album " + albums.get(0).url); LOGGER.info("Turning to next album " + albums.get(0).baseURL);
return getFirstPage(); return getFirstPage();
} else { } else {
LOGGER.info("Turning to page " + currAlbum.currPage + LOGGER.info("Turning to page " + currAlbum.pageIndex +
" of album " + currAlbum.url); " of album " + currAlbum.baseURL);
return Http.url(currAlbum.getCurrPageURL()).get(); Connection.Response resp = Http.url(currAlbum.getCurrPageURL()).response();
currAlbum.cookies = resp.cookies();
currAlbum.currPage = resp.parse();
return currAlbum.currPage;
} }
} }
@@ -179,7 +189,9 @@ public class PhotobucketRipper extends AbstractHTMLRipper {
JSONObject collectionData = getCollectionData(page); JSONObject collectionData = getCollectionData(page);
if (collectionData == null) { if (collectionData == null) {
LOGGER.error("Unable to find JSON data at URL: " + page.location()); LOGGER.error("Unable to find JSON data at URL: " + page.location());
return null; // probably better than returning null, as the ripper will display
// that nothing was found instead of a NullPointerException
return new ArrayList<>();
} else { } else {
return getImageURLs(collectionData); return getImageURLs(collectionData);
} }
@@ -198,12 +210,12 @@ public class PhotobucketRipper extends AbstractHTMLRipper {
} }
} }
} }
return null; return null;
} }
private List<String> getImageURLs(JSONObject json){ private List<String> getImageURLs(JSONObject collectionData){
List<String> results = new ArrayList<>(); List<String> results = new ArrayList<>();
JSONObject items = json.getJSONObject("items"); JSONObject items = collectionData.getJSONObject("items");
JSONArray objects = items.getJSONArray("objects"); JSONArray objects = items.getJSONArray("objects");
for (int i = 0; i < objects.length(); i++) { for (int i = 0; i < objects.length(); i++) {
JSONObject object = objects.getJSONObject(i); JSONObject object = objects.getJSONObject(i);
@@ -215,7 +227,8 @@ public class PhotobucketRipper extends AbstractHTMLRipper {
@Override @Override
protected void downloadURL(URL url, int index) { protected void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(++this.index), currAlbum.location); addURLToDownload(url, getPrefix(++this.index), currAlbum.location,
currAlbum.currPage.location(), currAlbum.cookies);
} }

View File

@@ -26,18 +26,9 @@ public class PhotobucketRipperTest extends RippersTest {
String baseURL = "http://s1255.photobucket.com/user/mimajki/library/Movie%20gifs?sort=6&page=1"; String baseURL = "http://s1255.photobucket.com/user/mimajki/library/Movie%20gifs?sort=6&page=1";
URL url = new URL(baseURL); URL url = new URL(baseURL);
PhotobucketRipper ripper = new PhotobucketRipper(url); PhotobucketRipper ripper = new PhotobucketRipper(url);
org.jsoup.nodes.Document page = null; org.jsoup.nodes.Document page = ripper.getFirstPage();
try { // NOTE: number of pages remaining includes the subalbums
// I'm not sure it makes much sense that getFirstPage() // of the current album
// is not public while getNextPage() is.
java.lang.reflect.Method method = ripper.getClass()
.getDeclaredMethod("getFirstPage");
method.setAccessible(true);
page = (org.jsoup.nodes.Document) method.invoke(ripper);
} catch (Exception e){
e.printStackTrace();
fail("Calling getFirstPage() failed");
}
int numPagesRemaining = 38; int numPagesRemaining = 38;
for (int idx = 0; idx < numPagesRemaining; idx++){ for (int idx = 0; idx < numPagesRemaining; idx++){
page = ripper.getNextPage(page); page = ripper.getNextPage(page);