1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-09-03 02:42:47 +02:00

Replaced new URL(string) with new URI(string).toURL(), because the java.net.URL constructors are deprecated as of Java 20.

This commit is contained in:
soloturn
2023-06-12 00:27:18 +02:00
parent 9a2ee24ab0
commit 0b500354ca
3 changed files with 246 additions and 243 deletions

View File

@@ -1,234 +1,236 @@
package com.rarchives.ripme.ripper.rippers; package com.rarchives.ripme.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URI;
import java.util.ArrayList; import java.net.URISyntaxException;
import java.util.HashMap; import java.net.URL;
import java.util.List; import java.util.ArrayList;
import java.util.Map; import java.util.HashMap;
import java.util.regex.Matcher; import java.util.List;
import java.util.regex.Pattern; import java.util.Map;
import org.jsoup.nodes.Document; import java.util.regex.Matcher;
import org.jsoup.nodes.Element; import java.util.regex.Pattern;
import org.jsoup.select.Elements; import org.jsoup.nodes.Document;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.DownloadThreadPool; import org.jsoup.select.Elements;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
/**
* @author Tushar
* /**
*/ * @author Tushar
public class ListalRipper extends AbstractHTMLRipper { *
*/
private Pattern p1 = Pattern.compile("https:\\/\\/www.listal.com\\/list\\/([a-zA-Z0-9-]+)"); public class ListalRipper extends AbstractHTMLRipper {
private Pattern p2 =
Pattern.compile("https:\\/\\/www.listal.com\\/((?:(?:[a-zA-Z0-9-_%]+)\\/?)+)"); private Pattern p1 = Pattern.compile("https:\\/\\/www.listal.com\\/list\\/([a-zA-Z0-9-]+)");
private String listId = null; // listId to get more images via POST. private Pattern p2 =
private String postUrl = "https://www.listal.com/item-list/"; //to load more images. Pattern.compile("https:\\/\\/www.listal.com\\/((?:(?:[a-zA-Z0-9-_%]+)\\/?)+)");
private UrlType urlType = UrlType.UNKNOWN; private String listId = null; // listId to get more images via POST.
private String postUrl = "https://www.listal.com/item-list/"; //to load more images.
private DownloadThreadPool listalThreadPool = new DownloadThreadPool("listalThreadPool"); private UrlType urlType = UrlType.UNKNOWN;
public ListalRipper(URL url) throws IOException { private DownloadThreadPool listalThreadPool = new DownloadThreadPool("listalThreadPool");
super(url);
} public ListalRipper(URL url) throws IOException {
super(url);
@Override }
public String getDomain() {
return "listal.com"; @Override
} public String getDomain() {
return "listal.com";
@Override }
public String getHost() {
return "listal"; @Override
} public String getHost() {
return "listal";
@Override }
public Document getFirstPage() throws IOException {
Document doc = Http.url(url).get(); @Override
if (urlType == UrlType.LIST) { public Document getFirstPage() throws IOException {
listId = doc.select("#customlistitems").first().attr("data-listid"); // Used for list types. Document doc = Http.url(url).get();
} if (urlType == UrlType.LIST) {
return doc; listId = doc.select("#customlistitems").first().attr("data-listid"); // Used for list types.
} }
return doc;
@Override }
public List<String> getURLsFromPage(Document page) {
if (urlType == UrlType.LIST) { @Override
// for url of type LIST, https://www.listal.com/list/my-list public List<String> getURLsFromPage(Document page) {
return getURLsForListType(page); if (urlType == UrlType.LIST) {
} else if (urlType == UrlType.FOLDER) { // for url of type LIST, https://www.listal.com/list/my-list
// for url of type FOLDER, https://www.listal.com/jim-carrey/pictures return getURLsForListType(page);
return getURLsForFolderType(page); } else if (urlType == UrlType.FOLDER) {
} // for url of type FOLDER, https://www.listal.com/jim-carrey/pictures
return null; return getURLsForFolderType(page);
} }
return null;
@Override }
public void downloadURL(URL url, int index) {
listalThreadPool.addThread(new ListalImageDownloadThread(url, index)); @Override
} public void downloadURL(URL url, int index) {
listalThreadPool.addThread(new ListalImageDownloadThread(url, index));
@Override }
public String getGID(URL url) throws MalformedURLException {
Matcher m1 = p1.matcher(url.toExternalForm()); @Override
if (m1.matches()) { public String getGID(URL url) throws MalformedURLException {
// Return the text contained between () in the regex Matcher m1 = p1.matcher(url.toExternalForm());
urlType = UrlType.LIST; if (m1.matches()) {
return m1.group(1); // Return the text contained between () in the regex
} urlType = UrlType.LIST;
return m1.group(1);
Matcher m2 = p2.matcher(url.toExternalForm()); }
if (m2.matches()) {
// Return only gid from capturing group of type listal.com/tvOrSomething/dexter/pictures Matcher m2 = p2.matcher(url.toExternalForm());
urlType = UrlType.FOLDER; if (m2.matches()) {
return getFolderTypeGid(m2.group(1)); // Return only gid from capturing group of type listal.com/tvOrSomething/dexter/pictures
} urlType = UrlType.FOLDER;
return getFolderTypeGid(m2.group(1));
throw new MalformedURLException("Expected listal.com URL format: " }
+ "listal.com/list/my-list-name - got " + url + " instead.");
} throw new MalformedURLException("Expected listal.com URL format: "
+ "listal.com/list/my-list-name - got " + url + " instead.");
@Override }
public Document getNextPage(Document page) throws IOException {
Document nextPage = super.getNextPage(page); @Override
switch (urlType) { public Document getNextPage(Document page) throws IOException {
case LIST: Document nextPage = super.getNextPage(page);
if (!page.select(".loadmoreitems").isEmpty()) { switch (urlType) {
// All items are not loaded. case LIST:
// Load remaining items using postUrl. if (!page.select(".loadmoreitems").isEmpty()) {
// All items are not loaded.
String offSet = page.select(".loadmoreitems").last().attr("data-offset"); // Load remaining items using postUrl.
Map<String, String> postParams = new HashMap<>();
postParams.put("listid", listId); String offSet = page.select(".loadmoreitems").last().attr("data-offset");
postParams.put("offset", offSet); Map<String, String> postParams = new HashMap<>();
try { postParams.put("listid", listId);
nextPage = Http.url(postUrl).data(postParams).retries(3).post(); postParams.put("offset", offSet);
} catch (IOException e1) { try {
LOGGER.error("Failed to load more images after " + offSet, e1); nextPage = Http.url(postUrl).data(postParams).retries(3).post();
throw e1; } catch (IOException e1) {
} LOGGER.error("Failed to load more images after " + offSet, e1);
} throw e1;
break; }
}
case FOLDER: break;
Elements pageLinks = page.select(".pages a");
if (!pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next")) { case FOLDER:
String nextUrl = pageLinks.last().attr("abs:href"); Elements pageLinks = page.select(".pages a");
nextPage = Http.url(nextUrl).retries(3).get(); if (!pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next")) {
} String nextUrl = pageLinks.last().attr("abs:href");
break; nextPage = Http.url(nextUrl).retries(3).get();
}
case UNKNOWN: break;
default:
} case UNKNOWN:
return nextPage; default:
} }
return nextPage;
}
@Override
public DownloadThreadPool getThreadPool() {
return listalThreadPool; @Override
} public DownloadThreadPool getThreadPool() {
return listalThreadPool;
/** }
* Returns the image urls for UrlType LIST.
*/ /**
private List<String> getURLsForListType(Document page) { * Returns the image urls for UrlType LIST.
List<String> list = new ArrayList<>(); */
for (Element e : page.select(".pure-g a[href*=viewimage]")) { private List<String> getURLsForListType(Document page) {
//list.add("https://www.listal.com" + e.attr("href") + "h"); List<String> list = new ArrayList<>();
list.add(e.attr("abs:href") + "h"); for (Element e : page.select(".pure-g a[href*=viewimage]")) {
} //list.add("https://www.listal.com" + e.attr("href") + "h");
list.add(e.attr("abs:href") + "h");
return list; }
}
return list;
/** }
* Returns the image urls for UrlType FOLDER.
*/ /**
private List<String> getURLsForFolderType(Document page) { * Returns the image urls for UrlType FOLDER.
List<String> list = new ArrayList<>(); */
for (Element e : page.select("#browseimagescontainer .imagewrap-outer a")) { private List<String> getURLsForFolderType(Document page) {
list.add(e.attr("abs:href") + "h"); List<String> list = new ArrayList<>();
} for (Element e : page.select("#browseimagescontainer .imagewrap-outer a")) {
return list; list.add(e.attr("abs:href") + "h");
} }
return list;
/** }
* Returns the gid for url type listal.com/tvOrSomething/dexter/pictures
*/ /**
public String getFolderTypeGid(String group) throws MalformedURLException { * Returns the gid for url type listal.com/tvOrSomething/dexter/pictures
String[] folders = group.split("/"); */
try { public String getFolderTypeGid(String group) throws MalformedURLException {
if (folders.length == 2 && folders[1].equals("pictures")) { String[] folders = group.split("/");
// Url is probably for an actor. try {
return folders[0]; if (folders.length == 2 && folders[1].equals("pictures")) {
} // Url is probably for an actor.
return folders[0];
if (folders.length == 3 && folders[2].equals("pictures")) { }
// Url if for a folder(like movies, tv etc).
Document doc = Http.url(url).get(); if (folders.length == 3 && folders[2].equals("pictures")) {
return doc.select(".itemheadingmedium").first().text(); // Url if for a folder(like movies, tv etc).
} Document doc = Http.url(url).get();
return doc.select(".itemheadingmedium").first().text();
} catch (Exception e) { }
LOGGER.error(e);
} } catch (Exception e) {
throw new MalformedURLException("Unable to fetch the gid for given url."); LOGGER.error(e);
} }
throw new MalformedURLException("Unable to fetch the gid for given url.");
private class ListalImageDownloadThread implements Runnable { }
private final URL url; private class ListalImageDownloadThread implements Runnable {
private final int index;
private final URL url;
public ListalImageDownloadThread(URL url, int index) { private final int index;
super();
this.url = url; public ListalImageDownloadThread(URL url, int index) {
this.index = index; super();
} this.url = url;
this.index = index;
@Override }
public void run() {
getImage(); @Override
} public void run() {
getImage();
public void getImage() { }
try {
Document doc = Http.url(url).get(); public void getImage() {
try {
String imageUrl = doc.getElementsByClass("pure-img").attr("src"); Document doc = Http.url(url).get();
if (imageUrl != "") {
addURLToDownload(new URL(imageUrl), getPrefix(index), "", null, null, String imageUrl = doc.getElementsByClass("pure-img").attr("src");
getImageName()); if (imageUrl != "") {
} else { addURLToDownload(new URI(imageUrl).toURL(), getPrefix(index), "", null, null,
LOGGER.error("Couldnt find image from url: " + url); getImageName());
} } else {
} catch (IOException e) { LOGGER.error("Couldnt find image from url: " + url);
LOGGER.error("[!] Exception while downloading image: " + url, e); }
} } catch (IOException | URISyntaxException e) {
} LOGGER.error("[!] Exception while downloading image: " + url, e);
}
public String getImageName() { }
// Returns the image number of the link if possible.
String name = this.url.toExternalForm(); public String getImageName() {
try { // Returns the image number of the link if possible.
name = name.substring(name.lastIndexOf("/") + 1); String name = this.url.toExternalForm();
} catch (Exception e) { try {
LOGGER.info("Failed to get name for the image."); name = name.substring(name.lastIndexOf("/") + 1);
name = null; } catch (Exception e) {
} LOGGER.info("Failed to get name for the image.");
// Listal stores images as .jpg name = null;
return name + ".jpg"; }
} // Listal stores images as .jpg
} return name + ".jpg";
}
private static enum UrlType { }
LIST, FOLDER, UNKNOWN
} private static enum UrlType {
} LIST, FOLDER, UNKNOWN
}
}

View File

@@ -100,7 +100,7 @@ public class History {
public void fromFile(String filename) throws IOException { public void fromFile(String filename) throws IOException {
try (InputStream is = new FileInputStream(filename)) { try (InputStream is = new FileInputStream(filename)) {
String jsonString = IOUtils.toString(is); String jsonString = IOUtils.toString(is, "UTF-8");
JSONArray jsonArray = new JSONArray(jsonString); JSONArray jsonArray = new JSONArray(jsonString);
fromJSON(jsonArray); fromJSON(jsonArray);
} catch (JSONException e) { } catch (JSONException e) {
@@ -134,7 +134,7 @@ public class History {
public void toFile(String filename) throws IOException { public void toFile(String filename) throws IOException {
try (OutputStream os = new FileOutputStream(filename)) { try (OutputStream os = new FileOutputStream(filename)) {
IOUtils.write(toJSON().toString(2), os); IOUtils.write(toJSON().toString(2), os, "UTF-8");
} }
} }
} }

View File

@@ -4,7 +4,8 @@ import com.rarchives.ripme.ripper.AbstractRipper;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URI;
import java.net.URISyntaxException;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -12,20 +13,20 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
public class AbstractRipperTest { public class AbstractRipperTest {
@Test @Test
public void testGetFileName() throws IOException { public void testGetFileName() throws IOException, URISyntaxException {
String fileName = AbstractRipper.getFileName(new URL("http://www.tsumino.com/Image/Object?name=U1EieteEGwm6N1dGszqCpA%3D%3D"),null, "test", "test"); String fileName = AbstractRipper.getFileName(new URI("http://www.tsumino.com/Image/Object?name=U1EieteEGwm6N1dGszqCpA%3D%3D").toURL(),null, "test", "test");
assertEquals("test.test", fileName); assertEquals("test.test", fileName);
fileName = AbstractRipper.getFileName(new URL("http://www.tsumino.com/Image/Object?name=U1EieteEGwm6N1dGszqCpA%3D%3D"), null,"test", null); fileName = AbstractRipper.getFileName(new URI("http://www.tsumino.com/Image/Object?name=U1EieteEGwm6N1dGszqCpA%3D%3D").toURL(), null,"test", null);
assertEquals("test", fileName); assertEquals("test", fileName);
fileName = AbstractRipper.getFileName(new URL("http://www.tsumino.com/Image/Object?name=U1EieteEGwm6N1dGszqCpA%3D%3D"), null,null, null); fileName = AbstractRipper.getFileName(new URI("http://www.tsumino.com/Image/Object?name=U1EieteEGwm6N1dGszqCpA%3D%3D").toURL(), null,null, null);
assertEquals("Object", fileName); assertEquals("Object", fileName);
fileName = AbstractRipper.getFileName(new URL("http://www.test.com/file.png"), null,null, null); fileName = AbstractRipper.getFileName(new URI("http://www.test.com/file.png").toURL(), null,null, null);
assertEquals("file.png", fileName); assertEquals("file.png", fileName);
fileName = AbstractRipper.getFileName(new URL("http://www.test.com/file."), null,null, null); fileName = AbstractRipper.getFileName(new URI("http://www.test.com/file.").toURL(), null,null, null);
assertEquals("file.", fileName); assertEquals("file.", fileName);
} }