1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-06 05:47:29 +02:00

Adding several Mastodon instance rippers

This commit is contained in:
0x1f595
2019-10-08 21:20:00 -06:00
parent bb287b7331
commit 19ec16bf7f
5 changed files with 176 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.URL;
/**
 * Mastodon ripper bound to the {@code artalley.social} instance.
 *
 * <p>All ripping logic is inherited from {@link MastodonRipper}; this class
 * only supplies the host name and domain.
 */
public class ArtAlleyRipper extends MastodonRipper {

    // Compile-time constants, so the getters never depend on instance
    // initialisation order.
    private static final String DOMAIN = "artalley.social";
    private static final String HOST = "artalley";

    /**
     * Creates a ripper for the given URL.
     *
     * @param url the URL to rip, passed unchanged to the superclass constructor
     * @throws IOException if the superclass constructor fails
     */
    public ArtAlleyRipper(URL url) throws IOException {
        super(url);
    }

    /** @return the domain of this instance, {@code "artalley.social"} */
    @Override
    public String getDomain() {
        return DOMAIN;
    }

    /** @return the host identifier of this ripper, {@code "artalley"} */
    @Override
    public String getHost() {
        return HOST;
    }
}

View File

@@ -0,0 +1,20 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.URL;
/**
 * Mastodon ripper bound to the {@code baraag.net} instance.
 *
 * <p>All ripping logic is inherited from {@link MastodonRipper}; this class
 * only supplies the host name and domain.
 */
public class BaraagRipper extends MastodonRipper {

    // Compile-time constants, so the getters never depend on instance
    // initialisation order.
    private static final String DOMAIN = "baraag.net";
    private static final String HOST = "baraag";

    /**
     * Creates a ripper for the given URL.
     *
     * @param url the URL to rip, passed unchanged to the superclass constructor
     * @throws IOException if the superclass constructor fails
     */
    public BaraagRipper(URL url) throws IOException {
        super(url);
    }

    /** @return the domain of this instance, {@code "baraag.net"} */
    @Override
    public String getDomain() {
        return DOMAIN;
    }

    /** @return the host identifier of this ripper, {@code "baraag"} */
    @Override
    public String getHost() {
        return HOST;
    }
}

View File

@@ -0,0 +1,96 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONObject;
import org.json.JSONArray;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Ripper for the media attached to a user's posts on a Mastodon instance.
 *
 * <p>The instance is identified by {@link #getDomain()}, which defaults to
 * {@code mastodon.social}. URL validation in {@link #getGID(URL)} is built from
 * that value, so a subclass can target another instance by overriding
 * {@link #getHost()} and {@link #getDomain()}.
 */
public class MastodonRipper extends AbstractHTMLRipper {

    /** Matches the path of a profile's media tab, e.g. {@code /@user/media}. */
    private static final Pattern MEDIA_PATH = Pattern.compile("^/@[a-zA-Z0-9_-]+/media/?$");

    /**
     * Maps each media URL found by {@link #getURLsFromPage(Document)} to the
     * {@code id} of that media item, for use as a file name prefix in
     * {@link #downloadURL(URL, int)}.
     */
    private final Map<String, String> itemIDs = Collections.synchronizedMap(new HashMap<String, String>());

    /**
     * Creates a ripper for the given URL.
     *
     * @param url the URL to rip, passed unchanged to the superclass constructor
     * @throws IOException if the superclass constructor fails
     */
    public MastodonRipper(URL url) throws IOException {
        super(url);
    }

    /** @return the host identifier of this ripper, {@code "mastodon"} */
    @Override
    public String getHost() {
        return "mastodon";
    }

    /** @return the domain of the Mastodon instance, {@code "mastodon.social"} */
    @Override
    public String getDomain() {
        return "mastodon.social";
    }

    /**
     * Derives the album identifier from a profile URL.
     *
     * <p>Accepted forms are {@code http(s)://<domain>/@<user>}, optionally
     * followed by {@code /media} or {@code /media/}.
     *
     * @param url the URL to inspect
     * @return {@code <domain>@<user>}
     * @throws MalformedURLException if the URL does not have the expected form
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        // Quote the domain so that its dots are matched literally instead of
        // acting as regex wildcards (e.g. "mastodonXsocial" must not match).
        Pattern p = Pattern.compile(
                "^https?://(" + Pattern.quote(getDomain()) + ")/@([a-zA-Z0-9_-]+)(/media/?)?$");
        Matcher m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            // group(1) is the domain, group(2) the user name
            return m.group(1) + "@" + m.group(2);
        }
        throw new MalformedURLException(
                "Expected " + getDomain() + " URL format: " +
                getDomain() + "/@username - got " + url + " instead");
    }

    /**
     * Fetches the first page of the user's media tab.
     *
     * <p>If the ripper's URL path already ends in {@code /media}, that URL is
     * fetched as is. Otherwise one trailing slash is stripped and
     * {@code /media} is appended before fetching.
     *
     * @return the parsed first page
     * @throws IOException if the page cannot be fetched
     */
    @Override
    public Document getFirstPage() throws IOException {
        if (MEDIA_PATH.matcher(url.getPath()).matches()) {
            return Http.url(url).get();
        }
        return Http.url(url.toExternalForm().replaceAll("/$", "") + "/media").get();
    }

    /**
     * Fetches the page linked by the last "load more" anchor of the given page.
     *
     * @param doc the current page
     * @return the parsed next page
     * @throws IOException if the page has no "load more" link, or the next
     *         page cannot be fetched
     */
    @Override
    public Document getNextPage(Document doc) throws IOException {
        Elements hrefs = doc.select(".h-entry + .entry > a.load-more.load-gap");
        if (hrefs.isEmpty()) {
            throw new IOException("No more pages");
        }
        String nextUrl = hrefs.last().attr("href");
        // Pause between page requests (presumably 500 ms - unit is defined by
        // the inherited sleep helper).
        sleep(500);
        return Http.url(nextUrl).get();
    }

    /**
     * Collects the media URLs embedded in the page's media gallery components.
     *
     * <p>Each element with {@code data-component="MediaGallery"} carries a JSON
     * object in its {@code data-props} attribute; every entry of that object's
     * {@code media} array contributes its {@code url}. As a side effect, the
     * entry's {@code id} is recorded for {@link #downloadURL(URL, int)}.
     *
     * @param doc the page to scan
     * @return the media URLs in document order
     */
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<String>();
        for (Element el : doc.select("[data-component=\"MediaGallery\"]")) {
            JSONObject props = new JSONObject(el.attr("data-props"));
            JSONArray media = props.getJSONArray("media");
            for (int i = 0; i < media.length(); i++) {
                JSONObject item = media.getJSONObject(i);
                String mediaUrl = item.getString("url");
                result.add(mediaUrl);
                itemIDs.put(mediaUrl, item.getString("id"));
            }
        }
        return result;
    }

    /**
     * Queues a media URL for download, prefixing the file name with the media
     * item's id followed by an underscore.
     *
     * <p>If no id was recorded for the URL, the prefix is {@code "null_"}.
     *
     * @param url   the media URL to download
     * @param index position of the URL in the rip; not used here
     */
    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, itemIDs.get(url.toString()) + "_");
    }
}

View File

@@ -0,0 +1,20 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.URL;
/**
 * Mastodon ripper bound to the {@code mastodon.xyz} instance.
 *
 * <p>All ripping logic is inherited from {@link MastodonRipper}; this class
 * only supplies the host name and domain.
 */
public class MastodonXyzRipper extends MastodonRipper {

    // Compile-time constants, so the getters never depend on instance
    // initialisation order.
    private static final String DOMAIN = "mastodon.xyz";
    private static final String HOST = "mastodonxyz";

    /**
     * Creates a ripper for the given URL.
     *
     * @param url the URL to rip, passed unchanged to the superclass constructor
     * @throws IOException if the superclass constructor fails
     */
    public MastodonXyzRipper(URL url) throws IOException {
        super(url);
    }

    /** @return the domain of this instance, {@code "mastodon.xyz"} */
    @Override
    public String getDomain() {
        return DOMAIN;
    }

    /** @return the host identifier of this ripper, {@code "mastodonxyz"} */
    @Override
    public String getHost() {
        return HOST;
    }
}

View File

@@ -0,0 +1,20 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.URL;
/**
 * Mastodon ripper bound to the {@code pawoo.net} instance.
 *
 * <p>All ripping logic is inherited from {@link MastodonRipper}; this class
 * only supplies the host name and domain.
 */
public class PawooRipper extends MastodonRipper {

    // Compile-time constants, so the getters never depend on instance
    // initialisation order.
    private static final String DOMAIN = "pawoo.net";
    private static final String HOST = "pawoo";

    /**
     * Creates a ripper for the given URL.
     *
     * @param url the URL to rip, passed unchanged to the superclass constructor
     * @throws IOException if the superclass constructor fails
     */
    public PawooRipper(URL url) throws IOException {
        super(url);
    }

    /** @return the domain of this instance, {@code "pawoo.net"} */
    @Override
    public String getDomain() {
        return DOMAIN;
    }

    /** @return the host identifier of this ripper, {@code "pawoo"} */
    @Override
    public String getHost() {
        return HOST;
    }
}