From 19ec16bf7f81c9af68aa62a69f6d71120bc56e8c Mon Sep 17 00:00:00 2001
From: 0x1f595 <0x1f595@users.noreply.github.com>
Date: Tue, 8 Oct 2019 21:20:00 -0600
Subject: [PATCH] Adding several Mastodon instance rippers

---
 .../ripme/ripper/rippers/ArtAlleyRipper.java  |  26 +++++
 .../ripme/ripper/rippers/BaraagRipper.java    |  26 +++++
 .../ripme/ripper/rippers/MastodonRipper.java  | 140 +++++++++++++++++++++++++
 .../ripper/rippers/MastodonXyzRipper.java     |  26 +++++
 .../ripme/ripper/rippers/PawooRipper.java     |  26 +++++
 5 files changed, 244 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/ArtAlleyRipper.java
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/BaraagRipper.java
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/MastodonRipper.java
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/MastodonXyzRipper.java
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ArtAlleyRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtAlleyRipper.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtAlleyRipper.java
@@ -0,0 +1,26 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+/**
+ * Ripper for the Mastodon instance hosted at artalley.social.
+ *
+ * <p>Everything except the host name and domain is inherited from
+ * {@link MastodonRipper}.
+ */
+public class ArtAlleyRipper extends MastodonRipper {
+    public ArtAlleyRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "artalley";
+    }
+
+    @Override
+    public String getDomain() {
+        return "artalley.social";
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/BaraagRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/BaraagRipper.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/BaraagRipper.java
@@ -0,0 +1,26 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+/**
+ * Ripper for the Mastodon instance hosted at baraag.net.
+ *
+ * <p>Everything except the host name and domain is inherited from
+ * {@link MastodonRipper}.
+ */
+public class BaraagRipper extends MastodonRipper {
+    public BaraagRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "baraag";
+    }
+
+    @Override
+    public String getDomain() {
+        return "baraag.net";
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonRipper.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonRipper.java
@@ -0,0 +1,140 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.json.JSONObject;
+import org.json.JSONArray;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+/**
+ * Rips the media attached to the posts of a Mastodon account.
+ *
+ * <p>Targets mastodon.social by default; subclasses cover other instances by
+ * overriding {@link #getHost()} and {@link #getDomain()}.
+ */
+public class MastodonRipper extends AbstractHTMLRipper {
+    /** Matches the path of an account's media tab, e.g. {@code /@username/media}. */
+    private static final Pattern MEDIA_PATH = Pattern.compile("^/@[a-zA-Z0-9_-]+/media/?$");
+
+    /** Maps each media URL seen while scraping to that media item's id. */
+    private final Map<String, String> itemIDs =
+            Collections.synchronizedMap(new HashMap<String, String>());
+
+    public MastodonRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "mastodon";
+    }
+
+    @Override
+    public String getDomain() {
+        return "mastodon.social";
+    }
+
+    /**
+     * Derives the GID from an account URL.
+     *
+     * @param url account URL, with or without a trailing {@code /media}
+     * @return the domain and the username joined by {@code @}
+     * @throws MalformedURLException if {@code url} is not an account URL on {@link #getDomain()}
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        // Quote the domain so that its dots match literally instead of any character.
+        Pattern p = Pattern.compile(
+                "^https?://(" + Pattern.quote(getDomain()) + ")/@([a-zA-Z0-9_-]+)(/media/?)?$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            // Group 1 is the domain, group 2 the username.
+            return m.group(1) + "@" + m.group(2);
+        }
+        throw new MalformedURLException(
+                "Expected " + getDomain() + " URL format: "
+                + getDomain() + "/@username - got " + url + " instead");
+    }
+
+    /**
+     * Fetches the first page of the account's media tab.
+     *
+     * @return the document at the media URL; {@code /media} is appended when missing
+     * @throws IOException propagated from the HTTP request
+     */
+    @Override
+    public Document getFirstPage() throws IOException {
+        if (MEDIA_PATH.matcher(url.getPath()).matches()) {
+            return Http.url(url).get();
+        }
+        // Profile URL: drop a trailing slash, then append the media tab.
+        return Http.url(url.toExternalForm().replaceAll("/$", "") + "/media").get();
+    }
+
+    /**
+     * Follows the last "load more" link of the given page.
+     *
+     * @param doc the page that was just processed
+     * @return the document behind the link
+     * @throws IOException if the page has no such link, or propagated from the HTTP request
+     */
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        Elements hrefs = doc.select(".h-entry + .entry > a.load-more.load-gap");
+        if (hrefs.isEmpty()) {
+            throw new IOException("No more pages");
+        }
+        String nextUrl = hrefs.last().attr("href");
+        // Pause before requesting the next page (presumably 500 ms).
+        sleep(500);
+        return Http.url(nextUrl).get();
+    }
+
+    /**
+     * Collects the media URLs listed by the page's MediaGallery components.
+     *
+     * <p>Each URL's media id is stored in {@link #itemIDs} as a side effect.
+     *
+     * @param doc the page to scan
+     * @return the media URLs in the order they were encountered
+     */
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("[data-component=\"MediaGallery\"]")) {
+            // The component's properties are JSON stored in the data-props attribute.
+            JSONArray media = new JSONObject(el.attr("data-props")).getJSONArray("media");
+            for (int i = 0; i < media.length(); i++) {
+                JSONObject item = media.getJSONObject(i);
+                String mediaUrl = item.getString("url");
+                result.add(mediaUrl);
+                itemIDs.put(mediaUrl, item.getString("id"));
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Queues {@code url} for download along with the string {@code "<media id>_"}
+     * (presumably used as a file-name prefix).
+     */
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, itemIDs.get(url.toString()) + "_");
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonXyzRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonXyzRipper.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonXyzRipper.java
@@ -0,0 +1,26 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+/**
+ * Ripper for the Mastodon instance hosted at mastodon.xyz.
+ *
+ * <p>Everything except the host name and domain is inherited from
+ * {@link MastodonRipper}.
+ */
+public class MastodonXyzRipper extends MastodonRipper {
+    public MastodonXyzRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "mastodonxyz";
+    }
+
+    @Override
+    public String getDomain() {
+        return "mastodon.xyz";
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java
@@ -0,0 +1,26 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+/**
+ * Ripper for the Mastodon instance hosted at pawoo.net.
+ *
+ * <p>Everything except the host name and domain is inherited from
+ * {@link MastodonRipper}.
+ */
+public class PawooRipper extends MastodonRipper {
+    public PawooRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "pawoo";
+    }
+
+    @Override
+    public String getDomain() {
+        return "pawoo.net";
+    }
+}