1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-08 06:46:41 +02:00

Fix rgif search and add support for tag urls

This commit is contained in:
Tush-r
2024-08-29 17:21:01 +05:30
committed by soloturn
parent 37e98846bb
commit 0589c52e11

View File

@@ -1,6 +1,7 @@
package com.rarchives.ripme.ripper.rippers; package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONObject; import org.json.JSONObject;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
@@ -14,8 +15,14 @@ import java.net.MalformedURLException;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.net.URL; import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@@ -30,6 +37,8 @@ public class RedgifsRipper extends AbstractJSONRipper {
private static final String GIFS_DETAIL_ENDPOINT = "https://api.redgifs.com/v2/gifs/%s"; private static final String GIFS_DETAIL_ENDPOINT = "https://api.redgifs.com/v2/gifs/%s";
private static final String USERS_SEARCH_ENDPOINT = "https://api.redgifs.com/v2/users/%s/search"; private static final String USERS_SEARCH_ENDPOINT = "https://api.redgifs.com/v2/users/%s/search";
private static final String GALLERY_ENDPOINT = "https://api.redgifs.com/v2/gallery/%s"; private static final String GALLERY_ENDPOINT = "https://api.redgifs.com/v2/gallery/%s";
private static final String SEARCH_ENDPOINT = "https://api.redgifs.com/v2/search/%s";
private static final String TAGS_ENDPOINT = "https://api.redgifs.com/v2/gifs/search";
private static final String TEMPORARY_AUTH_ENDPOINT = "https://api.redgifs.com/v2/auth/temporary"; private static final String TEMPORARY_AUTH_ENDPOINT = "https://api.redgifs.com/v2/auth/temporary";
String username = ""; String username = "";
String authToken = ""; String authToken = "";
@@ -76,7 +85,12 @@ public class RedgifsRipper extends AbstractJSONRipper {
} }
public Matcher isSearch() { public Matcher isSearch() {
Pattern p = Pattern.compile("^https?://[wm.]*redgifs\\.com/gifs/browse/([a-zA-Z0-9_.-]+).*$"); Pattern p = Pattern.compile("^https?:\\/\\/[wm.]*redgifs\\.com\\/search(?:\\/[a-zA-Z]+)?\\?.*?query=([a-zA-Z0-9-_+%]+).*$");
return p.matcher(url.toExternalForm());
}
public Matcher isTags() {
Pattern p = Pattern.compile("^https?:\\/\\/[wm.]*redgifs\\.com\\/gifs\\/([a-zA-Z0-9_.,-]+).*$");
return p.matcher(url.toExternalForm()); return p.matcher(url.toExternalForm());
} }
@@ -96,10 +110,10 @@ public class RedgifsRipper extends AbstractJSONRipper {
maxPages = 1; maxPages = 1;
String gifDetailsURL = String.format(GIFS_DETAIL_ENDPOINT, getGID(url)); String gifDetailsURL = String.format(GIFS_DETAIL_ENDPOINT, getGID(url));
return Http.url(gifDetailsURL).header("Authorization", "Bearer " + authToken).getJSON(); return Http.url(gifDetailsURL).header("Authorization", "Bearer " + authToken).getJSON();
} else if (isSearch().matches()) { } else if (isSearch().matches() || isTags().matches()) {
// TODO fix search var json = Http.url(getSearchOrTagsURL()).header("Authorization", "Bearer " + authToken).getJSON();
// TODO remove maxPages = json.getInt("pages");
throw new IOException("TODO remove"); return json;
} else { } else {
username = getGID(url); username = getGID(url);
var uri = new URIBuilder(String.format(USERS_SEARCH_ENDPOINT, username)); var uri = new URIBuilder(String.format(USERS_SEARCH_ENDPOINT, username));
@@ -111,7 +125,7 @@ public class RedgifsRipper extends AbstractJSONRipper {
return json; return json;
} }
} catch (URISyntaxException e) { } catch (URISyntaxException e) {
throw new IOException(e); throw new IOException("Failed to build first page url", e);
} }
} }
@@ -122,14 +136,35 @@ public class RedgifsRipper extends AbstractJSONRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Matcher m = isProfile(); Matcher m = isProfile();
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} }
m = isSearch(); m = isSearch();
if (m.matches()) { if (m.matches()) {
return m.group(1); var sText = m.group(1);
if (sText == null || sText.isBlank()){
throw new MalformedURLException(String.format("Expected redgifs.com/search?query=searchtext\n Got %s", url));
}
sText = URLDecoder.decode(sText, StandardCharsets.UTF_8);
sText = sText.replaceAll("[^A-Za-z0-9_-]", "-");
return sText;
}
m = isTags();
if (m.matches()) {
var sText = m.group(1);
if (sText == null || sText.isBlank()){
throw new MalformedURLException(String.format("Expected redgifs.com/gifs/searchtags\n Got %s", url));
}
sText = URLDecoder.decode(sText, StandardCharsets.UTF_8);
var list = Arrays.asList(sText.split(","));
if (list.size() > 1) {
logger.warn("Url with multiple tags found. \nThey will be sorted alphabetically for folder name.");
}
Collections.sort(list);
var gid = list.stream().reduce("", (acc, val) -> acc.concat("_" + val));
gid = gid.replaceAll("[^A-Za-z0-9_-]", "-");
return gid;
} }
m = isSingleton(); m = isSingleton();
if (m.matches()) { if (m.matches()) {
@@ -137,8 +172,10 @@ public class RedgifsRipper extends AbstractJSONRipper {
} }
throw new MalformedURLException( throw new MalformedURLException(
"Expected redgifs.com format: " "Expected redgifs.com format: "
+ "redgifs.com/id or " + "redgifs.com/watch/id or "
+ "thumbs.redgifs.com/id.gif" + "redgifs.com/users/id or "
+ "redgifs.com/gifs/id or "
+ "redgifs.com/search?query=text"
+ " Got: " + url); + " Got: " + url);
} }
@@ -160,10 +197,11 @@ public class RedgifsRipper extends AbstractJSONRipper {
return null; return null;
} }
currentPage++; currentPage++;
if (isSearch().matches()) { if (isSearch().matches() || isTags().matches()) {
// TODO search var json = Http.url(getSearchOrTagsURL()).header("Authorization", "Bearer " + authToken).getJSON();
// TODO remove // Handle rare maxPages change during a rip
throw new IOException("// TODO remove"); maxPages = json.getInt("pages");
return json;
} else if (isProfile().matches()) { } else if (isProfile().matches()) {
var uri = new URIBuilder(String.format(USERS_SEARCH_ENDPOINT, getGID(url))); var uri = new URIBuilder(String.format(USERS_SEARCH_ENDPOINT, getGID(url)));
uri.addParameter("order", "new"); uri.addParameter("order", "new");
@@ -181,13 +219,12 @@ public class RedgifsRipper extends AbstractJSONRipper {
@Override @Override
public List<String> getURLsFromJSON(JSONObject json) { public List<String> getURLsFromJSON(JSONObject json) {
List<String> result = new ArrayList<>(); List<String> result = new ArrayList<>();
if (isProfile().matches() || isSearch().matches()) { if (isProfile().matches() || isSearch().matches() || isTags().matches()) {
// TODO check json keys for search
var gifs = json.getJSONArray("gifs"); var gifs = json.getJSONArray("gifs");
for (var gif : gifs) { for (var gif : gifs) {
if (((JSONObject)gif).isNull("gallery")) { if (((JSONObject)gif).isNull("gallery")) {
var hdURL = ((JSONObject)gif).getJSONObject("urls").getString("hd"); var hdURL = ((JSONObject)gif).getJSONObject("urls").getString("hd");
result.add(hdURL); result.add(hdURL);
} else { } else {
var galleryID = ((JSONObject)gif).getString("gallery"); var galleryID = ((JSONObject)gif).getString("gallery");
var gifID = ((JSONObject)gif).getString("id"); var gifID = ((JSONObject)gif).getString("id");
@@ -198,7 +235,7 @@ public class RedgifsRipper extends AbstractJSONRipper {
var gif = json.getJSONObject("gif"); var gif = json.getJSONObject("gif");
if (gif.isNull("gallery")) { if (gif.isNull("gallery")) {
String hdURL = gif.getJSONObject("urls").getString("hd"); String hdURL = gif.getJSONObject("urls").getString("hd");
result.add(hdURL); result.add(hdURL);
} else { } else {
var galleryID = gif.getString("gallery"); var galleryID = gif.getString("gallery");
var gifID = gif.getString("id"); var gifID = gif.getString("id");
@@ -285,4 +322,84 @@ public class RedgifsRipper extends AbstractJSONRipper {
var token = json.getString("token"); var token = json.getString("token");
authToken = token; authToken = token;
} }
/**
* Map browser url query params to search or tags endpoint query params and return the complete url.
*
* Search text for search url comes from the query params, whereas search text for tags url comes from the path.
*
* Tab type for search url comes from the path whereas, tab type for tags url comes from query params.
* @return Search or tags endpoint url
*/
private URL getSearchOrTagsURL() throws IOException, URISyntaxException {
URIBuilder uri;
Map<String, String> endpointQueryParams = new HashMap<>();
var browserURLQueryParams = new URIBuilder(url.toString()).getQueryParams();
for (var qp : browserURLQueryParams) {
var name = qp.getName();
var value = qp.getValue();
switch (name) {
case "query":
endpointQueryParams.put("query", URLDecoder.decode(value, StandardCharsets.UTF_8));
break;
case "tab":
switch (value) {
case "gifs" -> endpointQueryParams.put("type", "g");
case "images" -> endpointQueryParams.put("type", "i");
default -> logger.warn(String.format("Unsupported tab for tags url %s", value));
}
break;
case "verified":
if (value != null && value.equals("1")) {
if (isTags().matches()){
endpointQueryParams.put("verified", "y");
} else {
endpointQueryParams.put("verified", "yes");
}
}
break;
case "order":
endpointQueryParams.put("order", value);
break;
case "viewMode":
break;
default:
logger.warn(String.format("Unexpected query param %s for search url. Skipping.", name));
}
}
// Build the search or tags url and add missing query params if any
if (isTags().matches()) {
var subpaths = url.getPath().split("/");
if (subpaths.length != 0) {
endpointQueryParams.put("search_text", subpaths[subpaths.length-1]);
} else {
throw new IOException("Failed to get search tags for url");
}
// Check if it is the main tags page with all gifs, images, creator etc
if (!endpointQueryParams.containsKey("type")) {
logger.warn("No tab selected, defaulting to gifs");
endpointQueryParams.put("type", "g");
}
uri = new URIBuilder(TAGS_ENDPOINT);
} else {
var tabType = "gifs";
var subpaths = url.getPath().split("/");
if (subpaths.length != 0) {
switch (subpaths[subpaths.length-1]) {
case "gifs" -> tabType = "gifs";
case "images" -> tabType = "images";
case "search" -> logger.warn("No tab selected, defaulting to gifs");
default -> logger.warn(String.format("Unsupported search tab %s, defaulting to gifs", subpaths[subpaths.length-1]));
}
}
uri = new URIBuilder(String.format(SEARCH_ENDPOINT, tabType));
}
endpointQueryParams.put("page", Integer.toString(currentPage));
endpointQueryParams.put("count", Integer.toString(count));
endpointQueryParams.forEach((k, v) -> uri.addParameter(k, v));
return uri.build().toURL();
}
} }