mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-06-11 00:50:58 +02:00
Overhaul the usage of libcurl-impersonate (#4535)
libcurl-impersonate was not being used properly: the code was overriding the headers that libcurl-impersonate sets to prevent detection. - update libcurl-impersonate to the actively maintained lexiforest fork - impersonate Chrome 131 - move the defaultHttpHeaders to http.php, where it belongs - only set defaultHttpHeaders if curl-impersonate is not detected - make the useragent ini setting optional and disabled by default - add necessary documentation updates
This commit is contained in:
21
Dockerfile
21
Dockerfile
@ -25,36 +25,39 @@ RUN set -xe && \
|
|||||||
# php-zlib is enabled by default with PHP 8.2 in Debian 12
|
# php-zlib is enabled by default with PHP 8.2 in Debian 12
|
||||||
# for downloading libcurl-impersonate
|
# for downloading libcurl-impersonate
|
||||||
curl \
|
curl \
|
||||||
|
# for patching libcurl-impersonate
|
||||||
|
patchelf \
|
||||||
&& \
|
&& \
|
||||||
# install curl-impersonate library
|
# install curl-impersonate library
|
||||||
curlimpersonate_version=0.6.0 && \
|
curlimpersonate_version=1.0.0rc2 && \
|
||||||
{ \
|
{ \
|
||||||
{ \
|
{ \
|
||||||
[ $(arch) = 'aarch64' ] && \
|
[ $(arch) = 'aarch64' ] && \
|
||||||
archive="libcurl-impersonate-v${curlimpersonate_version}.aarch64-linux-gnu.tar.gz" && \
|
archive="libcurl-impersonate-v${curlimpersonate_version}.aarch64-linux-gnu.tar.gz" && \
|
||||||
sha512sum="d04b1eabe71f3af06aa1ce99b39a49c5e1d33b636acedcd9fad163bc58156af5c3eb3f75aa706f335515791f7b9c7a6c40ffdfa47430796483ecef929abd905d" \
|
sha512sum="c8add80e7a0430a074edea1a11f73d03044c48e848e164af2d6f362866623e29bede207a50f18f95b1bc5ab3d33f5c31408be60a6da66b74a0d176eebe299116" \
|
||||||
; } \
|
; } \
|
||||||
|| { \
|
|| { \
|
||||||
[ $(arch) = 'armv7l' ] && \
|
[ $(arch) = 'armv7l' ] && \
|
||||||
archive="libcurl-impersonate-v${curlimpersonate_version}.arm-linux-gnueabihf.tar.gz" && \
|
archive="libcurl-impersonate-v${curlimpersonate_version}.arm-linux-gnueabihf.tar.gz" && \
|
||||||
sha512sum="05906b4efa1a6ed8f3b716fd83d476b6eea6bfc68e3dbc5212d65a2962dcaa7bd1f938c9096a7535252b11d1d08fb93adccc633585ff8cb8cec5e58bfe969bc9" \
|
sha512sum="d0403ca4ad55a8d499b120e5675c7b5a0dc4946af49c933e91fc24455ffe5e122aa21ee95554612ff5d1bd6faea1556e1e1b9c821918e2644cc9bcbddc05747a" \
|
||||||
; } \
|
; } \
|
||||||
|| { \
|
|| { \
|
||||||
[ $(arch) = 'x86_64' ] && \
|
[ $(arch) = 'x86_64' ] && \
|
||||||
archive="libcurl-impersonate-v${curlimpersonate_version}.x86_64-linux-gnu.tar.gz" && \
|
archive="libcurl-impersonate-v${curlimpersonate_version}.x86_64-linux-gnu.tar.gz" && \
|
||||||
sha512sum="480bbe9452cd9aff2c0daaaf91f1057b3a96385f79011628a9237223757a9b0d090c59cb5982dc54ea0d07191657299ea91ca170a25ced3d7d410fcdff130ace" \
|
sha512sum="35cafda2b96df3218a6d8545e0947a899837ede51c90f7ef2980bd2d99dbd67199bc620000df28b186727300b8c7046d506807fb48ee0fbc068dc8ae01986339" \
|
||||||
; } \
|
; } \
|
||||||
} && \
|
} && \
|
||||||
curl -LO "https://github.com/lwthiker/curl-impersonate/releases/download/v${curlimpersonate_version}/${archive}" && \
|
curl -LO "https://github.com/lexiforest/curl-impersonate/releases/download/v${curlimpersonate_version}/${archive}" && \
|
||||||
echo "$sha512sum $archive" | sha512sum -c - && \
|
echo "$sha512sum $archive" | sha512sum -c - && \
|
||||||
mkdir -p /usr/local/lib/curl-impersonate && \
|
mkdir -p /usr/local/lib/curl-impersonate && \
|
||||||
tar xaf "$archive" -C /usr/local/lib/curl-impersonate --wildcards 'libcurl-impersonate-ff.so*' && \
|
tar xaf "$archive" -C /usr/local/lib/curl-impersonate && \
|
||||||
|
patchelf --set-soname libcurl.so.4 /usr/local/lib/curl-impersonate/libcurl-impersonate.so && \
|
||||||
rm "$archive" && \
|
rm "$archive" && \
|
||||||
apt-get purge --assume-yes curl && \
|
apt-get purge --assume-yes curl patchelf && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so
|
ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate.so
|
||||||
ENV CURL_IMPERSONATE ff91esr
|
ENV CURL_IMPERSONATE chrome131
|
||||||
|
|
||||||
# logs should go to stdout / stderr
|
# logs should go to stdout / stderr
|
||||||
RUN ln -sfT /dev/stderr /var/log/nginx/error.log; \
|
RUN ln -sfT /dev/stderr /var/log/nginx/error.log; \
|
||||||
|
@ -53,7 +53,10 @@ timeout = 5
|
|||||||
retries = 1
|
retries = 1
|
||||||
|
|
||||||
; Curl user agent
|
; Curl user agent
|
||||||
useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"
|
; This is already set by curl-impersonate, which is included by default
|
||||||
|
; in the RSS-Bridge Docker container. Use only if you know what you're doing.
|
||||||
|
; For reference, see https://github.com/lexiforest/curl-impersonate/tree/main/docs
|
||||||
|
;useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"
|
||||||
|
|
||||||
; Max http response size in MB
|
; Max http response size in MB
|
||||||
max_filesize = 20
|
max_filesize = 20
|
||||||
|
@ -119,7 +119,9 @@ Default network timeout.
|
|||||||
|
|
||||||
### useragent
|
### useragent
|
||||||
|
|
||||||
Default user agent.
|
Overrides the user agent value. Note that the default value, together with a set of other detection-preventing options, is set
|
||||||
|
automatically by [libcurl-impersonate](https://github.com/lexiforest/curl-impersonate), which is used by the default Docker container distributed together with RSS-Bridge. Use only if you know what you're doing; otherwise you may stop libcurl-impersonate
|
||||||
|
from doing its job of impersonating a real browser.
|
||||||
|
|
||||||
## Authentication
|
## Authentication
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
## Test a site before building a bridge
|
## Test a site before building a bridge
|
||||||
|
|
||||||
Some sites make use of anti-bot mechanisms (e.g.: by using JavaScript) in which case they work fine in regular browsers,
|
Some sites make use of anti-bot mechanisms (e.g.: by using JavaScript) in which case they work fine in regular browsers,
|
||||||
but not in the PHP environment.
|
but not in the PHP environment. The RSS-Bridge Docker container uses libcurl-impersonate by default, which helps mitigate anti-bot mechanisms.
|
||||||
|
|
||||||
To check if a site works with RSS-Bridge, create a new bridge using the
|
To check if a site works with RSS-Bridge, create a new bridge using the
|
||||||
[template](../05_Bridge_API/02_BridgeAbstract.md#template)
|
[template](../05_Bridge_API/02_BridgeAbstract.md#template)
|
||||||
|
@ -64,19 +64,7 @@ function getContents(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
|
$config['headers'] = $httpHeadersNormalized;
|
||||||
$defaultHttpHeaders = [
|
|
||||||
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
|
||||||
'Accept-Language' => 'en-US,en;q=0.5',
|
|
||||||
'Upgrade-Insecure-Requests' => '1',
|
|
||||||
'Sec-Fetch-Dest' => 'document',
|
|
||||||
'Sec-Fetch-Mode' => 'navigate',
|
|
||||||
'Sec-Fetch-Site' => 'none',
|
|
||||||
'Sec-Fetch-User' => '?1',
|
|
||||||
'TE' => 'trailers',
|
|
||||||
];
|
|
||||||
|
|
||||||
$config['headers'] = array_merge($defaultHttpHeaders, $httpHeadersNormalized);
|
|
||||||
|
|
||||||
$maxFileSize = Configuration::getConfig('http', 'max_filesize');
|
$maxFileSize = Configuration::getConfig('http', 'max_filesize');
|
||||||
if ($maxFileSize) {
|
if ($maxFileSize) {
|
||||||
|
29
lib/http.php
29
lib/http.php
@ -66,6 +66,8 @@ final class CurlHttpClient implements HttpClient
|
|||||||
{
|
{
|
||||||
public function request(string $url, array $config = []): Response
|
public function request(string $url, array $config = []): Response
|
||||||
{
|
{
|
||||||
|
$ch = curl_init($url);
|
||||||
|
|
||||||
$defaults = [
|
$defaults = [
|
||||||
'useragent' => null,
|
'useragent' => null,
|
||||||
'timeout' => 5,
|
'timeout' => 5,
|
||||||
@ -77,13 +79,27 @@ final class CurlHttpClient implements HttpClient
|
|||||||
'max_filesize' => null,
|
'max_filesize' => null,
|
||||||
'max_redirections' => 5,
|
'max_redirections' => 5,
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// if curl-impersonate is not detected, use some basic defaults
|
||||||
|
if (curl_version()['ssl_version'] != 'BoringSSL') {
|
||||||
|
// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
|
||||||
|
$defaults['headers'] = [
|
||||||
|
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
||||||
|
'Accept-Language' => 'en-US,en;q=0.5',
|
||||||
|
'Upgrade-Insecure-Requests' => '1',
|
||||||
|
'Sec-Fetch-Dest' => 'document',
|
||||||
|
'Sec-Fetch-Mode' => 'navigate',
|
||||||
|
'Sec-Fetch-Site' => 'none',
|
||||||
|
'Sec-Fetch-User' => '?1',
|
||||||
|
'TE' => 'trailers',
|
||||||
|
];
|
||||||
|
$defaults['useragent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0';
|
||||||
|
|
||||||
|
curl_setopt($ch, CURLOPT_HEADER, false);
|
||||||
|
}
|
||||||
|
|
||||||
$config = array_merge($defaults, $config);
|
$config = array_merge($defaults, $config);
|
||||||
|
|
||||||
$ch = curl_init($url);
|
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
|
||||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
|
||||||
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
|
|
||||||
curl_setopt($ch, CURLOPT_HEADER, false);
|
|
||||||
$httpHeaders = [];
|
$httpHeaders = [];
|
||||||
foreach ($config['headers'] as $name => $value) {
|
foreach ($config['headers'] as $name => $value) {
|
||||||
$httpHeaders[] = sprintf('%s: %s', $name, $value);
|
$httpHeaders[] = sprintf('%s: %s', $name, $value);
|
||||||
@ -92,6 +108,9 @@ final class CurlHttpClient implements HttpClient
|
|||||||
if ($config['useragent']) {
|
if ($config['useragent']) {
|
||||||
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
|
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
|
||||||
}
|
}
|
||||||
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
|
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||||
|
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
|
||||||
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
|
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
|
||||||
curl_setopt($ch, CURLOPT_ENCODING, '');
|
curl_setopt($ch, CURLOPT_ENCODING, '');
|
||||||
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
||||||
|
Reference in New Issue
Block a user