1
0
mirror of https://github.com/phpbb/phpbb.git synced 2025-04-14 12:52:08 +02:00

Merge pull request #6272 from MichaIng/ticket/16851

[ticket/16851] Add Amazonbot, AhrefsBot and SemrushBot
This commit is contained in:
Marc Alexander 2021-08-25 20:07:31 +02:00
commit 8db200d75f
No known key found for this signature in database
GPG Key ID: 50E0D2423696F995
4 changed files with 131 additions and 10 deletions

View File

@ -2,7 +2,7 @@
/**
* Rebuild BOTS
*
* You should make a backup from your whole database. Things can and will go wrong.
* You should make a backup from your whole database. Things can and will go wrong.
* This will only work if no BOTs were added.
*
*/
@ -24,10 +24,14 @@ $user->setup();
$bots = array(
'AdsBot [Google]' => array('AdsBot-Google', ''),
'Ahrefs [Bot]' => array('AhrefsBot/', ''),
'Alexa [Bot]' => array('ia_archiver', ''),
'Alta Vista [Bot]' => array('Scooter/', ''),
'Amazon [Bot]' => array('Amazonbot/', ''),
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
'Baidu [Spider]' => array('Baiduspider+(', ''),
'Baidu [Spider]' => array('Baiduspider', ''),
'Bing [Bot]' => array('bingbot/', ''),
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
'Exabot [Bot]' => array('Exabot/', ''),
'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''),
'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''),
@ -41,7 +45,7 @@ $bots = array(
'Heritrix [Crawler]' => array('heritrix/1.', ''),
'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''),
'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''),
'ichiro [Crawler]' => array('ichiro/2', ''),
'ichiro [Crawler]' => array('ichiro/', ''),
'Majestic-12 [Bot]' => array('MJ12bot/', ''),
'Metager [Bot]' => array('MetagerBot/', ''),
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
@ -54,6 +58,7 @@ $bots = array(
'Online link [Validator]' => array('online link validator', ''),
'psbot [Picsearch]' => array('psbot/0', ''),
'Seekport [Bot]' => array('Seekbot/', ''),
'Semrush [Bot]' => array('SemrushBot/', ''),
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
'SEO Crawler' => array('SEO search Crawler/', ''),
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
@ -63,7 +68,7 @@ $bots = array(
'Synoo [Bot]' => array('SynooBot/', ''),
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
'Voyager [Bot]' => array('voyager/1.0', ''),
'Voyager [Bot]' => array('voyager/', ''),
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
'W3C [Validator]' => array('W3C_*Validator', ''),
@ -74,7 +79,7 @@ $bots = array(
'Yahoo [Bot]' => array('Yahoo! Slurp', ''),
'YahooSeeker [Bot]' => array('YahooSeeker/', ''),
);
$bot_ids = array();
user_get_id_name($bot_ids, array_keys($bots), USER_IGNORE);
foreach($bot_ids as $bot)

View File

@ -1836,10 +1836,12 @@ function add_bots()
$bots = array(
'AdsBot [Google]' => array('AdsBot-Google', ''),
'Ahrefs [Bot]' => array('AhrefsBot/', ''),
'Alexa [Bot]' => array('ia_archiver', ''),
'Alta Vista [Bot]' => array('Scooter/', ''),
'Amazon [Bot]' => array('Amazonbot/', ''),
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
'Baidu [Spider]' => array('Baiduspider+(', ''),
'Baidu [Spider]' => array('Baiduspider', ''),
'Bing [Bot]' => array('bingbot/', ''),
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
'Exabot [Bot]' => array('Exabot/', ''),
@ -1855,7 +1857,7 @@ function add_bots()
'Heritrix [Crawler]' => array('heritrix/1.', ''),
'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''),
'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''),
'ichiro [Crawler]' => array('ichiro/2', ''),
'ichiro [Crawler]' => array('ichiro/', ''),
'Majestic-12 [Bot]' => array('MJ12bot/', ''),
'Metager [Bot]' => array('MetagerBot/', ''),
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
@ -1868,6 +1870,7 @@ function add_bots()
'Online link [Validator]' => array('online link validator', ''),
'psbot [Picsearch]' => array('psbot/0', ''),
'Seekport [Bot]' => array('Seekbot/', ''),
'Semrush [Bot]' => array('SemrushBot/', ''),
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
'SEO Crawler' => array('SEO search Crawler/', ''),
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
@ -1877,7 +1880,7 @@ function add_bots()
'Synoo [Bot]' => array('SynooBot/', ''),
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
'Voyager [Bot]' => array('voyager/1.0', ''),
'Voyager [Bot]' => array('voyager/', ''),
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
'W3C [Validator]' => array('W3C_*Validator', ''),

View File

@ -0,0 +1,104 @@
<?php
/**
*
* This file is part of the phpBB Forum Software package.
*
* @copyright (c) phpBB Limited <https://www.phpbb.com>
* @license GNU General Public License, version 2 (GPL-2.0)
*
* For full copyright and license information, please see
* the docs/CREDITS.txt file.
*
*/
namespace phpbb\db\migration\data\v33x;
/**
 * Migration that registers the Ahrefs, Amazon and Semrush crawlers as bots.
 *
 * For every bot that does not already have a user row, a user is created in
 * the BOTS group and a matching row is inserted into the bots table.
 */
class bot_update_v2 extends \phpbb\db\migration\migration
{
	/**
	 * Returns the migration class names this migration declares as dependencies.
	 *
	 * @return array List containing the v334 migration class name
	 */
	public static function depends_on()
	{
		return ['\phpbb\db\migration\data\v33x\v334'];
	}

	/**
	 * Returns the data steps of this migration: a single 'custom' step
	 * pointing at add_bots().
	 *
	 * @return array
	 */
	public function update_data()
	{
		return [
			['custom', [[$this, 'add_bots']]],
		];
	}

	/**
	 * Creates the missing bot users and their bots table entries.
	 *
	 * Bots whose cleaned username already exists in the users table are
	 * skipped, so running this more than once does not create duplicates.
	 *
	 * @return void
	 */
	public function add_bots()
	{
		// Bot display name => user agent fragment stored as bot_agent
		$bots = [
			'Ahrefs [Bot]'	=> 'AhrefsBot/',
			'Amazon [Bot]'	=> 'Amazonbot/',
			'Semrush [Bot]'	=> 'SemrushBot/',
		];

		// BOTS group data; looked up lazily, at most once, when the first bot has to be added
		$group_row = [];

		foreach ($bots as $bot_name => $bot_agent)
		{
			$bot_name_clean = utf8_clean_string($bot_name);

			$sql = 'SELECT user_id
				FROM ' . $this->table_prefix . 'users
				WHERE ' . $this->db->sql_build_array('SELECT', ['username_clean' => $bot_name_clean]);
			$result = $this->db->sql_query($sql);
			$bot_exists = (bool) $this->db->sql_fetchfield('user_id');
			$this->db->sql_freeresult($result);

			if ($bot_exists)
			{
				continue;
			}

			// empty() instead of count(): $group_row must never be passed to count()
			// while it might hold a non-array value.
			if (empty($group_row))
			{
				$sql = 'SELECT group_id, group_colour
					FROM ' . $this->table_prefix . 'groups
					WHERE ' . $this->db->sql_build_array('SELECT', ['group_name' => 'BOTS']);
				$result = $this->db->sql_query($sql);
				$group_row = $this->db->sql_fetchrow($result);
				$this->db->sql_freeresult($result);

				// Default fallback, should never get here.
				// sql_fetchrow() signals "no row" with false rather than an empty array;
				// count(false) raises a TypeError on PHP 8 (and evaluates to 1 with a
				// warning on PHP 7.2+), so the fallback would never have been applied.
				// NOTE(review): 6 / '9E8DA7' are presumably the stock BOTS group id and
				// colour - confirm against the install schema data.
				if (empty($group_row))
				{
					$group_row = [
						'group_id'		=> 6,
						'group_colour'	=> '9E8DA7',
					];
				}
			}

			if (!function_exists('user_add'))
			{
				include($this->phpbb_root_path . 'includes/functions_user.' . $this->php_ext);
			}

			$user_row = [
				'user_type'				=> USER_IGNORE,
				'group_id'				=> $group_row['group_id'],
				'username'				=> $bot_name,
				'user_regdate'			=> time(),
				'user_password'			=> '',
				'user_colour'			=> $group_row['group_colour'],
				'user_email'			=> '',
				'user_lang'				=> $this->config['default_lang'],
				'user_style'			=> $this->config['default_style'],
				'user_timezone'			=> 0,
				'user_dateformat'		=> $this->config['default_dateformat'],
				'user_allow_massemail'	=> 0,
			];

			$user_id = user_add($user_row);

			$sql = 'INSERT INTO ' . $this->table_prefix . 'bots ' . $this->db->sql_build_array('INSERT', [
				'bot_active'	=> 1,
				'bot_name'		=> $bot_name,
				'user_id'		=> (int) $user_id,
				'bot_agent'		=> $bot_agent,
				'bot_ip'		=> '',
			]);
			$this->db->sql_query($sql);
		}
	}
}

View File

@ -58,13 +58,15 @@ class add_bots extends \phpbb\install\task_base
*/
protected $bot_list = array(
'AdsBot [Google]' => array('AdsBot-Google', ''),
'Ahrefs [Bot]' => array('AhrefsBot/', ''),
'Alexa [Bot]' => array('ia_archiver', ''),
'Alta Vista [Bot]' => array('Scooter/', ''),
'Amazon [Bot]' => array('Amazonbot/', ''),
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
'Baidu [Spider]' => array('Baiduspider', ''),
'Bing [Bot]' => array('bingbot/', ''),
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
'Exabot [Bot]' => array('Exabot', ''),
'Exabot [Bot]' => array('Exabot/', ''),
'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''),
'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''),
'Francis [Bot]' => array('http://www.neomo.de/', ''),
@ -83,21 +85,28 @@ class add_bots extends \phpbb\install\task_base
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
'MSN [Bot]' => array('msnbot/', ''),
'MSNbot Media' => array('msnbot-media/', ''),
'NG-Search [Bot]' => array('NG-Search/', ''),
'Nutch [Bot]' => array('http://lucene.apache.org/nutch/', ''),
'Nutch/CVS [Bot]' => array('NutchCVS/', ''),
'OmniExplorer [Bot]' => array('OmniExplorer_Bot/', ''),
'Online link [Validator]' => array('online link validator', ''),
'psbot [Picsearch]' => array('psbot/0', ''),
'Seekport [Bot]' => array('Seekbot/', ''),
'Semrush [Bot]' => array('SemrushBot/', ''),
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
'SEO Crawler' => array('SEO search Crawler/', ''),
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
'SEOSearch [Crawler]' => array('SEOsearch/', ''),
'Snappy [Bot]' => array('Snappy/1.1 ( http://www.urltrends.com/ )', ''),
'Steeler [Crawler]' => array('http://www.tkl.iis.u-tokyo.ac.jp/~crawler/', ''),
'Synoo [Bot]' => array('SynooBot/', ''),
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
'Voyager [Bot]' => array('voyager/', ''),
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
'W3C [Validator]' => array('W3C_Validator', ''),
'W3C [Validator]' => array('W3C_*Validator', ''),
'WiseNut [Bot]' => array('http://www.WISEnutbot.com', ''),
'YaCy [Bot]' => array('yacybot', ''),
'Yahoo MMCrawler [Bot]' => array('Yahoo-MMCrawler/', ''),
'Yahoo Slurp [Bot]' => array('Yahoo! DE Slurp', ''),