mirror of
https://github.com/phpbb/phpbb.git
synced 2025-04-14 12:52:08 +02:00
Merge pull request #6272 from MichaIng/ticket/16851
[ticket/16851] Add Amazonbot, AhrefsBot and SemrushBot
This commit is contained in:
commit
8db200d75f
@ -2,7 +2,7 @@
|
||||
/**
|
||||
* Rebuild BOTS
|
||||
*
|
||||
* You should make a backup from your whole database. Things can and will go wrong.
|
||||
* You should make a backup from your whole database. Things can and will go wrong.
|
||||
* This will only work if no BOTs were added.
|
||||
*
|
||||
*/
|
||||
@ -24,10 +24,14 @@ $user->setup();
|
||||
|
||||
$bots = array(
|
||||
'AdsBot [Google]' => array('AdsBot-Google', ''),
|
||||
'Ahrefs [Bot]' => array('AhrefsBot/', ''),
|
||||
'Alexa [Bot]' => array('ia_archiver', ''),
|
||||
'Alta Vista [Bot]' => array('Scooter/', ''),
|
||||
'Amazon [Bot]' => array('Amazonbot/', ''),
|
||||
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
|
||||
'Baidu [Spider]' => array('Baiduspider+(', ''),
|
||||
'Baidu [Spider]' => array('Baiduspider', ''),
|
||||
'Bing [Bot]' => array('bingbot/', ''),
|
||||
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
|
||||
'Exabot [Bot]' => array('Exabot/', ''),
|
||||
'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''),
|
||||
'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''),
|
||||
@ -41,7 +45,7 @@ $bots = array(
|
||||
'Heritrix [Crawler]' => array('heritrix/1.', ''),
|
||||
'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''),
|
||||
'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''),
|
||||
'ichiro [Crawler]' => array('ichiro/2', ''),
|
||||
'ichiro [Crawler]' => array('ichiro/', ''),
|
||||
'Majestic-12 [Bot]' => array('MJ12bot/', ''),
|
||||
'Metager [Bot]' => array('MetagerBot/', ''),
|
||||
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
|
||||
@ -54,6 +58,7 @@ $bots = array(
|
||||
'Online link [Validator]' => array('online link validator', ''),
|
||||
'psbot [Picsearch]' => array('psbot/0', ''),
|
||||
'Seekport [Bot]' => array('Seekbot/', ''),
|
||||
'Semrush [Bot]' => array('SemrushBot/', ''),
|
||||
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
|
||||
'SEO Crawler' => array('SEO search Crawler/', ''),
|
||||
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
|
||||
@ -63,7 +68,7 @@ $bots = array(
|
||||
'Synoo [Bot]' => array('SynooBot/', ''),
|
||||
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
|
||||
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
|
||||
'Voyager [Bot]' => array('voyager/1.0', ''),
|
||||
'Voyager [Bot]' => array('voyager/', ''),
|
||||
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
|
||||
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
|
||||
'W3C [Validator]' => array('W3C_*Validator', ''),
|
||||
@ -74,7 +79,7 @@ $bots = array(
|
||||
'Yahoo [Bot]' => array('Yahoo! Slurp', ''),
|
||||
'YahooSeeker [Bot]' => array('YahooSeeker/', ''),
|
||||
);
|
||||
|
||||
|
||||
$bot_ids = array();
|
||||
user_get_id_name($bot_ids, array_keys($bots), USER_IGNORE);
|
||||
foreach($bot_ids as $bot)
|
||||
|
@ -1836,10 +1836,12 @@ function add_bots()
|
||||
|
||||
$bots = array(
|
||||
'AdsBot [Google]' => array('AdsBot-Google', ''),
|
||||
'Ahrefs [Bot]' => array('AhrefsBot/', ''),
|
||||
'Alexa [Bot]' => array('ia_archiver', ''),
|
||||
'Alta Vista [Bot]' => array('Scooter/', ''),
|
||||
'Amazon [Bot]' => array('Amazonbot/', ''),
|
||||
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
|
||||
'Baidu [Spider]' => array('Baiduspider+(', ''),
|
||||
'Baidu [Spider]' => array('Baiduspider', ''),
|
||||
'Bing [Bot]' => array('bingbot/', ''),
|
||||
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
|
||||
'Exabot [Bot]' => array('Exabot/', ''),
|
||||
@ -1855,7 +1857,7 @@ function add_bots()
|
||||
'Heritrix [Crawler]' => array('heritrix/1.', ''),
|
||||
'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''),
|
||||
'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''),
|
||||
'ichiro [Crawler]' => array('ichiro/2', ''),
|
||||
'ichiro [Crawler]' => array('ichiro/', ''),
|
||||
'Majestic-12 [Bot]' => array('MJ12bot/', ''),
|
||||
'Metager [Bot]' => array('MetagerBot/', ''),
|
||||
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
|
||||
@ -1868,6 +1870,7 @@ function add_bots()
|
||||
'Online link [Validator]' => array('online link validator', ''),
|
||||
'psbot [Picsearch]' => array('psbot/0', ''),
|
||||
'Seekport [Bot]' => array('Seekbot/', ''),
|
||||
'Semrush [Bot]' => array('SemrushBot/', ''),
|
||||
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
|
||||
'SEO Crawler' => array('SEO search Crawler/', ''),
|
||||
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
|
||||
@ -1877,7 +1880,7 @@ function add_bots()
|
||||
'Synoo [Bot]' => array('SynooBot/', ''),
|
||||
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
|
||||
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
|
||||
'Voyager [Bot]' => array('voyager/1.0', ''),
|
||||
'Voyager [Bot]' => array('voyager/', ''),
|
||||
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
|
||||
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
|
||||
'W3C [Validator]' => array('W3C_*Validator', ''),
|
||||
|
104
phpBB/phpbb/db/migration/data/v33x/bot_update_v2.php
Normal file
104
phpBB/phpbb/db/migration/data/v33x/bot_update_v2.php
Normal file
@ -0,0 +1,104 @@
|
||||
<?php
|
||||
/**
|
||||
*
|
||||
* This file is part of the phpBB Forum Software package.
|
||||
*
|
||||
* @copyright (c) phpBB Limited <https://www.phpbb.com>
|
||||
* @license GNU General Public License, version 2 (GPL-2.0)
|
||||
*
|
||||
* For full copyright and license information, please see
|
||||
* the docs/CREDITS.txt file.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace phpbb\db\migration\data\v33x;
|
||||
|
||||
/**
 * Migration that registers the Ahrefs, Amazon and Semrush crawlers as bots.
 *
 * Each bot gets a user row (type USER_IGNORE) and a matching row in the
 * bots table. Bots whose clean username already exists are left untouched.
 */
class bot_update_v2 extends \phpbb\db\migration\migration
{
	/**
	 * Migrations that must be applied before this one.
	 *
	 * @return array List of migration class names
	 */
	public static function depends_on()
	{
		return ['\phpbb\db\migration\data\v33x\v334'];
	}

	/**
	 * Data update steps performed by this migration.
	 *
	 * @return array Single custom step that calls add_bots()
	 */
	public function update_data()
	{
		return [
			['custom', [[$this, 'add_bots']]],
		];
	}

	/**
	 * Create the user and bot entries for every bot that is not present yet.
	 *
	 * @return void
	 */
	public function add_bots()
	{
		// Bot display name => user agent fragment stored as bot_agent
		$bots = [
			'Ahrefs [Bot]'	=> 'AhrefsBot/',
			'Amazon [Bot]'	=> 'Amazonbot/',
			'Semrush [Bot]'	=> 'SemrushBot/',
		];

		// Filled lazily, and only once, when the first missing bot is found
		$group_row = [];

		foreach ($bots as $bot_name => $bot_agent)
		{
			$bot_name_clean = utf8_clean_string($bot_name);

			// Skip bots for which a user with the same clean name exists
			$sql = 'SELECT user_id
				FROM ' . $this->table_prefix . 'users
				WHERE ' . $this->db->sql_build_array('SELECT', ['username_clean' => $bot_name_clean]);
			$result = $this->db->sql_query($sql);
			$bot_exists = (bool) $this->db->sql_fetchfield('user_id');
			$this->db->sql_freeresult($result);

			if ($bot_exists)
			{
				continue;
			}

			if (empty($group_row))
			{
				$sql = 'SELECT group_id, group_colour
					FROM ' . $this->table_prefix . 'groups
					WHERE ' . $this->db->sql_build_array('SELECT', ['group_name' => 'BOTS']);
				$result = $this->db->sql_query($sql);
				$group_row = $this->db->sql_fetchrow($result);
				$this->db->sql_freeresult($result);

				// Default fallback, should never get here.
				// The fetch does not yield an array when no row matched, so
				// test for emptiness instead of calling count() on it (count()
				// on a non-countable value is a TypeError as of PHP 8).
				if (empty($group_row))
				{
					$group_row = [
						'group_id'		=> 6,
						'group_colour'	=> '9E8DA7',
					];
				}
			}

			// user_add() is expected to come from includes/functions_user
			if (!function_exists('user_add'))
			{
				include($this->phpbb_root_path . 'includes/functions_user.' . $this->php_ext);
			}

			$user_row = [
				'user_type'				=> USER_IGNORE,
				'group_id'				=> $group_row['group_id'],
				'username'				=> $bot_name,
				'user_regdate'			=> time(),
				'user_password'			=> '',
				'user_colour'			=> $group_row['group_colour'],
				'user_email'			=> '',
				'user_lang'				=> $this->config['default_lang'],
				'user_style'			=> $this->config['default_style'],
				'user_timezone'			=> 0,
				'user_dateformat'		=> $this->config['default_dateformat'],
				'user_allow_massemail'	=> 0,
			];

			$user_id = user_add($user_row);

			// Link the new user to its user agent in the bots table
			$sql = 'INSERT INTO ' . $this->table_prefix . 'bots ' . $this->db->sql_build_array('INSERT', [
				'bot_active'	=> 1,
				'bot_name'		=> $bot_name,
				'user_id'		=> (int) $user_id,
				'bot_agent'		=> $bot_agent,
				'bot_ip'		=> '',
			]);
			$this->db->sql_query($sql);
		}
	}
}
|
@ -58,13 +58,15 @@ class add_bots extends \phpbb\install\task_base
|
||||
*/
|
||||
protected $bot_list = array(
|
||||
'AdsBot [Google]' => array('AdsBot-Google', ''),
|
||||
'Ahrefs [Bot]' => array('AhrefsBot/', ''),
|
||||
'Alexa [Bot]' => array('ia_archiver', ''),
|
||||
'Alta Vista [Bot]' => array('Scooter/', ''),
|
||||
'Amazon [Bot]' => array('Amazonbot/', ''),
|
||||
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
|
||||
'Baidu [Spider]' => array('Baiduspider', ''),
|
||||
'Bing [Bot]' => array('bingbot/', ''),
|
||||
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
|
||||
'Exabot [Bot]' => array('Exabot', ''),
|
||||
'Exabot [Bot]' => array('Exabot/', ''),
|
||||
'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''),
|
||||
'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''),
|
||||
'Francis [Bot]' => array('http://www.neomo.de/', ''),
|
||||
@ -83,21 +85,28 @@ class add_bots extends \phpbb\install\task_base
|
||||
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
|
||||
'MSN [Bot]' => array('msnbot/', ''),
|
||||
'MSNbot Media' => array('msnbot-media/', ''),
|
||||
'NG-Search [Bot]' => array('NG-Search/', ''),
|
||||
'Nutch [Bot]' => array('http://lucene.apache.org/nutch/', ''),
|
||||
'Nutch/CVS [Bot]' => array('NutchCVS/', ''),
|
||||
'OmniExplorer [Bot]' => array('OmniExplorer_Bot/', ''),
|
||||
'Online link [Validator]' => array('online link validator', ''),
|
||||
'psbot [Picsearch]' => array('psbot/0', ''),
|
||||
'Seekport [Bot]' => array('Seekbot/', ''),
|
||||
'Semrush [Bot]' => array('SemrushBot/', ''),
|
||||
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
|
||||
'SEO Crawler' => array('SEO search Crawler/', ''),
|
||||
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
|
||||
'SEOSearch [Crawler]' => array('SEOsearch/', ''),
|
||||
'Snappy [Bot]' => array('Snappy/1.1 ( http://www.urltrends.com/ )', ''),
|
||||
'Steeler [Crawler]' => array('http://www.tkl.iis.u-tokyo.ac.jp/~crawler/', ''),
|
||||
'Synoo [Bot]' => array('SynooBot/', ''),
|
||||
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
|
||||
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
|
||||
'Voyager [Bot]' => array('voyager/', ''),
|
||||
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
|
||||
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
|
||||
'W3C [Validator]' => array('W3C_Validator', ''),
|
||||
'W3C [Validator]' => array('W3C_*Validator', ''),
|
||||
'WiseNut [Bot]' => array('http://www.WISEnutbot.com', ''),
|
||||
'YaCy [Bot]' => array('yacybot', ''),
|
||||
'Yahoo MMCrawler [Bot]' => array('Yahoo-MMCrawler/', ''),
|
||||
'Yahoo Slurp [Bot]' => array('Yahoo! DE Slurp', ''),
|
||||
|
Loading…
x
Reference in New Issue
Block a user