diff --git a/lib/setuplib.php b/lib/setuplib.php
index 38f746f76ac..d100654f954 100644
--- a/lib/setuplib.php
+++ b/lib/setuplib.php
@@ -1314,12 +1314,18 @@ function is_web_crawler() {
             return true;
         } else if (strpos($_SERVER['HTTP_USER_AGENT'], '[ZSEBOT]') !== false ) { // Zoomspider
             return true;
-        } else if (strpos($_SERVER['HTTP_USER_AGENT'], 'MSNBOT') !== false ) { // MSN Search
+        } else if (stripos($_SERVER['HTTP_USER_AGENT'], 'msnbot') !== false ) { // MSN Search
+            return true;
+        } else if (stripos($_SERVER['HTTP_USER_AGENT'], 'bingbot') !== false ) { // Bing
             return true;
         } else if (strpos($_SERVER['HTTP_USER_AGENT'], 'Yandex') !== false ) {
             return true;
         } else if (strpos($_SERVER['HTTP_USER_AGENT'], 'AltaVista') !== false ) {
             return true;
+        } else if (stripos($_SERVER['HTTP_USER_AGENT'], 'baiduspider') !== false ) { // Baidu
+            return true;
+        } else if (strpos($_SERVER['HTTP_USER_AGENT'], 'Teoma') !== false ) { // Ask.com
+            return true;
         }
     }
     return false;
diff --git a/lib/tests/setuplib_test.php b/lib/tests/setuplib_test.php
index 7008e583365..30a257bd50e 100644
--- a/lib/tests/setuplib_test.php
+++ b/lib/tests/setuplib_test.php
@@ -71,4 +71,68 @@ class core_setuplib_testcase extends basic_testcase {
         $this->assertEquals($CFG->wwwroot . '/lib/tests/setuplib_test.php',
                 get_docs_url('%%WWWROOT%%/lib/tests/setuplib_test.php'));
     }
+
+    /**
+     * Checks that is_web_crawler() returns false for ordinary browser user agents
+     * and true for the user agents of the search engine crawlers it recognises.
+     */
+    public function test_is_web_crawler() {
+        $browsers = array(
+            'Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))',
+            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/18.0 Firefox/18.0',
+            'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/412 (KHTML, like Gecko) Safari/412',
+            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.215 Safari/534.10',
+            'Opera/9.0 (Windows NT 5.1; U; en)',
+            'Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17 –Nexus',
+            'Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5',
+        );
+        $crawlers = array(
+            // Google
+            'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+            'Googlebot/2.1 (+http://www.googlebot.com/bot.html)',
+            'Googlebot-Image/1.0',
+            // Yahoo
+            'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
+            // Bing
+            'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
+            'Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)',
+            // Bing in mixed case: the bingbot check is case-insensitive, like msnbot and baiduspider.
+            'Mozilla/5.0 (compatible; BingBot/2.0)',
+            // MSN
+            'msnbot/2.1',
+            // Yandex
+            'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)',
+            'Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)',
+            // AltaVista
+            'AltaVista V2.0B crawler@evreka.com',
+            // ZoomSpider
+            'ZoomSpider - wrensoft.com [ZSEBOT]',
+            // Baidu
+            'Baiduspider+(+http://www.baidu.com/search/spider_jp.html)',
+            'Baiduspider+(+http://www.baidu.com/search/spider.htm)',
+            'BaiDuSpider',
+            // Ask.com
+            'User-Agent: Mozilla/2.0 (compatible; Ask Jeeves/Teoma)',
+        );
+
+        // is_web_crawler() reads $_SERVER['HTTP_USER_AGENT'], so remember the current
+        // value and put it back afterwards instead of leaking the last test agent.
+        $hadagent = isset($_SERVER['HTTP_USER_AGENT']);
+        $oldagent = $hadagent ? $_SERVER['HTTP_USER_AGENT'] : null;
+
+        foreach ($browsers as $agent) {
+            $_SERVER['HTTP_USER_AGENT'] = $agent;
+            $this->assertFalse(is_web_crawler(), "$agent should not be considered a search engine");
+        }
+        foreach ($crawlers as $agent) {
+            $_SERVER['HTTP_USER_AGENT'] = $agent;
+            $this->assertTrue(is_web_crawler(), "$agent should be considered a search engine");
+        }
+
+        if ($hadagent) {
+            $_SERVER['HTTP_USER_AGENT'] = $oldagent;
+        } else {
+            unset($_SERVER['HTTP_USER_AGENT']);
+        }
+    }
 }