From 3bc082dd0715bb7e17dde53be90735d9bd5efd4c Mon Sep 17 00:00:00 2001 From: Brendan Heywood Date: Thu, 18 Jul 2019 12:06:25 +1000 Subject: [PATCH] MDL-66166 core_useragent: Update MoodleBot UA and treat as web crawler --- lib/classes/useragent.php | 15 ++++++++++++++- lib/filelib.php | 4 ++-- lib/tests/filelib_test.php | 8 +++++--- lib/tests/useragent_test.php | 10 ++++++++++ 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/lib/classes/useragent.php b/lib/classes/useragent.php index 763543b77f0..767f0f50111 100644 --- a/lib/classes/useragent.php +++ b/lib/classes/useragent.php @@ -137,6 +137,18 @@ class core_useragent { } } + /** + * Get the MoodleBot UserAgent for this site. + * + * @return string UserAgent + */ + public static function get_moodlebot_useragent() { + global $CFG; + + $version = moodle_major_version(); // Only major version for security. + return "MoodleBot/$version (+{$CFG->wwwroot})"; + } + /** * Returns the user agent string. * @return bool|string The user agent string or false if one isn't available. @@ -215,7 +227,8 @@ class core_useragent { * @return bool */ protected function is_useragent_web_crawler() { - $regex = '/Googlebot|google\.com|Yahoo! Slurp|\[ZSEBOT\]|msnbot|bingbot|BingPreview|Yandex|AltaVista|Baiduspider|Teoma/i'; + $regex = '/MoodleBot|Googlebot|google\.com|Yahoo! Slurp|\[ZSEBOT\]|msnbot|bingbot|BingPreview|Yandex|AltaVista' + .'|Baiduspider|Teoma/i'; return (preg_match($regex, $this->useragent)); } diff --git a/lib/filelib.php b/lib/filelib.php index c541731829f..cb20d0676b1 100644 --- a/lib/filelib.php +++ b/lib/filelib.php @@ -3103,7 +3103,7 @@ class curl { */ public function resetopt() { $this->options = array(); - $this->options['CURLOPT_USERAGENT'] = 'MoodleBot/1.0'; + $this->options['CURLOPT_USERAGENT'] = \core_useragent::get_moodlebot_useragent(); // True to include the header in the output $this->options['CURLOPT_HEADER'] = 0; // True to Exclude the body from the output @@ -3346,7 +3346,7 @@ class curl { } else if (!empty($this->options['CURLOPT_USERAGENT'])) { $useragent = $this->options['CURLOPT_USERAGENT']; } else { - $useragent = 'MoodleBot/1.0'; + $useragent = \core_useragent::get_moodlebot_useragent(); } // Set headers. diff --git a/lib/tests/filelib_test.php b/lib/tests/filelib_test.php index f6e0c1b5dd3..e3bab6df77d 100644 --- a/lib/tests/filelib_test.php +++ b/lib/tests/filelib_test.php @@ -1060,14 +1060,16 @@ EOF; $options = $curl->get_options(); $this->assertNotEmpty($options); + $moodlebot = \core_useragent::get_moodlebot_useragent(); + $curl->call_apply_opt($options); - $this->assertTrue(in_array('User-Agent: MoodleBot/1.0', $curl->header)); + $this->assertTrue(in_array("User-Agent: $moodlebot", $curl->header)); $this->assertFalse(in_array('User-Agent: Test/1.0', $curl->header)); $options['CURLOPT_USERAGENT'] = 'Test/1.0'; $curl->call_apply_opt($options); $this->assertTrue(in_array('User-Agent: Test/1.0', $curl->header)); - $this->assertFalse(in_array('User-Agent: MoodleBot/1.0', $curl->header)); + $this->assertFalse(in_array("User-Agent: $moodlebot", $curl->header)); $curl->set_option('CURLOPT_USERAGENT', 'AnotherUserAgent/1.0'); $curl->call_apply_opt(); @@ -1082,7 +1084,7 @@ EOF; $curl->unset_option('CURLOPT_USERAGENT'); $curl->call_apply_opt(); - $this->assertTrue(in_array('User-Agent: MoodleBot/1.0', $curl->header)); + $this->assertTrue(in_array("User-Agent: $moodlebot", $curl->header)); // Finally, test it via exttests, to ensure the agent is sent properly. // Matching. diff --git a/lib/tests/useragent_test.php b/lib/tests/useragent_test.php index 3342e35458b..28a3b0031a3 100644 --- a/lib/tests/useragent_test.php +++ b/lib/tests/useragent_test.php @@ -1387,6 +1387,16 @@ class core_useragent_testcase extends advanced_testcase { ), ), + // MoodleBot. + array( + 'User-Agent: MoodleBot/3.8 (+https://moodle.org)', + array( + 'is_web_crawler' => true, + 'versionclasses' => array( + ), + ), + ), + // Macos Desktop app. array( 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) moodlemobile/3.6.0 Chrome/69.0.3497.106 Electron/4.0.1 Safari/537.36 MoodleMobile',