From 78fcdb5fdb8d0f7de16abb2ab476e6d60d84f45b Mon Sep 17 00:00:00 2001 From: Petr Skoda Date: Fri, 15 Jul 2011 15:00:49 +0200 Subject: [PATCH] MDL-28345 make sure input parameters do not contain invalid utf-8 chars --- lib/configonlylib.php | 3 +- lib/formslib.php | 2 ++ lib/moodlelib.php | 54 +++++++++++++++++++++++++++++++- lib/simpletest/testmoodlelib.php | 21 +++++++++++++ lib/weblib.php | 2 +- login/index.php | 2 +- user/addnote.php | 4 +-- user/groupaddnote.php | 4 +-- user/messageselect.php | 14 +++++---- 9 files changed, 92 insertions(+), 14 deletions(-) diff --git a/lib/configonlylib.php b/lib/configonlylib.php index 8d7dbf45d16..62391626d2d 100644 --- a/lib/configonlylib.php +++ b/lib/configonlylib.php @@ -58,7 +58,8 @@ function min_optional_param($name, $default, $type) { */ function min_clean_param($value, $type) { switch($type) { - case 'RAW': break; + case 'RAW': $value = iconv('UTF-8', 'UTF-8//IGNORE', $value); + break; case 'INT': $value = (int)$value; break; case 'SAFEDIR': $value = preg_replace('/[^a-zA-Z0-9_-]/', '', $value); diff --git a/lib/formslib.php b/lib/formslib.php index 91e49bd1f7a..7b2d301ce35 100644 --- a/lib/formslib.php +++ b/lib/formslib.php @@ -1380,6 +1380,8 @@ class MoodleQuickForm extends HTML_QuickForm_DHTMLRulesTableless { foreach ($submission as $key=>$s) { if (array_key_exists($key, $this->_types)) { $submission[$key] = clean_param($s, $this->_types[$key]); + } else { + $submission[$key] = clean_param($s, PARAM_RAW); } } $this->_submitValues = $submission; diff --git a/lib/moodlelib.php b/lib/moodlelib.php index e714bd26cea..47e5a7b3433 100644 --- a/lib/moodlelib.php +++ b/lib/moodlelib.php @@ -178,7 +178,7 @@ define('PARAM_PEM', 'pem'); define('PARAM_PERMISSION', 'permission'); /** - * PARAM_RAW specifies a parameter that is not cleaned/processed in any way + * PARAM_RAW specifies a parameter that is not cleaned/processed in any way except the discarding of the invalid utf-8 characters */ define('PARAM_RAW', 'raw'); @@ -569,9 +569,11 @@ function clean_param($param, $type) { switch ($type) { case PARAM_RAW: // no cleaning at all + $param = fix_utf8($param); return $param; case PARAM_RAW_TRIMMED: // no cleaning, but strip leading and trailing whitespace. + $param = fix_utf8($param); return trim($param); case PARAM_CLEAN: // General HTML cleaning, try to use more specific type if possible @@ -579,9 +581,11 @@ function clean_param($param, $type) { if (is_numeric($param)) { return $param; } + $param = fix_utf8($param); return clean_text($param); // Sweep for scripts, etc case PARAM_CLEANHTML: // clean html fragment + $param = fix_utf8($param); $param = clean_text($param, FORMAT_HTML); // Sweep for scripts, etc return trim($param); @@ -619,9 +623,11 @@ function clean_param($param, $type) { return $param; case PARAM_NOTAGS: // Strip all tags + $param = fix_utf8($param); return strip_tags($param); case PARAM_TEXT: // leave only tags needed for multilang + $param = fix_utf8($param); // if the multilang syntax is not correct we strip all tags // because it would break xhtml strict which is required for accessibility standards // please note this cleaning does not strip unbalanced '>' for BC compatibility reasons @@ -691,6 +697,7 @@ function clean_param($param, $type) { return preg_replace('/[^a-zA-Z0-9\/_-]/i', '', $param); case PARAM_FILE: // Strip all suspicious characters from filename + $param = fix_utf8($param); $param = preg_replace('~[[:cntrl:]]|[&<>"`\|\':\\\\/]~u', '', $param); $param = preg_replace('~\.\.+~', '', $param); if ($param === '.') { @@ -699,6 +706,7 @@ function clean_param($param, $type) { return $param; case PARAM_PATH: // Strip all suspicious characters from file path + $param = fix_utf8($param); $param = str_replace('\\', '/', $param); $param = preg_replace('~[[:cntrl:]]|[&<>"`\|\':]~u', '', $param); $param = preg_replace('~\.\.+~', '', $param); @@ -729,6 +737,7 @@ function clean_param($param, $type) { return $param; case PARAM_URL: // allow safe ftp, http, mailto urls + $param = fix_utf8($param); include_once($CFG->dirroot . '/lib/validateurlsyntax.php'); if (!empty($param) && validateUrlSyntax($param, 's?H?S?F?E?u-P-a?I?p?f?q?r?')) { // all is ok, param is respected @@ -805,6 +814,7 @@ function clean_param($param, $type) { } case PARAM_TAG: + $param = fix_utf8($param); // Please note it is not safe to use the tag name directly anywhere, // it must be processed with s(), urlencode() before embedding anywhere. // remove some nasties @@ -816,6 +826,7 @@ function clean_param($param, $type) { return $param; case PARAM_TAGLIST: + $param = fix_utf8($param); $tags = explode(',', $param); $result = array(); foreach ($tags as $tag) { @@ -872,6 +883,7 @@ function clean_param($param, $type) { } case PARAM_USERNAME: + $param = fix_utf8($param); $param = str_replace(" " , "", $param); $param = moodle_strtolower($param); // Convert uppercase to lowercase MDL-16919 if (empty($CFG->extendedusernamechars)) { @@ -882,6 +894,7 @@ function clean_param($param, $type) { return $param; case PARAM_EMAIL: + $param = fix_utf8($param); if (validate_email($param)) { return $param; } else { @@ -896,6 +909,7 @@ function clean_param($param, $type) { } case PARAM_TIMEZONE: //can be int, float(with .5 or .0) or string seperated by '/' and can have '-_' + $param = fix_utf8($param); $timezonepattern = '/^(([+-]?(0?[0-9](\.[5|0])?|1[0-3]|1[0-2]\.5))|(99)|[[:alnum:]]+(\/?[[:alpha:]_-])+)$/'; if (preg_match($timezonepattern, $param)) { return $param; @@ -908,6 +922,44 @@ function clean_param($param, $type) { } } +/** + * Makes sure the data is using valid utf8, invalid characters are discarded. + * + * Note: this function is not intended for full objects with methods and private properties. + * + * @param mixed $value + * @return mixed with proper utf-8 encoding + */ +function fix_utf8($value) { + if (is_null($value) or $value === '') { + return $value; + + } else if (is_string($value)) { + if ((string)(int)$value === $value) { + // shortcut + return $value; + } + return iconv('UTF-8', 'UTF-8//IGNORE', $value); + + } else if (is_array($value)) { + foreach ($value as $k=>$v) { + $value[$k] = fix_utf8($v); + } + return $value; + + } else if (is_object($value)) { + $value = clone($value); // do not modify original + foreach ($value as $k=>$v) { + $value->$k = fix_utf8($v); + } + return $value; + + } else { + // this is some other type, no utf-8 here + return $value; + } +} + /** * Return true if given value is integer or string with integer value * diff --git a/lib/simpletest/testmoodlelib.php b/lib/simpletest/testmoodlelib.php index b9ebb8f437a..702ecc17e00 100644 --- a/lib/simpletest/testmoodlelib.php +++ b/lib/simpletest/testmoodlelib.php @@ -296,6 +296,27 @@ class moodlelib_test extends UnitTestCase { $this->assertEqual(array('gecko', 'gecko19'), get_browser_version_classes()); } + function test_fix_utf8() { + // make sure valid data including other types is not changed + $this->assertidentical(null, fix_utf8(null)); + $this->assertidentical(1, fix_utf8(1)); + $this->assertidentical(1.1, fix_utf8(1.1)); + $this->assertidentical(true, fix_utf8(true)); + $this->assertidentical('', fix_utf8('')); + $array = array('do', 're', 'mi'); + $this->assertidentical($array, fix_utf8($array)); + $object = new stdClass(); + $object->a = 'aa'; + $object->b = 'bb'; + $this->assertidentical($object, fix_utf8($object)); + + // valid utf8 string + $this->assertidentical("žlutý koníček přeskočil potůček \n\t\r\0", fix_utf8("žlutý koníček přeskočil potůček \n\t\r\0")); + + // invalid utf8 string + $this->assertidentical('aaabbb', fix_utf8('aaa'.chr(130).'bbb')); + } + function test_optional_param() { $_POST['username'] = 'post_user'; $_GET['username'] = 'get_user'; diff --git a/lib/weblib.php b/lib/weblib.php index 2b806de22eb..11800d93d78 100644 --- a/lib/weblib.php +++ b/lib/weblib.php @@ -757,7 +757,7 @@ function data_submitted() { if (empty($_POST)) { return false; } else { - return (object)$_POST; + return (object)fix_utf8($_POST); } } diff --git a/login/index.php b/login/index.php index 52d86a564ec..9eed7f31476 100644 --- a/login/index.php +++ b/login/index.php @@ -301,7 +301,7 @@ $PAGE->verify_https_required(); if (empty($frm->username) && $authsequence[0] != 'shibboleth') { // See bug 5184 if (!empty($_GET["username"])) { - $frm->username = $_GET["username"]; + $frm->username = clean_param($_GET["username"], PARAM_RAW); // we do not want data from _POST here } else { $frm->username = get_moodle_cookie(); } diff --git a/user/addnote.php b/user/addnote.php index b6a0e1082f1..06cae6b97f2 100644 --- a/user/addnote.php +++ b/user/addnote.php @@ -97,8 +97,8 @@ $table->align = array ('left', 'center', 'center'); $state_names = note_get_state_names(); // the first time list hack -if (empty($users)) { - foreach ($_POST as $k => $v) { +if (empty($users) and $post = data_submitted()) { + foreach ($post as $k => $v) { if (preg_match('/^user(\d+)$/',$k,$m)) { $users[] = $m[1]; } diff --git a/user/groupaddnote.php b/user/groupaddnote.php index 9cbcbf731f5..6d53875e9f7 100644 --- a/user/groupaddnote.php +++ b/user/groupaddnote.php @@ -93,8 +93,8 @@ echo ''; $state_names = note_get_state_names(); // the first time list hack -if (empty($users)) { - foreach ($_POST as $k => $v) { +if (empty($users) and $post = data_submitted()) { + foreach ($post as $k => $v) { if (preg_match('/^user(\d+)$/',$k,$m)) { $users[] = $m[1]; } diff --git a/user/messageselect.php b/user/messageselect.php index d60fec19919..84aafd9130d 100644 --- a/user/messageselect.php +++ b/user/messageselect.php @@ -91,12 +91,14 @@ $messagebody = $SESSION->emailselect[$id]['messagebody']; $count = 0; -foreach ($_POST as $k => $v) { - if (preg_match('/^(user|teacher)(\d+)$/',$k,$m)) { - if (!array_key_exists($m[2],$SESSION->emailto[$id])) { - if ($user = $DB->get_record_select('user', "id = ?", array($m[2]), 'id,firstname,lastname,idnumber,email,mailformat,lastaccess, lang')) { - $SESSION->emailto[$id][$m[2]] = $user; - $count++; +if ($post = data_submitted()) { + foreach ($data as $k => $v) { + if (preg_match('/^(user|teacher)(\d+)$/',$k,$m)) { + if (!array_key_exists($m[2],$SESSION->emailto[$id])) { + if ($user = $DB->get_record_select('user', "id = ?", array($m[2]), 'id,firstname,lastname,idnumber,email,mailformat,lastaccess, lang')) { + $SESSION->emailto[$id][$m[2]] = $user; + $count++; + } } } }