From c9840c0144efaa9dd201fb3b1b3c3dae021858c1 Mon Sep 17 00:00:00 2001 From: "Eloy Lafuente (stronk7)" Date: Thu, 9 Aug 2018 16:05:55 +0200 Subject: [PATCH] MDL-62619 privacy: Prevent action when boolean queries are involved Before the patch, queries like: SELECT 1 FROM dual UNION SELECT 2 FROM dual were failing badly, with everything but the first numeric element being ignored by the optimization. So, being conservative, now we reduce the query being analysed, ignoring any subquery, inline view (anything within parenthesis in general) and, in the remaining query, if a boolean query (UNION, MINUS, INTERSECT...) is found, we don't apply any optimization. --- privacy/classes/local/request/contextlist.php | 15 ++++++++++++- privacy/tests/contextlist_test.php | 21 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/privacy/classes/local/request/contextlist.php b/privacy/classes/local/request/contextlist.php index 4e391abcb75..b9b2cd09cf7 100644 --- a/privacy/classes/local/request/contextlist.php +++ b/privacy/classes/local/request/contextlist.php @@ -138,6 +138,12 @@ class contextlist extends contextlist_base { * @return string The field name or a numeric value representing the context id */ protected function guess_id_field_from_sql(string $sql) : string { + // We are not interested in any subquery/view/conditions for the purpose of this method, so + // let's reduce the query to the interesting parts by recursively cleaning all + // contents within parenthesis. If there are problems (null), we keep the text unmodified. + // So just top-level sql will remain after the reduction. + $recursiveregexp = '/\((([^()]*|(?R))*)\)/'; + $sql = (preg_replace($recursiveregexp, '', $sql) ?: $sql); // Get the list of relevant words from the SQL Query. // We explode the SQL by the space character, then trim any extra whitespace (e.g. newlines), before we filter // empty value, and finally we re-index the array. @@ -147,8 +153,15 @@ class contextlist extends contextlist_base { return $word !== ''; }); $words = array_values($words); + $uwords = array_map('strtoupper', $words); // Uppercase all them. - if ($firstfrom = array_search('FROM', array_map('strtoupper', $words))) { + // If the query has boolean operators (UNION, it is the only one we support cross-db) + // then we cannot guarantee whats coming after the first query, it can be anything. + if (array_search('UNION', $uwords)) { + return ''; + } + + if ($firstfrom = array_search('FROM', $uwords)) { // Found a FROM keyword. // Select the previous word. $fieldname = $words[$firstfrom - 1]; diff --git a/privacy/tests/contextlist_test.php b/privacy/tests/contextlist_test.php index a247b3d3ab9..43d8b74ea9d 100644 --- a/privacy/tests/contextlist_test.php +++ b/privacy/tests/contextlist_test.php @@ -176,6 +176,27 @@ class contextlist_test extends advanced_testcase { 'SELECT 4 FROM users', '4', ], + 'select_with_complex_subqueries' => [ + 'SELECT id FROM table WHERE id IN ( + SELECT x FROM xtable + UNION + SELECT y FROM ( + SELECT y FROM ytable + JOIN ztable ON (z = y)))', + 'id' + ], + 'invalid_union_with_first_being_column_name' => [ + 'SELECT id FROM table UNION SELECT 1 FROM table', + '' + ], + 'invalid_union_with_first_being_numeric' => [ + 'SELECT 1 FROM table UNION SELECT id FROM table', + '' + ], + 'invalid_union_without_from' => [ + 'SELECT 1 UNION SELECT id FROM table', + '' + ], 'invalid_1' => [ 'SELECT 1+1', '',