From abafbc840555931b2ee1a392d6ccf466f87c7cfa Mon Sep 17 00:00:00 2001 From: David Monllao Date: Wed, 6 Sep 2017 18:42:46 +0200 Subject: [PATCH] MDL-59630 analytics: mlbackend model directories deletion --- analytics/classes/model.php | 54 ++++++++++++++++------ analytics/classes/predictor.php | 33 +++++++++++++ analytics/tests/model_test.php | 16 +++++-- lib/mlbackend/php/classes/processor.php | 21 +++++++++ lib/mlbackend/python/classes/processor.php | 21 +++++++++ 5 files changed, 127 insertions(+), 18 deletions(-) diff --git a/analytics/classes/model.php b/analytics/classes/model.php index d8b1ab2216f..4bf9ea1b9db 100644 --- a/analytics/classes/model.php +++ b/analytics/classes/model.php @@ -435,14 +435,20 @@ class model { if ($this->model->timesplitting !== $timesplittingid || $this->model->indicators !== $indicatorsstr) { + + // Delete generated predictions before changing the model version. + $this->clear_model(); + + // It needs to be reset as the version changes. + $this->uniqueid = null; + // We update the version of the model so different time splittings are not mixed up. $this->model->version = $now; - // Delete generated predictions. - $this->clear_model(); - // Reset trained flag. - $this->model->trained = 0; + if (!$this->is_static()) { + $this->model->trained = 0; + } } else if ($this->model->enabled != $enabled) { // We purge the cached contexts with insights as some will not be visible anymore. @@ -456,9 +462,6 @@ class model { $this->model->usermodified = $USER->id; $DB->update_record('analytics_models', $this->model); - - // It needs to be reset (just in case, we may already used it). - $this->uniqueid = null; } /** @@ -472,6 +475,11 @@ class model { \core_analytics\manager::check_can_manage_models(); $this->clear_model(); + + // Method self::clear_model is already clearing the current model version. + $predictor = \core_analytics\manager::get_predictions_processor(); + $predictor->delete_output_dir($this->get_output_dir(array(), true)); + $DB->delete_records('analytics_models', array('id' => $this->model->id)); $DB->delete_records('analytics_models_log', array('modelid' => $this->model->id)); } @@ -974,13 +982,23 @@ class model { throw new \moodle_exception('errorinvalidtimesplitting', 'analytics'); } + // Delete generated predictions before changing the model version. + $this->clear_model(); + + // It needs to be reset as the version changes. + $this->uniqueid = null; + $this->model->timesplitting = $timesplittingid; $this->model->version = $now; + + // Reset trained flag. + if (!$this->is_static()) { + $this->model->trained = 0; + } } // Purge pages with insights as this may change things. - if ($timesplittingid && $timesplittingid !== $this->model->timesplitting || - $this->model->enabled != 1) { + if ($this->model->enabled != 1) { $this->purge_insights_cache(); } @@ -989,9 +1007,6 @@ class model { // We don't always update timemodified intentionally as we reserve it for target, indicators or timesplitting updates. $DB->update_record('analytics_models', $this->model); - - // It needs to be reset (just in case, we may already used it). - $this->uniqueid = null; } /** @@ -1229,9 +1244,10 @@ class model { * models/$model->id/$model->version/execution * * @param array $subdirs + * @param bool $onlymodelid Preference over $subdirs * @return string */ - protected function get_output_dir($subdirs = array()) { + protected function get_output_dir($subdirs = array(), $onlymodelid = false) { global $CFG; $subdirstr = ''; @@ -1245,8 +1261,12 @@ class model { $outputdir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models'; } - // Append model id and version + subdirs. - $outputdir .= DIRECTORY_SEPARATOR . $this->model->id . DIRECTORY_SEPARATOR . $this->model->version . $subdirstr; + // Append model id + $outputdir .= DIRECTORY_SEPARATOR . $this->model->id; + if (!$onlymodelid) { + // Append version + subdirs. + $outputdir .= DIRECTORY_SEPARATOR . $this->model->version . $subdirstr; + } make_writable_directory($outputdir); @@ -1411,6 +1431,10 @@ class model { private function clear_model() { global $DB; + // Delete current model version stored stuff. + $predictor = \core_analytics\manager::get_predictions_processor(); + $predictor->clear_model($this->get_unique_id(), $this->get_output_dir()); + $predictionids = $DB->get_fieldset_select('analytics_predictions', 'id', 'modelid = :modelid', array('modelid' => $this->get_id())); if ($predictionids) { diff --git a/analytics/classes/predictor.php b/analytics/classes/predictor.php index 4b4548b4084..4dcb4913b33 100644 --- a/analytics/classes/predictor.php +++ b/analytics/classes/predictor.php @@ -41,4 +41,37 @@ interface predictor { * @return bool */ public function is_ready(); + + /** + * Delete all stored information of the current model id. + * + * This method is called when there are important changes to a model, + * all previous training algorithms using that version of the model + * should be deleted. + * + * In case you want to perform extra security measures before deleting + * a directory you can check that $modelversionoutputdir subdirectories + * can only be named 'execution', 'evaluation' or 'testing'. + * + * @param string $uniqueid The site model unique id string + * @param string $modelversionoutputdir The output dir of this model version + * @return null + */ + public function clear_model($uniqueid, $modelversionoutputdir); + + /** + * Delete the output directory. + * + * This method is called when a model is completely deleted. + * + * In case you want to perform extra security measures before deleting + * a directory you can check that the subdirectories are timestamps + * (the model version) and each of this subdirectories' subdirectories + * can only be named 'execution', 'evaluation' or 'testing'. + * + * @param string $modeloutputdir The model directory id (parent of all model versions subdirectories). + * @return null + */ + public function delete_output_dir($modeloutputdir); + } diff --git a/analytics/tests/model_test.php b/analytics/tests/model_test.php index 48bd6300762..fcb7eaceb83 100644 --- a/analytics/tests/model_test.php +++ b/analytics/tests/model_test.php @@ -99,6 +99,9 @@ class analytics_model_testcase extends advanced_testcase { // Fake evaluation results record to check that it is actually deleted. $this->add_fake_log(); + $modeloutputdir = $this->model->get_output_dir(array(), true); + $this->assertTrue(is_dir($modeloutputdir)); + // Generate a prediction action to confirm that it is deleted when there is an important update. $predictions = $DB->get_records('analytics_predictions'); $prediction = reset($predictions); @@ -113,6 +116,7 @@ class analytics_model_testcase extends advanced_testcase { $this->assertEmpty($DB->count_records('analytics_train_samples')); $this->assertEmpty($DB->count_records('analytics_predict_samples')); $this->assertEmpty($DB->count_records('analytics_used_files')); + $this->assertFalse(is_dir($modeloutputdir)); set_config('enabled_stores', '', 'tool_log'); get_log_manager(true); @@ -146,8 +150,13 @@ class analytics_model_testcase extends advanced_testcase { $prediction = new \core_analytics\prediction($prediction, array('whatever' => 'not used')); $prediction->action_executed(\core_analytics\prediction::ACTION_FIXED, $this->model->get_target()); + $modelversionoutputdir = $this->model->get_output_dir(); + $this->assertTrue(is_dir($modelversionoutputdir)); + // Update to an empty time splitting method to force clear_model execution. $this->model->update(1, false, ''); + $this->assertFalse(is_dir($modelversionoutputdir)); + // Restore previous time splitting method. $this->model->enable('\core\analytics\time_splitting\no_splitting'); @@ -186,7 +195,7 @@ class analytics_model_testcase extends advanced_testcase { $modeldir = $dir . DIRECTORY_SEPARATOR . $this->modelobj->id . DIRECTORY_SEPARATOR . $this->modelobj->version; $this->assertEquals($modeldir, $this->model->get_output_dir()); - $this->assertEquals($modeldir . DIRECTORY_SEPARATOR . 'asd', $this->model->get_output_dir(array('asd'))); + $this->assertEquals($modeldir . DIRECTORY_SEPARATOR . 'testing', $this->model->get_output_dir(array('testing'))); } public function test_unique_id() { @@ -280,10 +289,11 @@ class testable_model extends \core_analytics\model { * get_output_dir * * @param array $subdirs + * @param bool $onlymodelid * @return string */ - public function get_output_dir($subdirs = array()) { - return parent::get_output_dir($subdirs); + public function get_output_dir($subdirs = array(), $onlymodelid = false) { + return parent::get_output_dir($subdirs, $onlymodelid); } /** diff --git a/lib/mlbackend/php/classes/processor.php b/lib/mlbackend/php/classes/processor.php index 9cbbdf3584d..9a84c5cb88f 100644 --- a/lib/mlbackend/php/classes/processor.php +++ b/lib/mlbackend/php/classes/processor.php @@ -72,6 +72,27 @@ class processor implements \core_analytics\classifier, \core_analytics\regressor return true; } + /** + * Delete the stored models. + * + * @param string $uniqueid + * @param string $modelversionoutputdir + * @return null + */ + public function clear_model($uniqueid, $modelversionoutputdir) { + remove_dir($modelversionoutputdir); + } + + /** + * Delete the output directory. + * + * @param string $modeloutputdir + * @return null + */ + public function delete_output_dir($modeloutputdir) { + remove_dir($modeloutputdir); + } + /** * Train this processor classification model using the provided supervised learning dataset. * diff --git a/lib/mlbackend/python/classes/processor.php b/lib/mlbackend/python/classes/processor.php index faa9f6cf61b..1ba59c8ff5a 100644 --- a/lib/mlbackend/python/classes/processor.php +++ b/lib/mlbackend/python/classes/processor.php @@ -94,6 +94,27 @@ class processor implements \core_analytics\classifier, \core_analytics\regresso return get_string('pythonpackagenotinstalled', 'mlbackend_python', $cmd); } + /** + * Delete the model version output directory. + * + * @param string $uniqueid + * @param string $modelversionoutputdir + * @return null + */ + public function clear_model($uniqueid, $modelversionoutputdir) { + remove_dir($modelversionoutputdir); + } + + /** + * Delete the model output directory. + * + * @param string $modeloutputdir + * @return null + */ + public function delete_output_dir($modeloutputdir) { + remove_dir($modeloutputdir); + } + /** * Trains a machine learning algorithm with the provided dataset. *