MDL-59786 analytics: Discard invalid samples

This commit is contained in:
David Monllao 2017-08-21 16:18:02 +02:00
parent b3cfb01316
commit 1a5204a427
4 changed files with 49 additions and 9 deletions

View File

@ -32,6 +32,8 @@ require_once($CFG->dirroot . '/course/format/weeks/lib.php');
$help = "Guesses course start and end dates based on activity logs.
IMPORTANT: Don't use this script if you keep previous academic years users enrolled in courses. Guesses would not be accurate.
Options:
--guessstart Guess the course start date (default to true)
--guessend Guess the course end date (default to true)
@ -183,10 +185,14 @@ function tool_analytics_calculate_course_dates($course, $options) {
$course->enddate = $guessedenddate;
if ($course->enddate > $course->startdate) {
$notification .= PHP_EOL . ' ' . get_string('enddate') . ': ' . userdate($course->enddate);
} else {
$updateit = false;
if ($course->enddate < $course->startdate) {
$notification .= PHP_EOL . ' ' . get_string('errorendbeforestart', 'analytics', userdate($course->enddate));
} else if ($course->startdate + (YEARSECS + (WEEKSECS * 4)) > $course->enddate) {
$notification .= PHP_EOL . ' ' . get_string('coursetoolong', 'analytics');
} else {
$notification .= PHP_EOL . ' ' . get_string('enddate') . ': ' . userdate($course->enddate);
$updateit = true;
}
if ($options['update']) {

View File

@ -33,7 +33,7 @@ $string['enabledtimesplittings'] = 'Time splitting methods';
$string['erroralreadypredict'] = '{$a} file has already been used to predict';
$string['errorcannotreaddataset'] = 'Dataset file {$a} can not be read';
$string['errorcannotwritedataset'] = 'Dataset file {$a} can not be written';
$string['errorendbeforestart'] = 'The guessed end date ({$a}) is before the course start date.';
$string['errorendbeforestart'] = 'The end date ({$a}) is before the course start date.';
$string['errorinvalidindicator'] = 'Invalid {$a} indicator';
$string['errorinvalidtimesplitting'] = 'Invalid time splitting, please ensure you added the class fully qualified class name';
$string['errornoindicators'] = 'This model does not have any indicator';

View File

@ -115,7 +115,7 @@ class course_dropout extends \core_analytics\local\target\binary {
}
/**
* is_valid_analysable
* Discards courses that are not yet ready to be used for training or prediction.
*
* @param \core_analytics\analysable $course
* @param bool $fortraining
@ -147,13 +147,22 @@ class course_dropout extends \core_analytics\local\target\binary {
return get_string('nocourseendtime');
}
if ($course->get_end() < $course->get_start()) {
return get_string('errorendbeforestart', 'analytics');
}
// A course that lasts longer than 1 year probably have wrong start or end dates.
if ($course->get_end() - $course->get_start() > (YEARSECS + (WEEKSECS * 4))) {
return get_string('coursetoolong', 'analytics');
}
// Ongoing courses data can not be used to train.
if ($fortraining && !$course->is_finished()) {
return get_string('coursenotyetfinished');
}
if ($fortraining) {
// Not a valid target for training if there are not enough course accesses.
// Not a valid target for training if there are not enough course accesses between the course start and end dates.
$params = array('courseid' => $course->get_id(), 'anonymous' => 0, 'start' => $course->get_start(),
'end' => $course->get_end());
@ -178,14 +187,39 @@ class course_dropout extends \core_analytics\local\target\binary {
}
/**
* All student enrolments are valid.
* Discard student enrolments that are invalid.
*
* @param int $sampleid
* @param \core_analytics\analysable $course
* @param bool $fortraining
* @return true|string
* @return bool
*/
public function is_valid_sample($sampleid, \core_analytics\analysable $course, $fortraining = true) {
$userenrol = $this->retrieve('user_enrolments', $sampleid);
if ($userenrol->timeend && $course->get_start() > $userenrol->timeend) {
// Discard enrolments which time end is prior to the course start. This should get rid of
// old user enrolments that remain on the course.
return false;
}
$limit = $course->get_start() - (YEARSECS + (WEEKSECS * 4));
if (($userenrol->timestart && $userenrol->timestart < $limit) ||
(!$userenrol->timestart && $userenrol->timecreated < $limit)) {
// Following what we do in is_valid_sample, we will discard enrolments that last more than 1 academic year
// because they have incorrect start and end dates or because they are reused along multiple years
// without removing previous academic years students. This may not be very accurate because some courses
// can last just some months, but it is better than nothing and they will be flagged as drop out anyway
// in most of the cases.
return false;
}
if (($userenrol->timestart && $userenrol->timestart > $course->get_end()) ||
(!$userenrol->timestart && $userenrol->timecreated > $course->get_end())) {
// Discard user enrolments that starts after the analysable official end.
return false;
}
return true;
}

View File

@ -134,7 +134,7 @@ class no_teaching extends \core_analytics\local\target\binary {
* @param int $sampleid
* @param \core_analytics\analysable $analysable
* @param bool $fortraining
* @return true|string
* @return bool
*/
public function is_valid_sample($sampleid, \core_analytics\analysable $analysable, $fortraining = true) {