mirror of
https://github.com/moodle/moodle.git
synced 2025-01-19 06:18:28 +01:00
MDL-59786 analytics: Discard invalid samples
This commit is contained in:
parent
b3cfb01316
commit
1a5204a427
@ -32,6 +32,8 @@ require_once($CFG->dirroot . '/course/format/weeks/lib.php');
|
||||
|
||||
$help = "Guesses course start and end dates based on activity logs.
|
||||
|
||||
IMPORTANT: Don't use this script if you keep previous academic years users enrolled in courses. Guesses would not be accurate.
|
||||
|
||||
Options:
|
||||
--guessstart Guess the course start date (default to true)
|
||||
--guessend Guess the course end date (default to true)
|
||||
@ -183,10 +185,14 @@ function tool_analytics_calculate_course_dates($course, $options) {
|
||||
|
||||
$course->enddate = $guessedenddate;
|
||||
|
||||
if ($course->enddate > $course->startdate) {
|
||||
$notification .= PHP_EOL . ' ' . get_string('enddate') . ': ' . userdate($course->enddate);
|
||||
} else {
|
||||
$updateit = false;
|
||||
if ($course->enddate < $course->startdate) {
|
||||
$notification .= PHP_EOL . ' ' . get_string('errorendbeforestart', 'analytics', userdate($course->enddate));
|
||||
} else if ($course->startdate + (YEARSECS + (WEEKSECS * 4)) > $course->enddate) {
|
||||
$notification .= PHP_EOL . ' ' . get_string('coursetoolong', 'analytics');
|
||||
} else {
|
||||
$notification .= PHP_EOL . ' ' . get_string('enddate') . ': ' . userdate($course->enddate);
|
||||
$updateit = true;
|
||||
}
|
||||
|
||||
if ($options['update']) {
|
||||
|
@ -33,7 +33,7 @@ $string['enabledtimesplittings'] = 'Time splitting methods';
|
||||
$string['erroralreadypredict'] = '{$a} file has already been used to predict';
|
||||
$string['errorcannotreaddataset'] = 'Dataset file {$a} can not be read';
|
||||
$string['errorcannotwritedataset'] = 'Dataset file {$a} can not be written';
|
||||
$string['errorendbeforestart'] = 'The guessed end date ({$a}) is before the course start date.';
|
||||
$string['errorendbeforestart'] = 'The end date ({$a}) is before the course start date.';
|
||||
$string['errorinvalidindicator'] = 'Invalid {$a} indicator';
|
||||
$string['errorinvalidtimesplitting'] = 'Invalid time splitting, please ensure you added the class fully qualified class name';
|
||||
$string['errornoindicators'] = 'This model does not have any indicator';
|
||||
|
@ -115,7 +115,7 @@ class course_dropout extends \core_analytics\local\target\binary {
|
||||
}
|
||||
|
||||
/**
|
||||
* is_valid_analysable
|
||||
* Discards courses that are not yet ready to be used for training or prediction.
|
||||
*
|
||||
* @param \core_analytics\analysable $course
|
||||
* @param bool $fortraining
|
||||
@ -147,13 +147,22 @@ class course_dropout extends \core_analytics\local\target\binary {
|
||||
return get_string('nocourseendtime');
|
||||
}
|
||||
|
||||
if ($course->get_end() < $course->get_start()) {
|
||||
return get_string('errorendbeforestart', 'analytics');
|
||||
}
|
||||
|
||||
// A course that lasts longer than 1 year probably have wrong start or end dates.
|
||||
if ($course->get_end() - $course->get_start() > (YEARSECS + (WEEKSECS * 4))) {
|
||||
return get_string('coursetoolong', 'analytics');
|
||||
}
|
||||
|
||||
// Ongoing courses data can not be used to train.
|
||||
if ($fortraining && !$course->is_finished()) {
|
||||
return get_string('coursenotyetfinished');
|
||||
}
|
||||
|
||||
if ($fortraining) {
|
||||
// Not a valid target for training if there are not enough course accesses.
|
||||
// Not a valid target for training if there are not enough course accesses between the course start and end dates.
|
||||
|
||||
$params = array('courseid' => $course->get_id(), 'anonymous' => 0, 'start' => $course->get_start(),
|
||||
'end' => $course->get_end());
|
||||
@ -178,14 +187,39 @@ class course_dropout extends \core_analytics\local\target\binary {
|
||||
}
|
||||
|
||||
/**
|
||||
* All student enrolments are valid.
|
||||
* Discard student enrolments that are invalid.
|
||||
*
|
||||
* @param int $sampleid
|
||||
* @param \core_analytics\analysable $course
|
||||
* @param bool $fortraining
|
||||
* @return true|string
|
||||
* @return bool
|
||||
*/
|
||||
public function is_valid_sample($sampleid, \core_analytics\analysable $course, $fortraining = true) {
|
||||
|
||||
$userenrol = $this->retrieve('user_enrolments', $sampleid);
|
||||
if ($userenrol->timeend && $course->get_start() > $userenrol->timeend) {
|
||||
// Discard enrolments which time end is prior to the course start. This should get rid of
|
||||
// old user enrolments that remain on the course.
|
||||
return false;
|
||||
}
|
||||
|
||||
$limit = $course->get_start() - (YEARSECS + (WEEKSECS * 4));
|
||||
if (($userenrol->timestart && $userenrol->timestart < $limit) ||
|
||||
(!$userenrol->timestart && $userenrol->timecreated < $limit)) {
|
||||
// Following what we do in is_valid_sample, we will discard enrolments that last more than 1 academic year
|
||||
// because they have incorrect start and end dates or because they are reused along multiple years
|
||||
// without removing previous academic years students. This may not be very accurate because some courses
|
||||
// can last just some months, but it is better than nothing and they will be flagged as drop out anyway
|
||||
// in most of the cases.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (($userenrol->timestart && $userenrol->timestart > $course->get_end()) ||
|
||||
(!$userenrol->timestart && $userenrol->timecreated > $course->get_end())) {
|
||||
// Discard user enrolments that starts after the analysable official end.
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -134,7 +134,7 @@ class no_teaching extends \core_analytics\local\target\binary {
|
||||
* @param int $sampleid
|
||||
* @param \core_analytics\analysable $analysable
|
||||
* @param bool $fortraining
|
||||
* @return true|string
|
||||
* @return bool
|
||||
*/
|
||||
public function is_valid_sample($sampleid, \core_analytics\analysable $analysable, $fortraining = true) {
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user