Commit 98b2f06f authored by David Monllaó
Browse files

MDL-57791 mlbackend_php: Minimum 2 samples per target value

parent ee345551
......@@ -26,7 +26,7 @@ $string['accuracy'] = 'Accuracy';
$string['allpredictions'] = 'All predictions';
$string['analysingsitedata'] = 'Analysing the site';
$string['analyticmodels'] = 'Analytic models';
$string['bettercli'] = 'To evaluate models and to get predictions are heavy processes, it is better to run them through command line interface';
$string['bettercli'] = 'Evaluating models and generating predictions may involve heavy processing. It is advised that you run these actions via the command line interface';
$string['cantguessstartdate'] = 'Can\'t guess the start date';
$string['cantguessenddate'] = 'Can\'t guess the end date';
$string['clienablemodel'] = 'You can enable the model by selecting a time splitting method by its id. Note that you can also enable it later using the web interface (\'none\' to exit)';
......
......@@ -258,6 +258,21 @@ class processor implements \core_analytics\predictor {
}
fclose($fh);
// We need at least 2 samples belonging to each target.
$counts = array_count_values($targets);
foreach ($counts as $count) {
if ($count < 2) {
$notenoughdata = true;
}
}
if (!empty($notenoughdata)) {
$resultobj = new \stdClass();
$resultobj->status = \core_analytics\model::EVALUATE_NOT_ENOUGH_DATA;
$resultobj->score = 0;
$resultobj->info = array(get_string('errornotenoughdata', 'mlbackend_php'));
return $resultobj;
}
$phis = array();
// Evaluate the model multiple times to confirm the results are not significantly random due to a short amount of data.
......@@ -318,7 +333,7 @@ class processor implements \core_analytics\predictor {
$a = new \stdClass();
$a->deviation = $modeldev;
$a->accepteddeviation = $maxdeviation;
$resultobj->info[] = get_string('errornotenoughdata', 'mlbackend_php', $a);
$resultobj->info[] = get_string('errornotenoughdatadev', 'mlbackend_php', $a);
}
if ($resultobj->score < \core_analytics\model::MIN_SCORE) {
......
......@@ -25,6 +25,7 @@
$string['datasetsizelimited'] = 'Only a part of the evaluation dataset has been evaluated due to its size. Set $CFG->mlbackend_php_no_memory_limit if you are confident that your system can cope with a {$a} dataset';
$string['errorcantloadmodel'] = 'Model file {$a} does not exist, ensure the model has been trained before using it to predict.';
$string['errorlowscore'] = 'The evaluated model prediction accuracy is not very high, some predictions may not be accurate. Model score = {$a->score}, minimum score = {$a->minscore}';
$string['errornotenoughdata'] = 'The evaluation results varied too much, you could try to gather more data to ensure the model is valid. Evaluation results standard deviation = {$a->deviation}, maximum recommended standard deviation = {$a->accepteddeviation}';
$string['errornotenoughdata'] = 'There is not enough data to evaluate this model using this time splitting method.';
$string['errornotenoughdatadev'] = 'The evaluation results varied too much, you could try to gather more data to ensure the model is valid. Evaluation results standard deviation = {$a->deviation}, maximum recommended standard deviation = {$a->accepteddeviation}';
$string['errorphp7required'] = 'The PHP machine learning backend requires PHP 7';
$string['pluginname'] = 'PHP machine learning backend';
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment