Commit bc82b895 authored by David Monllaó's avatar David Monllaó
Browse files

MDL-64787 mlbackend: Evaluate trained models

parent 74823933
......@@ -63,7 +63,9 @@ interface classifier extends predictor {
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @param string $trainedmodeldir
* @return \stdClass
*/
public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir);
public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset,
$outputdir, $trainedmodeldir);
}
......@@ -537,6 +537,22 @@ class model {
}
$options['evaluation'] = true;
if (empty($options['mode'])) {
$options['mode'] = 'configuration';
}
if ($options['mode'] == 'trainedmodel') {
// We are only interested in the time splitting method used by the trained model.
$options['timesplitting'] = $this->model->timesplitting;
// Provide the trained model directory to the ML backend if that is what we want to evaluate.
$trainedmodeldir = $this->get_output_dir(['execution']);
} else {
$trainedmodeldir = false;
}
$this->init_analyser($options);
if (empty($this->get_indicators())) {
......@@ -575,10 +591,10 @@ class model {
// Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
if ($this->get_target()->is_linear()) {
$predictorresult = $predictor->evaluate_regression($this->get_unique_id(), self::ACCEPTED_DEVIATION,
self::EVALUATION_ITERATIONS, $dataset, $outputdir);
self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
} else {
$predictorresult = $predictor->evaluate_classification($this->get_unique_id(), self::ACCEPTED_DEVIATION,
self::EVALUATION_ITERATIONS, $dataset, $outputdir);
self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
}
$result->status = $predictorresult->status;
......@@ -1470,7 +1486,7 @@ class model {
*
* @return bool
*/
public function trained_locally() {
public function trained_locally() : bool {
global $DB;
if (!$this->is_trained() || $this->is_static()) {
......
......@@ -63,7 +63,9 @@ interface regressor extends predictor {
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @param string $trainedmodeldir
* @return \stdClass
*/
public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir);
public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset,
$outputdir, $trainedmodeldir);
}
......@@ -274,7 +274,7 @@ class core_analytics_prediction_testcase extends advanced_testcase {
* test_ml_export_import
*
* @param string $predictionsprocessorclass The class name
* @dataProvider provider_ml_export_import
* @dataProvider provider_ml_processors
*/
public function test_ml_export_import($predictionsprocessorclass) {
......@@ -296,6 +296,7 @@ class core_analytics_prediction_testcase extends advanced_testcase {
$model->update(true, false, '\core\analytics\time_splitting\quarters', get_class($predictionsprocessor));
$model->train();
$this->assertTrue($model->trained_locally());
$this->generate_courses(10, ['visible' => 0]);
......@@ -314,16 +315,18 @@ class core_analytics_prediction_testcase extends advanced_testcase {
$this->assertEquals($importedmodelresults->predictions[$sampleid]->prediction, $prediction->prediction);
}
$this->assertFalse($importmodel->trained_locally());
set_config('enabled_stores', '', 'tool_log');
get_log_manager(true);
}
/**
* provider_ml_export_import
* provider_ml_processors
*
* @return array
*/
public function provider_ml_export_import() {
public function provider_ml_processors() {
$cases = [
'case' => [],
];
......@@ -425,14 +428,14 @@ class core_analytics_prediction_testcase extends advanced_testcase {
/**
* Basic test to check that prediction processors work as expected.
*
* @dataProvider provider_ml_test_evaluation
* @dataProvider provider_ml_test_evaluation_configuration
* @param string $modelquality
* @param int $ncourses
* @param array $expected
* @param string $predictionsprocessorclass
* @return void
*/
public function test_ml_evaluation($modelquality, $ncourses, $expected, $predictionsprocessorclass) {
public function test_ml_evaluation_configuration($modelquality, $ncourses, $expected, $predictionsprocessorclass) {
$this->resetAfterTest(true);
$this->setAdminuser();
set_config('enabled_stores', 'logstore_standard', 'tool_log');
......@@ -473,6 +476,46 @@ class core_analytics_prediction_testcase extends advanced_testcase {
get_log_manager(true);
}
/**
 * Tests the evaluation of already trained models.
 *
 * Trains a model, exports it, imports it back and evaluates the imported copy using the
 * 'trainedmodel' evaluation mode. The evaluation is expected to return status 0 and score 1.
 *
 * @dataProvider provider_ml_processors
 * @param string $predictionsprocessorclass The class name of the predictions processor under test
 * @return void
 */
public function test_ml_evaluation_trained_model($predictionsprocessorclass) {
    $this->resetAfterTest(true);
    $this->setAdminuser();

    // Check the predictions processor first: there is no point in changing the config nor in generating
    // training data if the test is going to be skipped anyway.
    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    set_config('enabled_stores', 'logstore_standard', 'tool_log');
    set_config('timesplittings',
        '\core\analytics\time_splitting\quarters,\core\analytics\time_splitting\quarters_accum', 'analytics');

    $model = $this->add_perfect_model();

    // Generate training data.
    $this->generate_courses(50);

    // Train the model using the time splitting method whose results are checked below.
    $timesplittingid = '\\core\\analytics\\time_splitting\\quarters';
    $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));
    $model->train();

    // Export the trained model and import it back as a separate model.
    $zipfilename = 'model-zip-' . microtime() . '.zip';
    $zipfilepath = $model->export_model($zipfilename);
    $importmodel = \core_analytics\model::import_model($zipfilepath);

    // Evaluate the imported trained model instead of the model configuration.
    $results = $importmodel->evaluate(['mode' => 'trainedmodel']);
    $this->assertEquals(0, $results[$timesplittingid]->status);
    $this->assertEquals(1, $results[$timesplittingid]->score);

    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
* test_read_indicator_calculations
*
......@@ -547,11 +590,11 @@ class core_analytics_prediction_testcase extends advanced_testcase {
}
/**
* provider_ml_test_evaluation
* provider_ml_test_evaluation_configuration
*
* @return array
*/
public function provider_ml_test_evaluation() {
public function provider_ml_test_evaluation_configuration() {
$cases = array(
'bad' => array(
......
This file describes API changes in the analytics subsystem;
the information provided here is intended especially for developers.
=== 3.7 ===
* \core_analytics\regressor::evaluate_regression and \core_analytics\classifier::evaluate_classification
have been updated to include a new $trainedmodeldir param. This new param will be used to evaluate the
existing trained model.
=== 3.5 ===
* There are two new methods for analysers, processes_user_data() and join_sample_user(). You
......
......@@ -174,14 +174,7 @@ class processor implements \core_analytics\classifier, \core_analytics\regressor
*/
public function classify($uniqueid, \stored_file $dataset, $outputdir) {
$modelfilepath = $this->get_model_filepath($outputdir);
if (!file_exists($modelfilepath)) {
throw new \moodle_exception('errorcantloadmodel', 'mlbackend_php', '', $modelfilepath);
}
$modelmanager = new ModelManager();
$classifier = $modelmanager->restoreFromFile($modelfilepath);
$classifier = $this->load_classifier($outputdir);
$fh = $dataset->get_content_file_handle();
......@@ -244,11 +237,19 @@ class processor implements \core_analytics\classifier, \core_analytics\regressor
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @param string $trainedmodeldir
* @return \stdClass
*/
public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset,
$outputdir, $trainedmodeldir) {
$fh = $dataset->get_content_file_handle();
if ($trainedmodeldir) {
// We overwrite the number of iterations as the results will always be the same.
$niterations = 1;
$classifier = $this->load_classifier($trainedmodeldir);
}
// The first lines are var names and the second one values.
$metadata = $this->extract_metadata($fh);
......@@ -308,15 +309,19 @@ class processor implements \core_analytics\classifier, \core_analytics\regressor
// Evaluate the model multiple times to confirm the results are not significantly random due to a small amount of data.
for ($i = 0; $i < $niterations; $i++) {
$classifier = new \Phpml\Classification\Linear\LogisticRegression(self::TRAIN_ITERATIONS, Normalizer::NORM_L2);
// Split up the dataset in classifier and testing.
$data = new RandomSplit(new ArrayDataset($samples, $targets), 0.2);
if (!$trainedmodeldir) {
$classifier = new \Phpml\Classification\Linear\LogisticRegression(self::TRAIN_ITERATIONS, Normalizer::NORM_L2);
$classifier->train($data->getTrainSamples(), $data->getTrainLabels());
// Split up the dataset in classifier and testing.
$data = new RandomSplit(new ArrayDataset($samples, $targets), 0.2);
$predictedlabels = $classifier->predict($data->getTestSamples());
$phis[] = $this->get_phi($data->getTestLabels(), $predictedlabels);
$classifier->train($data->getTrainSamples(), $data->getTrainLabels());
$predictedlabels = $classifier->predict($data->getTestSamples());
$phis[] = $this->get_phi($data->getTestLabels(), $predictedlabels);
} else {
$predictedlabels = $classifier->predict($samples);
$phis[] = $this->get_phi($targets, $predictedlabels);
}
}
// Let's fill the results changing the returned status code depending on the phi-related calculated metrics.
......@@ -381,6 +386,24 @@ class processor implements \core_analytics\classifier, \core_analytics\regressor
return $resultobj;
}
/**
 * Restores the classifier stored in the model file of the provided directory.
 *
 * @throws \moodle_exception If the model file does not exist.
 * @param string $outputdir Directory used to resolve the model file path.
 * @return \Phpml\Classification\Linear\LogisticRegression
 */
protected function load_classifier($outputdir) {
    $path = $this->get_model_filepath($outputdir);

    if (file_exists($path)) {
        // Let the model manager rebuild the classifier from the file contents.
        return (new ModelManager())->restoreFromFile($path);
    }

    // Nothing to restore, report the path we were looking for.
    throw new \moodle_exception('errorcantloadmodel', 'mlbackend_php', '', $path);
}
/**
* Train this processor regression model using the provided supervised learning dataset.
*
......@@ -416,9 +439,11 @@ class processor implements \core_analytics\classifier, \core_analytics\regressor
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @param string $trainedmodeldir
* @return \stdClass
*/
public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset,
$outputdir, $trainedmodeldir) {
throw new \coding_exception('This predictor does not support regression yet.');
}
......
......@@ -229,9 +229,11 @@ class processor implements \core_analytics\classifier, \core_analytics\regresso
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @param string $trainedmodeldir
* @return \stdClass
*/
public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset,
$outputdir, $trainedmodeldir) {
// Obtain the physical route to the file.
$datasetpath = $this->get_file_path($dataset);
......@@ -244,6 +246,10 @@ class processor implements \core_analytics\classifier, \core_analytics\regresso
escapeshellarg($maxdeviation) . ' ' .
escapeshellarg($niterations);
if ($trainedmodeldir) {
$cmd .= ' ' . escapeshellarg($trainedmodeldir);
}
if (!PHPUNIT_TEST && CLI_SCRIPT) {
debugging($cmd, DEBUG_DEVELOPER);
}
......@@ -370,9 +376,11 @@ class processor implements \core_analytics\classifier, \core_analytics\regresso
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @param string $trainedmodeldir
* @return \stdClass
*/
public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset,
$outputdir, $trainedmodeldir) {
throw new \coding_exception('This predictor does not support regression yet.');
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment