Merge branch 'MDL-71715-master' of https://github.com/ilyatregubov/moodle

This commit is contained in:
Eloy Lafuente (stronk7) 2022-01-17 18:45:37 +01:00
commit 12be973ee6
47 changed files with 637 additions and 121 deletions

View File

@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (c) 2016-2018 Arkadiusz Kondas <arkadiusz.kondas[at]gmail>
Copyright (c) 2016-2020 Arkadiusz Kondas <arkadiusz.kondas[at]gmail>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -4,3 +4,5 @@ Current version is 0.8.0
# Remove all files but:
* src/
* LICENSE
# Copy content of src/ to /path/to/moodle/lib/mlbackend/php/phpml/src/Phpml
# Copy LICENSE file to /path/to/moodle/lib/mlbackend/php/phpml

View File

@ -104,11 +104,11 @@ class Apriori implements Associator
*/
protected function predictSample(array $sample): array
{
$predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
$predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample): bool {
return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
}));
return array_map(function ($rule) {
return array_map(static function ($rule) {
return $rule[self::ARRAY_KEY_CONSEQUENT];
}, $predicts);
}
@ -177,7 +177,7 @@ class Apriori implements Associator
$cardinality = count($sample);
$antecedents = $this->powerSet($sample);
return array_filter($antecedents, function ($antecedent) use ($cardinality) {
return array_filter($antecedents, static function ($antecedent) use ($cardinality): bool {
return (count($antecedent) != $cardinality) && ($antecedent != []);
});
}
@ -199,7 +199,7 @@ class Apriori implements Associator
}
}
return array_map(function ($entry) {
return array_map(static function ($entry): array {
return [$entry];
}, $items);
}
@ -213,7 +213,7 @@ class Apriori implements Associator
*/
private function frequent(array $samples): array
{
return array_values(array_filter($samples, function ($entry) {
return array_values(array_filter($samples, function ($entry): bool {
return $this->support($entry) >= $this->support;
}));
}
@ -288,7 +288,7 @@ class Apriori implements Associator
*/
private function frequency(array $sample): int
{
return count(array_filter($this->samples, function ($entry) use ($sample) {
return count(array_filter($this->samples, function ($entry) use ($sample): bool {
return $this->subset($entry, $sample);
}));
}
@ -303,7 +303,7 @@ class Apriori implements Associator
*/
private function contains(array $system, array $set): bool
{
return (bool) array_filter($system, function ($entry) use ($set) {
return (bool) array_filter($system, function ($entry) use ($set): bool {
return $this->equals($entry, $set);
});
}

View File

@ -41,7 +41,7 @@ class RandomForest extends Bagging
* Default value for the ratio is 'log' which results in log(numFeatures, 2) + 1
* features to be taken into consideration while selecting subspace of features
*
* @param string|float $ratio
* @param mixed $ratio
*/
public function setFeatureSubsetRatio($ratio): self
{
@ -73,7 +73,9 @@ class RandomForest extends Bagging
throw new InvalidArgumentException('RandomForest can only use DecisionTree as base classifier');
}
return parent::setClassifer($classifier, $classifierOptions);
parent::setClassifer($classifier, $classifierOptions);
return $this;
}
/**
@ -122,12 +124,16 @@ class RandomForest extends Bagging
}
/**
* @param DecisionTree $classifier
*
* @return DecisionTree
*/
protected function initSingleClassifier(Classifier $classifier): Classifier
{
if (!$classifier instanceof DecisionTree) {
throw new InvalidArgumentException(
sprintf('Classifier %s expected, got %s', DecisionTree::class, get_class($classifier))
);
}
if (is_float($this->featureSubsetRatio)) {
$featureCount = (int) ($this->featureSubsetRatio * $this->featureCount);
} elseif ($this->featureSubsetRatio === 'sqrt') {

View File

@ -58,7 +58,7 @@ class Adaline extends Perceptron
protected function runTraining(array $samples, array $targets): void
{
// The cost function is the sum of squares
$callback = function ($weights, $sample, $target) {
$callback = function ($weights, $sample, $target): array {
$this->weights = $weights;
$output = $this->output($sample);

View File

@ -188,7 +188,7 @@ class LogisticRegression extends Adaline
* The gradient of the cost function to be used with gradient descent:
* ∇J(x) = -(y - h(x)) = (h(x) - y)
*/
return function ($weights, $sample, $y) use ($penalty) {
return function ($weights, $sample, $y) use ($penalty): array {
$this->weights = $weights;
$hX = $this->output($sample);
@ -220,13 +220,13 @@ class LogisticRegression extends Adaline
* The gradient of the cost function:
* ∇J(x) = -(h(x) - y) . h(x) . (1 - h(x))
*/
return function ($weights, $sample, $y) use ($penalty) {
return function ($weights, $sample, $y) use ($penalty): array {
$this->weights = $weights;
$hX = $this->output($sample);
$y = $y < 0 ? 0 : 1;
$error = ($y - $hX) ** 2;
$error = (($y - $hX) ** 2);
$gradient = -($y - $hX) * $hX * (1 - $hX);
return [$error, $gradient, $penalty];

View File

@ -154,7 +154,7 @@ class Perceptron implements Classifier, IncrementalEstimator
protected function runTraining(array $samples, array $targets): void
{
// The cost function is the sum of squares
$callback = function ($weights, $sample, $target) {
$callback = function ($weights, $sample, $target): array {
$this->weights = $weights;
$prediction = $this->outputClass($sample);

View File

@ -77,9 +77,6 @@ class FuzzyCMeans implements Clusterer
return $this->membership;
}
/**
* @param Point[]|int[][] $samples
*/
public function cluster(array $samples): array
{
// Initialize variables, clusters and membership matrix
@ -142,7 +139,7 @@ class FuzzyCMeans implements Clusterer
$total += $val;
}
$this->membership[] = array_map(function ($val) use ($total) {
$this->membership[] = array_map(static function ($val) use ($total): float {
return $val / $total;
}, $row);
}
@ -210,7 +207,7 @@ class FuzzyCMeans implements Clusterer
$this->samples[$col]
);
$val = ($dist1 / $dist2) ** 2.0 / ($this->fuzziness - 1);
$val = (($dist1 / $dist2) ** 2.0) / ($this->fuzziness - 1);
$sum += $val;
}

View File

@ -88,7 +88,7 @@ class Space extends SplObjectStorage
$min = $this->newPoint(array_fill(0, $this->dimension, null));
$max = $this->newPoint(array_fill(0, $this->dimension, null));
/** @var self $point */
/** @var Point $point */
foreach ($this as $point) {
for ($n = 0; $n < $this->dimension; ++$n) {
if ($min[$n] === null || $min[$n] > $point[$n]) {
@ -167,6 +167,10 @@ class Space extends SplObjectStorage
foreach ($cluster as $point) {
$closest = $point->getClosest($clusters);
if ($closest === null) {
continue;
}
if ($closest !== $cluster) {
$attach[$closest] ?? $attach[$closest] = new SplObjectStorage();
$detach[$cluster] ?? $detach[$cluster] = new SplObjectStorage();

View File

@ -35,8 +35,8 @@ class CsvDataset extends ArrayDataset
}
$samples = $targets = [];
while (($data = fgetcsv($handle, $maxLineLength, $delimiter)) !== false) {
$samples[] = array_slice((array) $data, 0, $features);
while ($data = fgetcsv($handle, $maxLineLength, $delimiter)) {
$samples[] = array_slice($data, 0, $features);
$targets[] = $data[$features];
}

View File

@ -19,7 +19,13 @@ class FilesDataset extends ArrayDataset
private function scanRootPath(string $rootPath): void
{
foreach (glob($rootPath.DIRECTORY_SEPARATOR.'*', GLOB_ONLYDIR) as $dir) {
$dirs = glob($rootPath.DIRECTORY_SEPARATOR.'*', GLOB_ONLYDIR);
if ($dirs === false) {
throw new DatasetException(sprintf('An error occurred during directory "%s" scan', $rootPath));
}
foreach ($dirs as $dir) {
$this->scanDir($dir);
}
}
@ -28,7 +34,12 @@ class FilesDataset extends ArrayDataset
{
$target = basename($dir);
foreach (array_filter(glob($dir.DIRECTORY_SEPARATOR.'*'), 'is_file') as $file) {
$files = glob($dir.DIRECTORY_SEPARATOR.'*');
if ($files === false) {
return;
}
foreach (array_filter($files, 'is_file') as $file) {
$this->samples[] = file_get_contents($file);
$this->targets[] = $target;
}

View File

@ -24,7 +24,7 @@ class SvmDataset extends ArrayDataset
$targets = [];
$maxIndex = 0;
while (false !== $line = fgets($handle)) {
[$sample, $target, $maxIndex] = self::processLine((string) $line, $maxIndex);
[$sample, $target, $maxIndex] = self::processLine($line, $maxIndex);
$samples[] = $sample;
$targets[] = $target;
}

View File

@ -179,13 +179,13 @@ class KernelPCA extends PCA
// k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance
$dist = new Euclidean();
return function ($x, $y) use ($dist) {
return function ($x, $y) use ($dist): float {
return exp(-$this->gamma * $dist->sqDistance($x, $y));
};
case self::KERNEL_SIGMOID:
// k(x,y)=tanh(γ.xT.y+c0) where c0=1
return function ($x, $y) {
return function ($x, $y): float {
$res = Matrix::dot($x, $y)[0] + 1.0;
return tanh((float) $this->gamma * $res);
@ -195,7 +195,7 @@ class KernelPCA extends PCA
// k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
$dist = new Manhattan();
return function ($x, $y) use ($dist) {
return function ($x, $y) use ($dist): float {
return exp(-$this->gamma * $dist->distance($x, $y));
};

View File

@ -0,0 +1,30 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureExtraction\StopWords;
use Phpml\FeatureExtraction\StopWords;
/**
 * Stop-word list for Russian text.
 *
 * The class only carries the word table below and hands it to the parent
 * StopWords constructor; it adds no behaviour of its own.
 */
final class Russian extends StopWords
{
/**
* Russian stop words, stored as UTF-8 string literals.
*
* @var array
*/
protected $stopWords = [
'и', 'в', 'во', 'не', 'что', 'он', 'на', 'я', 'с', 'со', 'как', 'а', 'то', 'все', 'она', 'так', 'его', 'но', 'да', 'ты', 'к', 'у',
'же', 'вы', 'за', 'бы', 'по', 'только', 'ее', 'мне', 'было', 'вот', 'от', 'меня', 'еще', 'нет', 'о', 'из', 'ему', 'теперь', 'когда',
'даже', 'ну', 'вдруг', 'ли', 'если', 'уже', 'или', 'ни', 'быть', 'был', 'него', 'до', 'вас', 'нибудь', 'опять', 'уж', 'вам', 'ведь',
'там', 'потом', 'себя', 'ничего', 'ей', 'может', 'они', 'тут', 'где', 'есть', 'надо', 'ней', 'для', 'мы', 'тебя', 'их', 'чем', 'была',
'сам', 'чтоб', 'без', 'будто', 'чего', 'раз', 'тоже', 'себе', 'под', 'будет', 'ж', 'тогда', 'кто', 'этот', 'того', 'потому', 'этого',
'какой', 'совсем', 'ним', 'здесь', 'этом', 'один', 'почти', 'мой', 'тем', 'чтобы', 'нее', 'сейчас', 'были', 'куда', 'зачем', 'всех',
'никогда', 'можно', 'при', 'наконец', 'два', 'об', 'другой', 'хоть', 'после', 'над', 'больше', 'тот', 'через', 'эти', 'нас', 'про',
'всего', 'них', 'какая', 'много', 'разве', 'три', 'эту', 'моя', 'впрочем', 'хорошо', 'свою', 'этой', 'перед', 'иногда', 'лучше', 'чуть',
'том', 'нельзя', 'такой', 'им', 'более', 'всегда', 'конечно', 'всю', 'между',
];
/**
* Passes the word table above to the parent constructor.
* NOTE(review): how the parent stores or indexes the list is not visible here.
*/
public function __construct()
{
parent::__construct($this->stopWords);
}
}

View File

@ -30,7 +30,7 @@ class TfIdfTransformer implements Transformer
}
}
public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
foreach ($samples as &$sample) {
foreach ($sample as $index => &$feature) {

View File

@ -46,7 +46,7 @@ class TokenCountVectorizer implements Transformer
$this->buildVocabulary($samples);
}
public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
array_walk($samples, function (string &$sample): void {
$this->transformSample($sample);

View File

@ -46,7 +46,7 @@ final class UnivariateLinearRegression implements ScoringFunction
foreach (array_keys($samples[0]) as $index) {
$featureColumn = array_column($samples, $index);
$correlations[$index] =
(Matrix::dot($targets, $featureColumn)[0] / (new Matrix($featureColumn, false))->transpose()->frobeniusNorm())
Matrix::dot($targets, $featureColumn)[0] / (new Matrix($featureColumn, false))->transpose()->frobeniusNorm()
/ (new Matrix($targets, false))->frobeniusNorm();
}

View File

@ -56,7 +56,7 @@ final class SelectKBest implements Transformer
$this->keepColumns = array_slice($sorted, 0, $this->k, true);
}
public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
if ($this->keepColumns === null) {
return;

View File

@ -37,7 +37,7 @@ final class VarianceThreshold implements Transformer
public function fit(array $samples, ?array $targets = null): void
{
$this->variances = array_map(function (array $column) {
$this->variances = array_map(static function (array $column): float {
return Variance::population($column);
}, Matrix::transposeArray($samples));
@ -48,7 +48,7 @@ final class VarianceThreshold implements Transformer
}
}
public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
foreach ($samples as &$sample) {
$sample = array_values(array_intersect_key($sample, $this->keepColumns));

View File

@ -0,0 +1,72 @@
<?php
declare(strict_types=1);
namespace Phpml;
use Phpml\Exception\InvalidArgumentException;
/**
 * Runs several pipelines over the same input samples and concatenates, per
 * sample, the features each pipeline produces.
 */
final class FeatureUnion implements Transformer
{
    /**
     * Pipelines whose outputs are merged, in the order given to the constructor.
     *
     * @var Pipeline[]
     */
    private $pipelines = [];

    /**
     * @param Pipeline[] $pipelines non-empty list of pipelines
     *
     * @throws InvalidArgumentException when the list is empty
     */
    public function __construct(array $pipelines)
    {
        if ($pipelines === []) {
            throw new InvalidArgumentException('At least one pipeline is required');
        }

        // The typed identity closure rejects any element that is not a Pipeline.
        $this->pipelines = array_map(static function (Pipeline $pipeline): Pipeline {
            return $pipeline;
        }, $pipelines);
    }

    /**
     * Fits every transformer of every pipeline.
     *
     * Within a pipeline each transformer is fitted on the output of the previous
     * one; every pipeline starts again from the unmodified input samples.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        foreach ($this->pipelines as $pipeline) {
            $working = $samples;

            foreach ($pipeline->getTransformers() as $transformer) {
                $transformer->fit($working, $targets);
                $transformer->transform($working, $targets);
            }
        }
    }

    /**
     * Replaces $samples with the per-sample union of all pipeline outputs.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        $this->transformSamples($samples, $targets);
    }

    /**
     * Same as transform(), but fits each transformer right before applying it.
     */
    public function fitAndTransform(array &$samples, ?array &$targets = null): void
    {
        $this->transformSamples($samples, $targets, true);
    }

    /**
     * Applies each pipeline to a fresh copy of the input and merges the results.
     *
     * @param bool $fit whether to call fit() on a transformer before transform()
     */
    private function transformSamples(array &$samples, ?array &$targets = null, bool $fit = false): void
    {
        $snapshot = $samples;
        $merged = [];

        foreach ($this->pipelines as $pipeline) {
            // Every pipeline sees the original samples, not the previous pipeline's output.
            $samples = $snapshot;

            foreach ($pipeline->getTransformers() as $transformer) {
                if ($fit) {
                    $transformer->fit($samples, $targets);
                }

                $transformer->transform($samples, $targets);
            }

            foreach ($samples as $index => $sample) {
                // Scalar outputs are wrapped so they can be appended as a single feature.
                $features = is_array($sample) ? $sample : [$sample];
                $merged[$index] = array_merge($merged[$index] ?? [], $features);
            }
        }

        $samples = $merged;
    }
}

View File

@ -38,7 +38,7 @@ class GD extends StochasticGD
$this->updateWeightsWithUpdates($updates, $totalPenalty);
$this->costValues[] = array_sum($errors) / $this->sampleCount;
$this->costValues[] = array_sum($errors) / (int) $this->sampleCount;
if ($this->earlyStop($theta)) {
break;

View File

@ -48,6 +48,11 @@ abstract class Optimizer
return $this;
}
public function theta(): array
{
return $this->theta;
}
/**
* Executes the optimization with the given samples & targets
* and returns the weights

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Math\Kernel;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Math\Kernel;
use Phpml\Math\Product;
@ -19,12 +20,12 @@ class RBF implements Kernel
$this->gamma = $gamma;
}
/**
* @param array $a
* @param array $b
*/
public function compute($a, $b): float
{
if (!is_array($a) || !is_array($b)) {
throw new InvalidArgumentException(sprintf('Arguments of %s must be arrays', __METHOD__));
}
$score = 2 * Product::scalar($a, $b);
$squares = Product::scalar($a, $a) + Product::scalar($b, $b);

View File

@ -502,7 +502,8 @@ class EigenvalueDecomposition
}
// Double division avoids possible underflow
$g = ($g / $this->ort[$m]) / $this->H[$m][$m - 1];
$g /= $this->ort[$m];
$g /= $this->H[$m][$m - 1];
for ($i = $m; $i <= $high; ++$i) {
$this->V[$i][$j] += $g * $this->ort[$i];
}
@ -734,7 +735,7 @@ class EigenvalueDecomposition
// Double QR step involving rows l:n and columns m:n
for ($k = $m; $k <= $n - 1; ++$k) {
$notlast = ($k != $n - 1);
$notlast = $k != $n - 1;
if ($k != $m) {
$p = $this->H[$k][$k - 1];
$q = $this->H[$k + 1][$k - 1];

View File

@ -126,7 +126,7 @@ class Matrix
public function transpose(): self
{
if ($this->rows === 1) {
$matrix = array_map(function ($el) {
$matrix = array_map(static function ($el): array {
return [$el];
}, $this->matrix[0]);
} else {
@ -201,7 +201,7 @@ class Matrix
*/
public function add(self $other): self
{
return $this->_add($other);
return $this->sum($other);
}
/**
@ -209,7 +209,7 @@ class Matrix
*/
public function subtract(self $other): self
{
return $this->_add($other, -1);
return $this->sum($other, -1);
}
public function inverse(): self
@ -297,7 +297,7 @@ class Matrix
/**
* Element-wise addition or substraction depending on the given sign parameter
*/
private function _add(self $other, int $sign = 1): self
private function sum(self $other, int $sign = 1): self
{
$a1 = $this->toArray();
$a2 = $other->toArray();

View File

@ -28,7 +28,7 @@ final class ANOVA
throw new InvalidArgumentException('The array must have at least 2 elements');
}
$samplesPerClass = array_map(function (array $class): int {
$samplesPerClass = array_map(static function (array $class): int {
return count($class);
}, $samples);
$allSamples = (int) array_sum($samplesPerClass);
@ -41,10 +41,14 @@ final class ANOVA
$dfbn = $classes - 1;
$dfwn = $allSamples - $classes;
$msb = array_map(function ($s) use ($dfbn) {
$msb = array_map(static function ($s) use ($dfbn) {
return $s / $dfbn;
}, $ssbn);
$msw = array_map(function ($s) use ($dfwn) {
$msw = array_map(static function ($s) use ($dfwn) {
if ($dfwn === 0) {
return 1;
}
return $s / $dfwn;
}, $sswn);
@ -72,7 +76,7 @@ final class ANOVA
private static function sumOfFeaturesPerClass(array $samples): array
{
return array_map(function (array $class) {
return array_map(static function (array $class): array {
$sum = array_fill(0, count($class[0]), 0);
foreach ($class as $sample) {
foreach ($sample as $index => $feature) {
@ -93,7 +97,7 @@ final class ANOVA
}
}
return array_map(function ($sum) {
return array_map(static function ($sum) {
return $sum ** 2;
}, $squares);
}

View File

@ -50,7 +50,7 @@ class StandardDeviation
$mean = Mean::arithmetic($numbers);
return array_sum(array_map(
function ($val) use ($mean) {
static function ($val) use ($mean): float {
return ($val - $mean) ** 2;
},
$numbers

View File

@ -148,7 +148,7 @@ class ClassificationReport
$precision = $this->computePrecision($truePositive, $falsePositive);
$recall = $this->computeRecall($truePositive, $falseNegative);
$f1score = $this->computeF1Score((float) $precision, (float) $recall);
$f1score = $this->computeF1Score($precision, $recall);
$this->average = compact('precision', 'recall', 'f1score');
}
@ -186,10 +186,7 @@ class ClassificationReport
}
}
/**
* @return float|string
*/
private function computePrecision(int $truePositive, int $falsePositive)
private function computePrecision(int $truePositive, int $falsePositive): float
{
$divider = $truePositive + $falsePositive;
if ($divider == 0) {
@ -199,10 +196,7 @@ class ClassificationReport
return $truePositive / $divider;
}
/**
* @return float|string
*/
private function computeRecall(int $truePositive, int $falseNegative)
private function computeRecall(int $truePositive, int $falseNegative): float
{
$divider = $truePositive + $falseNegative;
if ($divider == 0) {

View File

@ -0,0 +1,86 @@
<?php
declare(strict_types=1);
namespace Phpml\Metric;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Math\Statistic\Correlation;
use Phpml\Math\Statistic\Mean;
/**
 * Regression error metrics computed from parallel arrays of targets and predictions.
 *
 * Predictions are looked up by the keys of the targets array, so both arrays
 * must share the same keys. Every metric first checks that the two arrays have
 * the same number of elements.
 */
final class Regression
{
    /**
     * Mean of the squared differences between targets and predictions.
     *
     * @throws InvalidArgumentException when the array sizes differ
     */
    public static function meanSquaredError(array $targets, array $predictions): float
    {
        self::assertCountEquals($targets, $predictions);

        $errors = [];
        foreach ($targets as $index => $target) {
            $errors[] = ($target - $predictions[$index]) ** 2;
        }

        return Mean::arithmetic($errors);
    }

    /**
     * Mean of the squared logarithm of (1 + target) / (1 + prediction).
     *
     * @throws InvalidArgumentException when the array sizes differ
     */
    public static function meanSquaredLogarithmicError(array $targets, array $predictions): float
    {
        self::assertCountEquals($targets, $predictions);

        $errors = [];
        foreach ($targets as $index => $target) {
            $errors[] = log((1 + $target) / (1 + $predictions[$index])) ** 2;
        }

        return Mean::arithmetic($errors);
    }

    /**
     * Mean of the absolute differences between targets and predictions.
     *
     * @throws InvalidArgumentException when the array sizes differ
     */
    public static function meanAbsoluteError(array $targets, array $predictions): float
    {
        self::assertCountEquals($targets, $predictions);

        return Mean::arithmetic(self::absoluteErrors($targets, $predictions));
    }

    /**
     * Median of the absolute differences between targets and predictions.
     *
     * @throws InvalidArgumentException when the array sizes differ
     */
    public static function medianAbsoluteError(array $targets, array $predictions): float
    {
        self::assertCountEquals($targets, $predictions);

        return (float) Mean::median(self::absoluteErrors($targets, $predictions));
    }

    /**
     * Squared Pearson correlation between targets and predictions.
     *
     * @throws InvalidArgumentException when the array sizes differ
     */
    public static function r2Score(array $targets, array $predictions): float
    {
        self::assertCountEquals($targets, $predictions);

        return Correlation::pearson($targets, $predictions) ** 2;
    }

    /**
     * Largest absolute difference between a target and its prediction.
     *
     * @throws InvalidArgumentException when the array sizes differ or both arrays are empty
     */
    public static function maxError(array $targets, array $predictions): float
    {
        self::assertCountEquals($targets, $predictions);

        // max() on an empty array raises ValueError on PHP 8 and returns false with
        // a warning on PHP 7; fail with the library's own exception instead.
        if ($targets === []) {
            throw new InvalidArgumentException('Targets and predictions must contain at least one element');
        }

        return (float) max(self::absoluteErrors($targets, $predictions));
    }

    /**
     * Absolute difference per element, in the iteration order of $targets.
     *
     * @return array list of non-negative numbers, re-indexed from 0
     */
    private static function absoluteErrors(array $targets, array $predictions): array
    {
        $errors = [];
        foreach ($targets as $index => $target) {
            $errors[] = abs($target - $predictions[$index]);
        }

        return $errors;
    }

    /**
     * @throws InvalidArgumentException when the two arrays differ in size
     */
    private static function assertCountEquals(array $targets, array $predictions): void
    {
        if (count($targets) !== count($predictions)) {
            throw new InvalidArgumentException('Targets count must be equal with predictions count');
        }
    }
}

View File

@ -12,29 +12,29 @@ class ModelManager
public function saveToFile(Estimator $estimator, string $filepath): void
{
if (!is_writable(dirname($filepath))) {
throw new FileException(sprintf('File "%s" can\'t be saved.', basename($filepath)));
throw new FileException(sprintf('File "%s" cannot be saved.', basename($filepath)));
}
$serialized = serialize($estimator);
if (!isset($serialized[0])) {
throw new SerializeException(sprintf('Class "%s" can not be serialized.', gettype($estimator)));
throw new SerializeException(sprintf('Class "%s" cannot be serialized.', gettype($estimator)));
}
$result = file_put_contents($filepath, $serialized, LOCK_EX);
if ($result === false) {
throw new FileException(sprintf('File "%s" can\'t be saved.', basename($filepath)));
throw new FileException(sprintf('File "%s" cannot be saved.', basename($filepath)));
}
}
public function restoreFromFile(string $filepath): Estimator
{
if (!file_exists($filepath) || !is_readable($filepath)) {
throw new FileException(sprintf('File "%s" can\'t be open.', basename($filepath)));
throw new FileException(sprintf('File "%s" cannot be opened.', basename($filepath)));
}
$object = unserialize((string) file_get_contents($filepath), [Estimator::class]);
if ($object === false) {
throw new SerializeException(sprintf('"%s" can not be unserialized.', basename($filepath)));
$object = unserialize((string) file_get_contents($filepath));
if ($object === false || !$object instanceof Estimator) {
throw new SerializeException(sprintf('"%s" cannot be unserialized.', basename($filepath)));
}
return $object;

View File

@ -59,8 +59,14 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
/**
* @throws InvalidArgumentException
*/
public function __construct(int $inputLayerFeatures, array $hiddenLayers, array $classes, int $iterations = 10000, ?ActivationFunction $activationFunction = null, float $learningRate = 1)
{
public function __construct(
int $inputLayerFeatures,
array $hiddenLayers,
array $classes,
int $iterations = 10000,
?ActivationFunction $activationFunction = null,
float $learningRate = 1.
) {
if (count($hiddenLayers) === 0) {
throw new InvalidArgumentException('Provide at least 1 hidden layer');
}
@ -123,6 +129,16 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
return $result;
}
public function getLearningRate(): float
{
return $this->learningRate;
}
public function getBackpropagation(): Backpropagation
{
return $this->backpropagation;
}
/**
* @param mixed $target
*/

View File

@ -33,7 +33,7 @@ class Neuron implements Node
public function __construct(?ActivationFunction $activationFunction = null)
{
$this->activationFunction = $activationFunction ?: new Sigmoid();
$this->activationFunction = $activationFunction ?? new Sigmoid();
}
public function addSynapse(Synapse $synapse): void

View File

@ -24,7 +24,7 @@ class Synapse
public function __construct(Node $node, ?float $weight = null)
{
$this->node = $node;
$this->weight = $weight ?: $this->generateRandomWeight();
$this->weight = $weight ?? $this->generateRandomWeight();
}
public function getOutput(): float

View File

@ -1,14 +0,0 @@
<?php
declare(strict_types=1);
namespace Phpml\NeuralNetwork;
interface Training
{
/**
* @param array $samples
* @param array $targets
*/
public function train(array $samples, array $targets);
}

View File

@ -34,6 +34,11 @@ class Backpropagation
$this->learningRate = $learningRate;
}
public function getLearningRate(): float
{
return $this->learningRate;
}
/**
* @param mixed $targetClass
*/

View File

@ -4,7 +4,9 @@ declare(strict_types=1);
namespace Phpml;
class Pipeline implements Estimator
use Phpml\Exception\InvalidOperationException;
class Pipeline implements Estimator, Transformer
{
/**
* @var Transformer[]
@ -12,29 +14,18 @@ class Pipeline implements Estimator
private $transformers = [];
/**
* @var Estimator
* @var Estimator|null
*/
private $estimator;
/**
* @param Transformer[] $transformers
*/
public function __construct(array $transformers, Estimator $estimator)
{
foreach ($transformers as $transformer) {
$this->addTransformer($transformer);
}
$this->estimator = $estimator;
}
public function addTransformer(Transformer $transformer): void
{
$this->transformers[] = $transformer;
}
public function setEstimator(Estimator $estimator): void
public function __construct(array $transformers, ?Estimator $estimator = null)
{
$this->transformers = array_map(static function (Transformer $transformer): Transformer {
return $transformer;
}, $transformers);
$this->estimator = $estimator;
}
@ -46,16 +37,20 @@ class Pipeline implements Estimator
return $this->transformers;
}
public function getEstimator(): Estimator
public function getEstimator(): ?Estimator
{
return $this->estimator;
}
public function train(array $samples, array $targets): void
{
if ($this->estimator === null) {
throw new InvalidOperationException('Pipeline without estimator can\'t use train method');
}
foreach ($this->transformers as $transformer) {
$transformer->fit($samples, $targets);
$transformer->transform($samples);
$transformer->transform($samples, $targets);
}
$this->estimator->train($samples, $targets);
@ -66,15 +61,27 @@ class Pipeline implements Estimator
*/
public function predict(array $samples)
{
$this->transformSamples($samples);
$this->transform($samples);
if ($this->estimator === null) {
throw new InvalidOperationException('Pipeline without estimator can\'t use predict method');
}
return $this->estimator->predict($samples);
}
private function transformSamples(array &$samples): void
public function fit(array $samples, ?array $targets = null): void
{
foreach ($this->transformers as $transformer) {
$transformer->transform($samples);
$transformer->fit($samples, $targets);
$transformer->transform($samples, $targets);
}
}
public function transform(array &$samples, ?array &$targets = null): void
{
foreach ($this->transformers as $transformer) {
$transformer->transform($samples, $targets);
}
}
}

View File

@ -0,0 +1,42 @@
<?php
declare(strict_types=1);
namespace Phpml\Preprocessing;
/**
 * Keeps only selected columns of every sample.
 *
 * Columns are chosen by name; a column's position in the sample is its index
 * in the dataset column list.
 */
final class ColumnFilter implements Preprocessor
{
    /**
     * Names of all dataset columns, in sample order.
     *
     * @var string[]
     */
    private $datasetColumns = [];

    /**
     * Names of the columns to keep.
     *
     * @var string[]
     */
    private $filterColumns = [];

    /**
     * @param string[] $datasetColumns names of all columns, in sample order
     * @param string[] $filterColumns  names of the columns to keep
     */
    public function __construct(array $datasetColumns, array $filterColumns)
    {
        // Typed identity closure: enforces the string type on every element.
        $requireString = static function (string $column): string {
            return $column;
        };

        $this->datasetColumns = array_map($requireString, $datasetColumns);
        $this->filterColumns = array_map($requireString, $filterColumns);
    }

    /**
     * No-op: the filter has nothing to learn from the data.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        //nothing to do
    }

    /**
     * Drops every column that is not in the filter list and re-indexes each sample.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        // array_intersect() keeps the keys of its first argument, so the keys of
        // $kept are the positions of the retained columns.
        $kept = array_intersect($this->datasetColumns, $this->filterColumns);

        foreach (array_keys($samples) as $row) {
            $samples[$row] = array_values(array_intersect_key($samples[$row], $kept));
        }
    }
}

View File

@ -49,7 +49,7 @@ class Imputer implements Preprocessor
$this->samples = $samples;
}
public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
if ($this->samples === []) {
throw new InvalidOperationException('Missing training samples for Imputer.');

View File

@ -0,0 +1,47 @@
<?php
declare(strict_types=1);
namespace Phpml\Preprocessing;
/**
 * Maps labels to consecutive integers, numbered from 0 in order of first appearance.
 */
final class LabelEncoder implements Preprocessor
{
    /**
     * Label (cast to string) => integer code.
     *
     * @var int[]
     */
    private $classes = [];

    /**
     * Learns the label-to-code mapping, discarding any previous one.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        $this->classes = [];

        foreach ($samples as $sample) {
            $label = (string) $sample;
            if (array_key_exists($label, $this->classes)) {
                continue;
            }

            // The next free code equals the number of labels seen so far.
            $this->classes[$label] = count($this->classes);
        }
    }

    /**
     * Replaces every label in $samples with its integer code.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        foreach (array_keys($samples) as $key) {
            $samples[$key] = $this->classes[(string) $samples[$key]];
        }
    }

    /**
     * Replaces every integer code in $samples with the label it stands for.
     */
    public function inverseTransform(array &$samples): void
    {
        $labels = array_flip($this->classes);

        foreach (array_keys($samples) as $key) {
            $samples[$key] = $labels[$samples[$key]];
        }
    }

    /**
     * Known labels, ordered by their code.
     *
     * @return string[]
     */
    public function classes(): array
    {
        return array_keys($this->classes);
    }
}

View File

@ -0,0 +1,30 @@
<?php
declare(strict_types=1);
namespace Phpml\Preprocessing;
/**
 * Applies a user-supplied callable to every sample.
 */
final class LambdaTransformer implements Preprocessor
{
    /**
     * Callable invoked with one sample; its return value replaces that sample.
     *
     * @var callable
     */
    private $lambda;

    public function __construct(callable $lambda)
    {
        $this->lambda = $lambda;
    }

    /**
     * No-op: the transformation is fully defined by the callable.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        // nothing to do
    }

    /**
     * Replaces each sample, in place, with the callable's result for it.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        $callback = $this->lambda;

        foreach (array_keys($samples) as $key) {
            $samples[$key] = call_user_func($callback, $samples[$key]);
        }
    }
}

View File

@ -66,7 +66,7 @@ class Normalizer implements Preprocessor
$this->fitted = true;
}
public function transform(array &$samples): void
public function transform(array &$samples, ?array &$targets = null): void
{
$methods = [
self::NORM_L1 => 'normalizeL1',

View File

@ -0,0 +1,47 @@
<?php
declare(strict_types=1);
namespace Phpml\Preprocessing;
/**
 * Casts numeric features (and optionally targets) to float, substituting a
 * placeholder for anything that is not numeric.
 */
final class NumberConverter implements Preprocessor
{
    /**
     * Whether transform() should also convert the targets.
     *
     * @var bool
     */
    private $transformTargets;

    /**
     * Value written in place of a non-numeric input.
     *
     * @var mixed
     */
    private $nonNumericPlaceholder;

    /**
     * @param bool  $transformTargets      also convert targets when true
     * @param mixed $nonNumericPlaceholder replacement for non-numeric values
     */
    public function __construct(bool $transformTargets = false, $nonNumericPlaceholder = null)
    {
        $this->transformTargets = $transformTargets;
        $this->nonNumericPlaceholder = $nonNumericPlaceholder;
    }

    /**
     * No-op: the conversion does not depend on training data.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        //nothing to do
    }

    /**
     * Converts every feature of every sample in place and, when enabled and
     * targets are given, every target as well.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        foreach ($samples as &$sample) {
            foreach ($sample as &$feature) {
                $feature = $this->convert($feature);
            }
            unset($feature);
        }
        unset($sample);

        if (!$this->transformTargets || !is_array($targets)) {
            return;
        }

        foreach ($targets as &$target) {
            $target = $this->convert($target);
        }
        unset($target);
    }

    /**
     * Returns the value as float when is_numeric() accepts it, else the placeholder.
     *
     * @param mixed $value
     *
     * @return mixed
     */
    private function convert($value)
    {
        if (is_numeric($value)) {
            return (float) $value;
        }

        return $this->nonNumericPlaceholder;
    }
}

View File

@ -0,0 +1,66 @@
<?php
declare(strict_types=1);
namespace Phpml\Preprocessing;
use Phpml\Exception\InvalidArgumentException;
/**
 * One-hot encodes categorical features column by column.
 *
 * Each column is replaced by a 0/1 vector with one slot per distinct value
 * seen for that column during fit(); the vectors of all columns are concatenated.
 */
final class OneHotEncoder implements Preprocessor
{
    /**
     * When true, values not seen during fit() are encoded as an all-zero
     * vector instead of raising an exception.
     *
     * @var bool
     */
    private $ignoreUnknown;

    /**
     * Column index => (category value => one-hot vector).
     *
     * @var array
     */
    private $categories = [];

    public function __construct(bool $ignoreUnknown = false)
    {
        $this->ignoreUnknown = $ignoreUnknown;
    }

    /**
     * Learns the categories of every column, discarding any earlier fit.
     *
     * The number of columns is taken from the first sample. Empty input leaves
     * the encoder with no categories.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        // Vectors from an earlier fit have a different length and indexing, so
        // they must not survive a refit.
        $this->categories = [];

        if ($samples === []) {
            return;
        }

        // $samples is a by-value copy, so moving its internal pointer is harmless.
        $firstSample = reset($samples);

        foreach (array_keys(array_values($firstSample)) as $column) {
            $this->fitColumn($column, array_values(array_unique(array_column($samples, $column))));
        }
    }

    /**
     * Replaces every sample, in place, with its concatenated one-hot encoding.
     *
     * @throws InvalidArgumentException for an unseen category unless ignoreUnknown is set
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        foreach ($samples as &$sample) {
            $sample = $this->transformSample(array_values($sample));
        }
        unset($sample);
    }

    /**
     * Stores one one-hot vector per distinct value of the given column.
     */
    private function fitColumn(int $column, array $values): void
    {
        $count = count($values);
        foreach ($values as $index => $value) {
            $map = array_fill(0, $count, 0);
            $map[$index] = 1;
            $this->categories[$column][$value] = $map;
        }
    }

    /**
     * Encodes a single, list-indexed sample.
     *
     * @throws InvalidArgumentException for an unseen category unless ignoreUnknown is set
     */
    private function transformSample(array $sample): array
    {
        $encoded = [];
        foreach ($sample as $column => $feature) {
            if (!isset($this->categories[$column][$feature]) && !$this->ignoreUnknown) {
                throw new InvalidArgumentException(sprintf('Missing category "%s" for column %s in trained encoder', $feature, $column));
            }

            // A column that was never fitted has no slots, hence the `?? []`.
            $encoded = array_merge(
                $encoded,
                $this->categories[$column][$feature] ?? array_fill(0, count($this->categories[$column] ?? []), 0)
            );
        }

        return $encoded;
    }
}

View File

@ -0,0 +1,64 @@
<?php
declare(strict_types=1);
namespace Phpml\Tokenization;
use Phpml\Exception\InvalidArgumentException;
/**
 * Tokenizer that emits word n-grams for every n between minGram and maxGram.
 */
class NGramWordTokenizer extends WordTokenizer
{
    /**
     * Smallest n-gram size produced.
     *
     * @var int
     */
    private $minGram;

    /**
     * Largest n-gram size produced.
     *
     * @var int
     */
    private $maxGram;

    /**
     * @throws InvalidArgumentException when a bound is below 1 or minGram exceeds maxGram
     */
    public function __construct(int $minGram = 1, int $maxGram = 2)
    {
        if ($minGram < 1 || $maxGram < 1 || $minGram > $maxGram) {
            throw new InvalidArgumentException(sprintf('Invalid (%s, %s) minGram and maxGram value combination', $minGram, $maxGram));
        }

        $this->minGram = $minGram;
        $this->maxGram = $maxGram;
    }

    /**
     * Splits the text into words of at least two word characters and returns
     * all n-grams, grouped by size from minGram up to maxGram.
     */
    public function tokenize(string $text): array
    {
        preg_match_all('/\w\w+/u', $text, $matches);
        $words = $matches[0];

        $tokens = [];
        $size = $this->minGram;
        while ($size <= $this->maxGram) {
            $tokens = array_merge($tokens, $this->getNgrams($words, $size));
            ++$size;
        }

        return $tokens;
    }

    /**
     * Builds every run of $n consecutive words, joined by single spaces.
     *
     * @param array $match list of words
     */
    private function getNgrams(array $match, int $n = 2): array
    {
        $ngrams = [];
        $total = count($match);

        // $end is the index of the last word of the current n-gram.
        for ($end = $n - 1; $end < $total; ++$end) {
            $window = array_slice($match, $end - $n + 1, $n);
            $ngrams[] = trim(implode(' ', $window));
        }

        return $ngrams;
    }
}

View File

@ -11,5 +11,5 @@ interface Transformer
*/
public function fit(array $samples, ?array $targets = null): void;
public function transform(array &$samples): void;
public function transform(array &$samples, ?array &$targets = null): void;
}

View File

@ -1,7 +0,0 @@
Description of php-ml import into mlbackend_php.
The current version is de50490.
Procedure:
* Get rid of everything else than src/ directory and LICENSE
* Copy src/ and LICENSE into lib/mlbackend/php/phpml/

View File

@ -4,7 +4,7 @@
<location>phpml</location>
<name>PHP-ML</name>
<license>MIT</license>
<version>0.8.0</version>
<version>0.9.0</version>
<licenseversion></licenseversion>
</library>
</libraries>