mirror of
https://github.com/moodle/moodle.git
synced 2025-01-19 06:18:28 +01:00
Merge branch 'MDL-71715-master' of https://github.com/ilyatregubov/moodle
This commit is contained in:
commit
12be973ee6
@ -1,6 +1,6 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016-2018 Arkadiusz Kondas <arkadiusz.kondas[at]gmail>
|
||||
Copyright (c) 2016-2020 Arkadiusz Kondas <arkadiusz.kondas[at]gmail>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
@ -4,3 +4,5 @@ Current version is 0.8.0
|
||||
# Remove all files but:
|
||||
* src/
|
||||
* LICENSE
|
||||
# Copy content of src/ to /path/to/moodle/lib/mlbackend/php/phpml/src/Phpml
|
||||
# Copy LICENSE file to /path/to/moodle/lib/mlbackend/php/phpml
|
||||
|
@ -104,11 +104,11 @@ class Apriori implements Associator
|
||||
*/
|
||||
protected function predictSample(array $sample): array
|
||||
{
|
||||
$predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
|
||||
$predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample): bool {
|
||||
return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
|
||||
}));
|
||||
|
||||
return array_map(function ($rule) {
|
||||
return array_map(static function ($rule) {
|
||||
return $rule[self::ARRAY_KEY_CONSEQUENT];
|
||||
}, $predicts);
|
||||
}
|
||||
@ -177,7 +177,7 @@ class Apriori implements Associator
|
||||
$cardinality = count($sample);
|
||||
$antecedents = $this->powerSet($sample);
|
||||
|
||||
return array_filter($antecedents, function ($antecedent) use ($cardinality) {
|
||||
return array_filter($antecedents, static function ($antecedent) use ($cardinality): bool {
|
||||
return (count($antecedent) != $cardinality) && ($antecedent != []);
|
||||
});
|
||||
}
|
||||
@ -199,7 +199,7 @@ class Apriori implements Associator
|
||||
}
|
||||
}
|
||||
|
||||
return array_map(function ($entry) {
|
||||
return array_map(static function ($entry): array {
|
||||
return [$entry];
|
||||
}, $items);
|
||||
}
|
||||
@ -213,7 +213,7 @@ class Apriori implements Associator
|
||||
*/
|
||||
private function frequent(array $samples): array
|
||||
{
|
||||
return array_values(array_filter($samples, function ($entry) {
|
||||
return array_values(array_filter($samples, function ($entry): bool {
|
||||
return $this->support($entry) >= $this->support;
|
||||
}));
|
||||
}
|
||||
@ -288,7 +288,7 @@ class Apriori implements Associator
|
||||
*/
|
||||
private function frequency(array $sample): int
|
||||
{
|
||||
return count(array_filter($this->samples, function ($entry) use ($sample) {
|
||||
return count(array_filter($this->samples, function ($entry) use ($sample): bool {
|
||||
return $this->subset($entry, $sample);
|
||||
}));
|
||||
}
|
||||
@ -303,7 +303,7 @@ class Apriori implements Associator
|
||||
*/
|
||||
private function contains(array $system, array $set): bool
|
||||
{
|
||||
return (bool) array_filter($system, function ($entry) use ($set) {
|
||||
return (bool) array_filter($system, function ($entry) use ($set): bool {
|
||||
return $this->equals($entry, $set);
|
||||
});
|
||||
}
|
||||
|
@ -41,7 +41,7 @@ class RandomForest extends Bagging
|
||||
* Default value for the ratio is 'log' which results in log(numFeatures, 2) + 1
|
||||
* features to be taken into consideration while selecting subspace of features
|
||||
*
|
||||
* @param string|float $ratio
|
||||
* @param mixed $ratio
|
||||
*/
|
||||
public function setFeatureSubsetRatio($ratio): self
|
||||
{
|
||||
@ -73,7 +73,9 @@ class RandomForest extends Bagging
|
||||
throw new InvalidArgumentException('RandomForest can only use DecisionTree as base classifier');
|
||||
}
|
||||
|
||||
return parent::setClassifer($classifier, $classifierOptions);
|
||||
parent::setClassifer($classifier, $classifierOptions);
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -122,12 +124,16 @@ class RandomForest extends Bagging
|
||||
}
|
||||
|
||||
/**
|
||||
* @param DecisionTree $classifier
|
||||
*
|
||||
* @return DecisionTree
|
||||
*/
|
||||
protected function initSingleClassifier(Classifier $classifier): Classifier
|
||||
{
|
||||
if (!$classifier instanceof DecisionTree) {
|
||||
throw new InvalidArgumentException(
|
||||
sprintf('Classifier %s expected, got %s', DecisionTree::class, get_class($classifier))
|
||||
);
|
||||
}
|
||||
|
||||
if (is_float($this->featureSubsetRatio)) {
|
||||
$featureCount = (int) ($this->featureSubsetRatio * $this->featureCount);
|
||||
} elseif ($this->featureSubsetRatio === 'sqrt') {
|
||||
|
@ -58,7 +58,7 @@ class Adaline extends Perceptron
|
||||
protected function runTraining(array $samples, array $targets): void
|
||||
{
|
||||
// The cost function is the sum of squares
|
||||
$callback = function ($weights, $sample, $target) {
|
||||
$callback = function ($weights, $sample, $target): array {
|
||||
$this->weights = $weights;
|
||||
|
||||
$output = $this->output($sample);
|
||||
|
@ -188,7 +188,7 @@ class LogisticRegression extends Adaline
|
||||
* The gradient of the cost function to be used with gradient descent:
|
||||
* ∇J(x) = -(y - h(x)) = (h(x) - y)
|
||||
*/
|
||||
return function ($weights, $sample, $y) use ($penalty) {
|
||||
return function ($weights, $sample, $y) use ($penalty): array {
|
||||
$this->weights = $weights;
|
||||
$hX = $this->output($sample);
|
||||
|
||||
@ -220,13 +220,13 @@ class LogisticRegression extends Adaline
|
||||
* The gradient of the cost function:
|
||||
* ∇J(x) = -(h(x) - y) . h(x) . (1 - h(x))
|
||||
*/
|
||||
return function ($weights, $sample, $y) use ($penalty) {
|
||||
return function ($weights, $sample, $y) use ($penalty): array {
|
||||
$this->weights = $weights;
|
||||
$hX = $this->output($sample);
|
||||
|
||||
$y = $y < 0 ? 0 : 1;
|
||||
|
||||
$error = ($y - $hX) ** 2;
|
||||
$error = (($y - $hX) ** 2);
|
||||
$gradient = -($y - $hX) * $hX * (1 - $hX);
|
||||
|
||||
return [$error, $gradient, $penalty];
|
||||
|
@ -154,7 +154,7 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
protected function runTraining(array $samples, array $targets): void
|
||||
{
|
||||
// The cost function is the sum of squares
|
||||
$callback = function ($weights, $sample, $target) {
|
||||
$callback = function ($weights, $sample, $target): array {
|
||||
$this->weights = $weights;
|
||||
|
||||
$prediction = $this->outputClass($sample);
|
||||
|
@ -77,9 +77,6 @@ class FuzzyCMeans implements Clusterer
|
||||
return $this->membership;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param Point[]|int[][] $samples
|
||||
*/
|
||||
public function cluster(array $samples): array
|
||||
{
|
||||
// Initialize variables, clusters and membership matrix
|
||||
@ -142,7 +139,7 @@ class FuzzyCMeans implements Clusterer
|
||||
$total += $val;
|
||||
}
|
||||
|
||||
$this->membership[] = array_map(function ($val) use ($total) {
|
||||
$this->membership[] = array_map(static function ($val) use ($total): float {
|
||||
return $val / $total;
|
||||
}, $row);
|
||||
}
|
||||
@ -210,7 +207,7 @@ class FuzzyCMeans implements Clusterer
|
||||
$this->samples[$col]
|
||||
);
|
||||
|
||||
$val = ($dist1 / $dist2) ** 2.0 / ($this->fuzziness - 1);
|
||||
$val = (($dist1 / $dist2) ** 2.0) / ($this->fuzziness - 1);
|
||||
$sum += $val;
|
||||
}
|
||||
|
||||
|
@ -88,7 +88,7 @@ class Space extends SplObjectStorage
|
||||
$min = $this->newPoint(array_fill(0, $this->dimension, null));
|
||||
$max = $this->newPoint(array_fill(0, $this->dimension, null));
|
||||
|
||||
/** @var self $point */
|
||||
/** @var Point $point */
|
||||
foreach ($this as $point) {
|
||||
for ($n = 0; $n < $this->dimension; ++$n) {
|
||||
if ($min[$n] === null || $min[$n] > $point[$n]) {
|
||||
@ -167,6 +167,10 @@ class Space extends SplObjectStorage
|
||||
foreach ($cluster as $point) {
|
||||
$closest = $point->getClosest($clusters);
|
||||
|
||||
if ($closest === null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($closest !== $cluster) {
|
||||
$attach[$closest] ?? $attach[$closest] = new SplObjectStorage();
|
||||
$detach[$cluster] ?? $detach[$cluster] = new SplObjectStorage();
|
||||
|
@ -35,8 +35,8 @@ class CsvDataset extends ArrayDataset
|
||||
}
|
||||
|
||||
$samples = $targets = [];
|
||||
while (($data = fgetcsv($handle, $maxLineLength, $delimiter)) !== false) {
|
||||
$samples[] = array_slice((array) $data, 0, $features);
|
||||
while ($data = fgetcsv($handle, $maxLineLength, $delimiter)) {
|
||||
$samples[] = array_slice($data, 0, $features);
|
||||
$targets[] = $data[$features];
|
||||
}
|
||||
|
||||
|
@ -19,7 +19,13 @@ class FilesDataset extends ArrayDataset
|
||||
|
||||
private function scanRootPath(string $rootPath): void
|
||||
{
|
||||
foreach (glob($rootPath.DIRECTORY_SEPARATOR.'*', GLOB_ONLYDIR) as $dir) {
|
||||
$dirs = glob($rootPath.DIRECTORY_SEPARATOR.'*', GLOB_ONLYDIR);
|
||||
|
||||
if ($dirs === false) {
|
||||
throw new DatasetException(sprintf('An error occurred during directory "%s" scan', $rootPath));
|
||||
}
|
||||
|
||||
foreach ($dirs as $dir) {
|
||||
$this->scanDir($dir);
|
||||
}
|
||||
}
|
||||
@ -28,7 +34,12 @@ class FilesDataset extends ArrayDataset
|
||||
{
|
||||
$target = basename($dir);
|
||||
|
||||
foreach (array_filter(glob($dir.DIRECTORY_SEPARATOR.'*'), 'is_file') as $file) {
|
||||
$files = glob($dir.DIRECTORY_SEPARATOR.'*');
|
||||
if ($files === false) {
|
||||
return;
|
||||
}
|
||||
|
||||
foreach (array_filter($files, 'is_file') as $file) {
|
||||
$this->samples[] = file_get_contents($file);
|
||||
$this->targets[] = $target;
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ class SvmDataset extends ArrayDataset
|
||||
$targets = [];
|
||||
$maxIndex = 0;
|
||||
while (false !== $line = fgets($handle)) {
|
||||
[$sample, $target, $maxIndex] = self::processLine((string) $line, $maxIndex);
|
||||
[$sample, $target, $maxIndex] = self::processLine($line, $maxIndex);
|
||||
$samples[] = $sample;
|
||||
$targets[] = $target;
|
||||
}
|
||||
|
@ -179,13 +179,13 @@ class KernelPCA extends PCA
|
||||
// k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance
|
||||
$dist = new Euclidean();
|
||||
|
||||
return function ($x, $y) use ($dist) {
|
||||
return function ($x, $y) use ($dist): float {
|
||||
return exp(-$this->gamma * $dist->sqDistance($x, $y));
|
||||
};
|
||||
|
||||
case self::KERNEL_SIGMOID:
|
||||
// k(x,y)=tanh(γ.xT.y+c0) where c0=1
|
||||
return function ($x, $y) {
|
||||
return function ($x, $y): float {
|
||||
$res = Matrix::dot($x, $y)[0] + 1.0;
|
||||
|
||||
return tanh((float) $this->gamma * $res);
|
||||
@ -195,7 +195,7 @@ class KernelPCA extends PCA
|
||||
// k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
|
||||
$dist = new Manhattan();
|
||||
|
||||
return function ($x, $y) use ($dist) {
|
||||
return function ($x, $y) use ($dist): float {
|
||||
return exp(-$this->gamma * $dist->distance($x, $y));
|
||||
};
|
||||
|
||||
|
@ -0,0 +1,30 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\FeatureExtraction\StopWords;
|
||||
|
||||
use Phpml\FeatureExtraction\StopWords;
|
||||
|
||||
/**
 * Stop-word list for Russian text.
 *
 * The words below are handed to the parent StopWords constructor unchanged.
 */
final class Russian extends StopWords
{
    /**
     * Russian stop words, in the order they are passed to the parent.
     *
     * @var string[]
     */
    protected $stopWords = [
        'и', 'в', 'во', 'не', 'что', 'он', 'на', 'я', 'с', 'со', 'как', 'а', 'то', 'все', 'она', 'так', 'его', 'но', 'да', 'ты', 'к', 'у',
        'же', 'вы', 'за', 'бы', 'по', 'только', 'ее', 'мне', 'было', 'вот', 'от', 'меня', 'еще', 'нет', 'о', 'из', 'ему', 'теперь', 'когда',
        'даже', 'ну', 'вдруг', 'ли', 'если', 'уже', 'или', 'ни', 'быть', 'был', 'него', 'до', 'вас', 'нибудь', 'опять', 'уж', 'вам', 'ведь',
        'там', 'потом', 'себя', 'ничего', 'ей', 'может', 'они', 'тут', 'где', 'есть', 'надо', 'ней', 'для', 'мы', 'тебя', 'их', 'чем', 'была',
        'сам', 'чтоб', 'без', 'будто', 'чего', 'раз', 'тоже', 'себе', 'под', 'будет', 'ж', 'тогда', 'кто', 'этот', 'того', 'потому', 'этого',
        'какой', 'совсем', 'ним', 'здесь', 'этом', 'один', 'почти', 'мой', 'тем', 'чтобы', 'нее', 'сейчас', 'были', 'куда', 'зачем', 'всех',
        'никогда', 'можно', 'при', 'наконец', 'два', 'об', 'другой', 'хоть', 'после', 'над', 'больше', 'тот', 'через', 'эти', 'нас', 'про',
        'всего', 'них', 'какая', 'много', 'разве', 'три', 'эту', 'моя', 'впрочем', 'хорошо', 'свою', 'этой', 'перед', 'иногда', 'лучше', 'чуть',
        'том', 'нельзя', 'такой', 'им', 'более', 'всегда', 'конечно', 'всю', 'между',
    ];

    /**
     * Registers the Russian word list with the parent StopWords instance.
     */
    public function __construct()
    {
        parent::__construct($this->stopWords);
    }
}
|
@ -30,7 +30,7 @@ class TfIdfTransformer implements Transformer
|
||||
}
|
||||
}
|
||||
|
||||
public function transform(array &$samples): void
|
||||
public function transform(array &$samples, ?array &$targets = null): void
|
||||
{
|
||||
foreach ($samples as &$sample) {
|
||||
foreach ($sample as $index => &$feature) {
|
||||
|
@ -46,7 +46,7 @@ class TokenCountVectorizer implements Transformer
|
||||
$this->buildVocabulary($samples);
|
||||
}
|
||||
|
||||
public function transform(array &$samples): void
|
||||
public function transform(array &$samples, ?array &$targets = null): void
|
||||
{
|
||||
array_walk($samples, function (string &$sample): void {
|
||||
$this->transformSample($sample);
|
||||
|
@ -46,7 +46,7 @@ final class UnivariateLinearRegression implements ScoringFunction
|
||||
foreach (array_keys($samples[0]) as $index) {
|
||||
$featureColumn = array_column($samples, $index);
|
||||
$correlations[$index] =
|
||||
(Matrix::dot($targets, $featureColumn)[0] / (new Matrix($featureColumn, false))->transpose()->frobeniusNorm())
|
||||
Matrix::dot($targets, $featureColumn)[0] / (new Matrix($featureColumn, false))->transpose()->frobeniusNorm()
|
||||
/ (new Matrix($targets, false))->frobeniusNorm();
|
||||
}
|
||||
|
||||
|
@ -56,7 +56,7 @@ final class SelectKBest implements Transformer
|
||||
$this->keepColumns = array_slice($sorted, 0, $this->k, true);
|
||||
}
|
||||
|
||||
public function transform(array &$samples): void
|
||||
public function transform(array &$samples, ?array &$targets = null): void
|
||||
{
|
||||
if ($this->keepColumns === null) {
|
||||
return;
|
||||
|
@ -37,7 +37,7 @@ final class VarianceThreshold implements Transformer
|
||||
|
||||
public function fit(array $samples, ?array $targets = null): void
|
||||
{
|
||||
$this->variances = array_map(function (array $column) {
|
||||
$this->variances = array_map(static function (array $column): float {
|
||||
return Variance::population($column);
|
||||
}, Matrix::transposeArray($samples));
|
||||
|
||||
@ -48,7 +48,7 @@ final class VarianceThreshold implements Transformer
|
||||
}
|
||||
}
|
||||
|
||||
public function transform(array &$samples): void
|
||||
public function transform(array &$samples, ?array &$targets = null): void
|
||||
{
|
||||
foreach ($samples as &$sample) {
|
||||
$sample = array_values(array_intersect_key($sample, $this->keepColumns));
|
||||
|
72
lib/mlbackend/php/phpml/src/Phpml/FeatureUnion.php
Normal file
72
lib/mlbackend/php/phpml/src/Phpml/FeatureUnion.php
Normal file
@ -0,0 +1,72 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
/**
 * Runs several pipelines over the same input samples and concatenates the
 * per-sample results of each pipeline into a single feature row.
 */
final class FeatureUnion implements Transformer
{
    /**
     * Pipelines whose transformer chains are applied independently.
     *
     * @var Pipeline[]
     */
    private $pipelines = [];

    /**
     * @param Pipeline[] $pipelines
     *
     * @throws InvalidArgumentException when the list of pipelines is empty
     */
    public function __construct(array $pipelines)
    {
        if ($pipelines === []) {
            throw new InvalidArgumentException('At least one pipeline is required');
        }

        // The typed identity closure makes PHP reject any element that is not a Pipeline.
        $requirePipeline = static function (Pipeline $pipeline): Pipeline {
            return $pipeline;
        };

        $this->pipelines = array_map($requirePipeline, $pipelines);
    }

    /**
     * Fits every transformer of every pipeline; each pipeline starts from the
     * unmodified input samples.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        foreach ($this->pipelines as $pipeline) {
            // Work on a fresh copy so one pipeline's output never feeds the next.
            $working = $samples;
            $this->runTransformers($pipeline, $working, $targets, true);
        }
    }

    /**
     * Replaces $samples with the union of all pipeline outputs.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        $this->transformSamples($samples, $targets);
    }

    /**
     * Same as transform(), but fits each transformer right before applying it.
     */
    public function fitAndTransform(array &$samples, ?array &$targets = null): void
    {
        $this->transformSamples($samples, $targets, true);
    }

    /**
     * Applies each pipeline to the original samples and merges the results
     * row by row, in pipeline order.
     */
    private function transformSamples(array &$samples, ?array &$targets = null, bool $fit = false): void
    {
        $pristine = $samples;
        $combined = [];

        foreach ($this->pipelines as $pipeline) {
            $this->runTransformers($pipeline, $samples, $targets, $fit);

            foreach ($samples as $row => $features) {
                // A scalar result is treated as a single-feature row.
                if (!is_array($features)) {
                    $features = [$features];
                }

                $combined[$row] = array_merge($combined[$row] ?? [], $features);
            }

            // Restore the input so the next pipeline sees untransformed samples.
            $samples = $pristine;
        }

        $samples = $combined;
    }

    /**
     * Pushes the samples through all transformers of one pipeline, optionally
     * fitting each transformer before it is applied.
     */
    private function runTransformers(Pipeline $pipeline, array &$samples, ?array &$targets, bool $fit): void
    {
        foreach ($pipeline->getTransformers() as $transformer) {
            if ($fit) {
                $transformer->fit($samples, $targets);
            }

            $transformer->transform($samples, $targets);
        }
    }
}
|
@ -38,7 +38,7 @@ class GD extends StochasticGD
|
||||
|
||||
$this->updateWeightsWithUpdates($updates, $totalPenalty);
|
||||
|
||||
$this->costValues[] = array_sum($errors) / $this->sampleCount;
|
||||
$this->costValues[] = array_sum($errors) / (int) $this->sampleCount;
|
||||
|
||||
if ($this->earlyStop($theta)) {
|
||||
break;
|
||||
|
@ -48,6 +48,11 @@ abstract class Optimizer
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function theta(): array
|
||||
{
|
||||
return $this->theta;
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes the optimization with the given samples & targets
|
||||
* and returns the weights
|
||||
|
@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Math\Kernel;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
use Phpml\Math\Kernel;
|
||||
use Phpml\Math\Product;
|
||||
|
||||
@ -19,12 +20,12 @@ class RBF implements Kernel
|
||||
$this->gamma = $gamma;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $a
|
||||
* @param array $b
|
||||
*/
|
||||
public function compute($a, $b): float
|
||||
{
|
||||
if (!is_array($a) || !is_array($b)) {
|
||||
throw new InvalidArgumentException(sprintf('Arguments of %s must be arrays', __METHOD__));
|
||||
}
|
||||
|
||||
$score = 2 * Product::scalar($a, $b);
|
||||
$squares = Product::scalar($a, $a) + Product::scalar($b, $b);
|
||||
|
||||
|
@ -502,7 +502,8 @@ class EigenvalueDecomposition
|
||||
}
|
||||
|
||||
// Double division avoids possible underflow
|
||||
$g = ($g / $this->ort[$m]) / $this->H[$m][$m - 1];
|
||||
$g /= $this->ort[$m];
|
||||
$g /= $this->H[$m][$m - 1];
|
||||
for ($i = $m; $i <= $high; ++$i) {
|
||||
$this->V[$i][$j] += $g * $this->ort[$i];
|
||||
}
|
||||
@ -734,7 +735,7 @@ class EigenvalueDecomposition
|
||||
|
||||
// Double QR step involving rows l:n and columns m:n
|
||||
for ($k = $m; $k <= $n - 1; ++$k) {
|
||||
$notlast = ($k != $n - 1);
|
||||
$notlast = $k != $n - 1;
|
||||
if ($k != $m) {
|
||||
$p = $this->H[$k][$k - 1];
|
||||
$q = $this->H[$k + 1][$k - 1];
|
||||
|
@ -126,7 +126,7 @@ class Matrix
|
||||
public function transpose(): self
|
||||
{
|
||||
if ($this->rows === 1) {
|
||||
$matrix = array_map(function ($el) {
|
||||
$matrix = array_map(static function ($el): array {
|
||||
return [$el];
|
||||
}, $this->matrix[0]);
|
||||
} else {
|
||||
@ -201,7 +201,7 @@ class Matrix
|
||||
*/
|
||||
public function add(self $other): self
|
||||
{
|
||||
return $this->_add($other);
|
||||
return $this->sum($other);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -209,7 +209,7 @@ class Matrix
|
||||
*/
|
||||
public function subtract(self $other): self
|
||||
{
|
||||
return $this->_add($other, -1);
|
||||
return $this->sum($other, -1);
|
||||
}
|
||||
|
||||
public function inverse(): self
|
||||
@ -297,7 +297,7 @@ class Matrix
|
||||
/**
|
||||
* Element-wise addition or subtraction depending on the given sign parameter
|
||||
*/
|
||||
private function _add(self $other, int $sign = 1): self
|
||||
private function sum(self $other, int $sign = 1): self
|
||||
{
|
||||
$a1 = $this->toArray();
|
||||
$a2 = $other->toArray();
|
||||
|
@ -28,7 +28,7 @@ final class ANOVA
|
||||
throw new InvalidArgumentException('The array must have at least 2 elements');
|
||||
}
|
||||
|
||||
$samplesPerClass = array_map(function (array $class): int {
|
||||
$samplesPerClass = array_map(static function (array $class): int {
|
||||
return count($class);
|
||||
}, $samples);
|
||||
$allSamples = (int) array_sum($samplesPerClass);
|
||||
@ -41,10 +41,14 @@ final class ANOVA
|
||||
$dfbn = $classes - 1;
|
||||
$dfwn = $allSamples - $classes;
|
||||
|
||||
$msb = array_map(function ($s) use ($dfbn) {
|
||||
$msb = array_map(static function ($s) use ($dfbn) {
|
||||
return $s / $dfbn;
|
||||
}, $ssbn);
|
||||
$msw = array_map(function ($s) use ($dfwn) {
|
||||
$msw = array_map(static function ($s) use ($dfwn) {
|
||||
if ($dfwn === 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return $s / $dfwn;
|
||||
}, $sswn);
|
||||
|
||||
@ -72,7 +76,7 @@ final class ANOVA
|
||||
|
||||
private static function sumOfFeaturesPerClass(array $samples): array
|
||||
{
|
||||
return array_map(function (array $class) {
|
||||
return array_map(static function (array $class): array {
|
||||
$sum = array_fill(0, count($class[0]), 0);
|
||||
foreach ($class as $sample) {
|
||||
foreach ($sample as $index => $feature) {
|
||||
@ -93,7 +97,7 @@ final class ANOVA
|
||||
}
|
||||
}
|
||||
|
||||
return array_map(function ($sum) {
|
||||
return array_map(static function ($sum) {
|
||||
return $sum ** 2;
|
||||
}, $squares);
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ class StandardDeviation
|
||||
$mean = Mean::arithmetic($numbers);
|
||||
|
||||
return array_sum(array_map(
|
||||
function ($val) use ($mean) {
|
||||
static function ($val) use ($mean): float {
|
||||
return ($val - $mean) ** 2;
|
||||
},
|
||||
$numbers
|
||||
|
@ -148,7 +148,7 @@ class ClassificationReport
|
||||
|
||||
$precision = $this->computePrecision($truePositive, $falsePositive);
|
||||
$recall = $this->computeRecall($truePositive, $falseNegative);
|
||||
$f1score = $this->computeF1Score((float) $precision, (float) $recall);
|
||||
$f1score = $this->computeF1Score($precision, $recall);
|
||||
|
||||
$this->average = compact('precision', 'recall', 'f1score');
|
||||
}
|
||||
@ -186,10 +186,7 @@ class ClassificationReport
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return float|string
|
||||
*/
|
||||
private function computePrecision(int $truePositive, int $falsePositive)
|
||||
private function computePrecision(int $truePositive, int $falsePositive): float
|
||||
{
|
||||
$divider = $truePositive + $falsePositive;
|
||||
if ($divider == 0) {
|
||||
@ -199,10 +196,7 @@ class ClassificationReport
|
||||
return $truePositive / $divider;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return float|string
|
||||
*/
|
||||
private function computeRecall(int $truePositive, int $falseNegative)
|
||||
private function computeRecall(int $truePositive, int $falseNegative): float
|
||||
{
|
||||
$divider = $truePositive + $falseNegative;
|
||||
if ($divider == 0) {
|
||||
|
86
lib/mlbackend/php/phpml/src/Phpml/Metric/Regression.php
Normal file
86
lib/mlbackend/php/phpml/src/Phpml/Metric/Regression.php
Normal file
@ -0,0 +1,86 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Metric;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
use Phpml\Math\Statistic\Correlation;
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
/**
 * Static helpers for scoring regression predictions against known targets.
 *
 * Every metric requires $targets and $predictions to have the same number of
 * elements and pairs them by array key.
 */
final class Regression
{
    /**
     * Average of the squared differences between targets and predictions.
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    public static function meanSquaredError(array $targets, array $predictions): float
    {
        self::assertCountEquals($targets, $predictions);

        $errors = [];
        foreach ($targets as $index => $target) {
            $errors[] = ($target - $predictions[$index]) ** 2;
        }

        return Mean::arithmetic($errors);
    }

    /**
     * Average of the squared log ratios log((1 + target) / (1 + prediction)).
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    public static function meanSquaredLogarithmicError(array $targets, array $predictions): float
    {
        self::assertCountEquals($targets, $predictions);

        $errors = [];
        foreach ($targets as $index => $target) {
            $errors[] = log((1 + $target) / (1 + $predictions[$index])) ** 2;
        }

        return Mean::arithmetic($errors);
    }

    /**
     * Average of the absolute differences between targets and predictions.
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    public static function meanAbsoluteError(array $targets, array $predictions): float
    {
        return Mean::arithmetic(self::absoluteErrors($targets, $predictions));
    }

    /**
     * Median of the absolute differences between targets and predictions.
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    public static function medianAbsoluteError(array $targets, array $predictions): float
    {
        return (float) Mean::median(self::absoluteErrors($targets, $predictions));
    }

    /**
     * Squared Pearson correlation between targets and predictions.
     *
     * NOTE(review): the squared Pearson correlation is not in general equal to
     * the coefficient of determination (1 - SS_res / SS_tot); confirm this is
     * the intended definition before relying on it for model comparison.
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    public static function r2Score(array $targets, array $predictions): float
    {
        self::assertCountEquals($targets, $predictions);

        return Correlation::pearson($targets, $predictions) ** 2;
    }

    /**
     * Largest absolute difference between a target and its prediction.
     *
     * @throws InvalidArgumentException when the arrays differ in length or are empty
     */
    public static function maxError(array $targets, array $predictions): float
    {
        $errors = self::absoluteErrors($targets, $predictions);

        // max() on an empty array raises ValueError (PHP 8) or returns false with
        // a warning (PHP 7); report the problem with the library's own exception.
        if ($errors === []) {
            throw new InvalidArgumentException('Targets and predictions must not be empty');
        }

        return (float) max($errors);
    }

    /**
     * Builds the list of |target - prediction| values, pairing entries by key.
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    private static function absoluteErrors(array $targets, array $predictions): array
    {
        self::assertCountEquals($targets, $predictions);

        $errors = [];
        foreach ($targets as $index => $target) {
            $errors[] = abs($target - $predictions[$index]);
        }

        return $errors;
    }

    /**
     * Guards every metric against inputs of different length.
     *
     * @throws InvalidArgumentException when the counts differ
     */
    private static function assertCountEquals(array $targets, array $predictions): void
    {
        if (count($targets) !== count($predictions)) {
            throw new InvalidArgumentException('Targets count must be equal with predictions count');
        }
    }
}
|
@ -12,29 +12,29 @@ class ModelManager
|
||||
public function saveToFile(Estimator $estimator, string $filepath): void
|
||||
{
|
||||
if (!is_writable(dirname($filepath))) {
|
||||
throw new FileException(sprintf('File "%s" can\'t be saved.', basename($filepath)));
|
||||
throw new FileException(sprintf('File "%s" cannot be saved.', basename($filepath)));
|
||||
}
|
||||
|
||||
$serialized = serialize($estimator);
|
||||
if (!isset($serialized[0])) {
|
||||
throw new SerializeException(sprintf('Class "%s" can not be serialized.', gettype($estimator)));
|
||||
throw new SerializeException(sprintf('Class "%s" cannot be serialized.', gettype($estimator)));
|
||||
}
|
||||
|
||||
$result = file_put_contents($filepath, $serialized, LOCK_EX);
|
||||
if ($result === false) {
|
||||
throw new FileException(sprintf('File "%s" can\'t be saved.', basename($filepath)));
|
||||
throw new FileException(sprintf('File "%s" cannot be saved.', basename($filepath)));
|
||||
}
|
||||
}
|
||||
|
||||
public function restoreFromFile(string $filepath): Estimator
|
||||
{
|
||||
if (!file_exists($filepath) || !is_readable($filepath)) {
|
||||
throw new FileException(sprintf('File "%s" can\'t be open.', basename($filepath)));
|
||||
throw new FileException(sprintf('File "%s" cannot be opened.', basename($filepath)));
|
||||
}
|
||||
|
||||
$object = unserialize((string) file_get_contents($filepath), [Estimator::class]);
|
||||
if ($object === false) {
|
||||
throw new SerializeException(sprintf('"%s" can not be unserialized.', basename($filepath)));
|
||||
$object = unserialize((string) file_get_contents($filepath));
|
||||
if ($object === false || !$object instanceof Estimator) {
|
||||
throw new SerializeException(sprintf('"%s" cannot be unserialized.', basename($filepath)));
|
||||
}
|
||||
|
||||
return $object;
|
||||
|
@ -59,8 +59,14 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
|
||||
/**
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function __construct(int $inputLayerFeatures, array $hiddenLayers, array $classes, int $iterations = 10000, ?ActivationFunction $activationFunction = null, float $learningRate = 1)
|
||||
{
|
||||
public function __construct(
|
||||
int $inputLayerFeatures,
|
||||
array $hiddenLayers,
|
||||
array $classes,
|
||||
int $iterations = 10000,
|
||||
?ActivationFunction $activationFunction = null,
|
||||
float $learningRate = 1.
|
||||
) {
|
||||
if (count($hiddenLayers) === 0) {
|
||||
throw new InvalidArgumentException('Provide at least 1 hidden layer');
|
||||
}
|
||||
@ -123,6 +129,16 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
|
||||
return $result;
|
||||
}
|
||||
|
||||
public function getLearningRate(): float
|
||||
{
|
||||
return $this->learningRate;
|
||||
}
|
||||
|
||||
public function getBackpropagation(): Backpropagation
|
||||
{
|
||||
return $this->backpropagation;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param mixed $target
|
||||
*/
|
||||
|
@ -33,7 +33,7 @@ class Neuron implements Node
|
||||
|
||||
public function __construct(?ActivationFunction $activationFunction = null)
|
||||
{
|
||||
$this->activationFunction = $activationFunction ?: new Sigmoid();
|
||||
$this->activationFunction = $activationFunction ?? new Sigmoid();
|
||||
}
|
||||
|
||||
public function addSynapse(Synapse $synapse): void
|
||||
|
@ -24,7 +24,7 @@ class Synapse
|
||||
public function __construct(Node $node, ?float $weight = null)
|
||||
{
|
||||
$this->node = $node;
|
||||
$this->weight = $weight ?: $this->generateRandomWeight();
|
||||
$this->weight = $weight ?? $this->generateRandomWeight();
|
||||
}
|
||||
|
||||
public function getOutput(): float
|
||||
|
@ -1,14 +0,0 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\NeuralNetwork;
|
||||
|
||||
/**
 * Contract for objects exposing a train() method that takes samples and targets.
 */
interface Training
{
    /**
     * @param array $samples input samples
     * @param array $targets target values — presumably one per sample; TODO confirm against implementers
     */
    public function train(array $samples, array $targets);
}
|
@ -34,6 +34,11 @@ class Backpropagation
|
||||
$this->learningRate = $learningRate;
|
||||
}
|
||||
|
||||
public function getLearningRate(): float
|
||||
{
|
||||
return $this->learningRate;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param mixed $targetClass
|
||||
*/
|
||||
|
@ -4,7 +4,9 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml;
|
||||
|
||||
class Pipeline implements Estimator
|
||||
use Phpml\Exception\InvalidOperationException;
|
||||
|
||||
class Pipeline implements Estimator, Transformer
|
||||
{
|
||||
/**
|
||||
* @var Transformer[]
|
||||
@ -12,29 +14,18 @@ class Pipeline implements Estimator
|
||||
private $transformers = [];
|
||||
|
||||
/**
|
||||
* @var Estimator
|
||||
* @var Estimator|null
|
||||
*/
|
||||
private $estimator;
|
||||
|
||||
/**
|
||||
* @param Transformer[] $transformers
|
||||
*/
|
||||
public function __construct(array $transformers, Estimator $estimator)
|
||||
{
|
||||
foreach ($transformers as $transformer) {
|
||||
$this->addTransformer($transformer);
|
||||
}
|
||||
|
||||
$this->estimator = $estimator;
|
||||
}
|
||||
|
||||
public function addTransformer(Transformer $transformer): void
|
||||
{
|
||||
$this->transformers[] = $transformer;
|
||||
}
|
||||
|
||||
public function setEstimator(Estimator $estimator): void
|
||||
public function __construct(array $transformers, ?Estimator $estimator = null)
|
||||
{
|
||||
$this->transformers = array_map(static function (Transformer $transformer): Transformer {
|
||||
return $transformer;
|
||||
}, $transformers);
|
||||
$this->estimator = $estimator;
|
||||
}
|
||||
|
||||
@ -46,16 +37,20 @@ class Pipeline implements Estimator
|
||||
return $this->transformers;
|
||||
}
|
||||
|
||||
public function getEstimator(): Estimator
|
||||
public function getEstimator(): ?Estimator
|
||||
{
|
||||
return $this->estimator;
|
||||
}
|
||||
|
||||
public function train(array $samples, array $targets): void
|
||||
{
|
||||
if ($this->estimator === null) {
|
||||
throw new InvalidOperationException('Pipeline without estimator can\'t use train method');
|
||||
}
|
||||
|
||||
foreach ($this->transformers as $transformer) {
|
||||
$transformer->fit($samples, $targets);
|
||||
$transformer->transform($samples);
|
||||
$transformer->transform($samples, $targets);
|
||||
}
|
||||
|
||||
$this->estimator->train($samples, $targets);
|
||||
@ -66,15 +61,27 @@ class Pipeline implements Estimator
|
||||
*/
|
||||
public function predict(array $samples)
|
||||
{
|
||||
$this->transformSamples($samples);
|
||||
$this->transform($samples);
|
||||
|
||||
if ($this->estimator === null) {
|
||||
throw new InvalidOperationException('Pipeline without estimator can\'t use predict method');
|
||||
}
|
||||
|
||||
return $this->estimator->predict($samples);
|
||||
}
|
||||
|
||||
private function transformSamples(array &$samples): void
|
||||
public function fit(array $samples, ?array $targets = null): void
|
||||
{
|
||||
foreach ($this->transformers as $transformer) {
|
||||
$transformer->transform($samples);
|
||||
$transformer->fit($samples, $targets);
|
||||
$transformer->transform($samples, $targets);
|
||||
}
|
||||
}
|
||||
|
||||
/**
* Runs transform() of every transformer held in $this->transformers, in array order.
*
* Both arguments are taken by reference and handed on by reference, so each
* transformer sees (and may modify) the output of the previous one.
*
* @param array      $samples Samples to transform in place.
* @param array|null $targets Optional targets, forwarded to every transformer.
*/
public function transform(array &$samples, ?array &$targets = null): void
|
||||
{
|
||||
foreach ($this->transformers as $transformer) {
|
||||
$transformer->transform($samples, $targets);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,42 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
/**
 * Preprocessor that reduces every sample to a chosen subset of its columns.
 *
 * The dataset column list names the entries of a sample (the key of a name is
 * matched against the key of the sample entry); only entries whose name also
 * appears in the filter list are kept, and the result is re-indexed from 0.
 */
final class ColumnFilter implements Preprocessor
{
    /**
     * Names of all columns of the dataset.
     *
     * @var string[]
     */
    private $datasetColumns = [];

    /**
     * Names of the columns that should survive the filtering.
     *
     * @var string[]
     */
    private $filterColumns = [];

    /**
     * @param string[] $datasetColumns Names of all dataset columns.
     * @param string[] $filterColumns  Names of the columns to keep.
     */
    public function __construct(array $datasetColumns, array $filterColumns)
    {
        // The typed identity closure only exists to type-check every entry as a string.
        $assertString = static function (string $name): string {
            return $name;
        };

        $this->datasetColumns = array_map($assertString, $datasetColumns);
        $this->filterColumns = array_map($assertString, $filterColumns);
    }

    /**
     * No-op: the filter has nothing to learn from the data.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        //nothing to do
    }

    /**
     * Replaces each sample with the re-indexed list of its kept columns.
     *
     * $targets is accepted for interface compatibility and left untouched.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        // array_intersect() preserves the keys of $datasetColumns, so the keys of
        // $kept are exactly the sample keys that have to be retained.
        $kept = array_intersect($this->datasetColumns, $this->filterColumns);

        foreach ($samples as $index => $row) {
            $samples[$index] = array_values(array_intersect_key($row, $kept));
        }
    }
}
|
@ -49,7 +49,7 @@ class Imputer implements Preprocessor
|
||||
$this->samples = $samples;
|
||||
}
|
||||
|
||||
public function transform(array &$samples): void
|
||||
public function transform(array &$samples, ?array &$targets = null): void
|
||||
{
|
||||
if ($this->samples === []) {
|
||||
throw new InvalidOperationException('Missing training samples for Imputer.');
|
||||
|
@ -0,0 +1,47 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
/**
 * Encodes labels as consecutive integers (0, 1, 2, ...) in order of first appearance.
 *
 * Labels are compared by their string representation.
 */
final class LabelEncoder implements Preprocessor
{
    /**
     * Map of label (string representation, used as array key) => integer code.
     *
     * @var int[]
     */
    private $classes = [];

    /**
     * Learns the label => code mapping from $samples, discarding any previous mapping.
     *
     * $targets is accepted for interface compatibility and ignored.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        $this->classes = [];

        foreach ($samples as $sample) {
            $label = (string) $sample;
            if (!isset($this->classes[$label])) {
                // The next free code equals the number of labels registered so far.
                $this->classes[$label] = count($this->classes);
            }
        }
    }

    /**
     * Replaces every label in $samples with its integer code, in place.
     *
     * $targets is accepted for interface compatibility and left untouched.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        // Index-based assignment avoids leaving a dangling foreach reference behind.
        foreach ($samples as $index => $sample) {
            $samples[$index] = $this->classes[(string) $sample];
        }
    }

    /**
     * Replaces every integer code in $samples with the label it was assigned to, in place.
     */
    public function inverseTransform(array &$samples): void
    {
        $labels = array_flip($this->classes);

        foreach ($samples as $index => $code) {
            $samples[$index] = $labels[$code];
        }
    }

    /**
     * Returns the known labels ordered by their code.
     *
     * PHP silently turns numeric-string array keys (e.g. "1") into integers, so the
     * keys are cast back to strings to honour the documented string[] contract.
     *
     * @return string[]
     */
    public function classes(): array
    {
        return array_map('strval', array_keys($this->classes));
    }
}
|
@ -0,0 +1,30 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
/**
 * Preprocessor that maps every sample through a user supplied callable.
 */
final class LambdaTransformer implements Preprocessor
{
    /**
     * Callable invoked with a single sample; its return value replaces that sample.
     *
     * @var callable
     */
    private $lambda;

    public function __construct(callable $lambda)
    {
        $this->lambda = $lambda;
    }

    /**
     * No-op: the callable is applied as-is and nothing is learned from the data.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        // nothing to do
    }

    /**
     * Replaces each sample, in place, with the result of calling the lambda on it.
     *
     * $targets is accepted for interface compatibility and left untouched.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        $callback = $this->lambda;

        foreach ($samples as $key => $sample) {
            $samples[$key] = call_user_func($callback, $sample);
        }
    }
}
|
@ -66,7 +66,7 @@ class Normalizer implements Preprocessor
|
||||
$this->fitted = true;
|
||||
}
|
||||
|
||||
public function transform(array &$samples): void
|
||||
public function transform(array &$samples, ?array &$targets = null): void
|
||||
{
|
||||
$methods = [
|
||||
self::NORM_L1 => 'normalizeL1',
|
||||
|
@ -0,0 +1,47 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
/**
 * Preprocessor that casts numeric features to float and swaps everything else
 * for a configurable placeholder.
 */
final class NumberConverter implements Preprocessor
{
    /**
     * Whether transform() should also convert the targets.
     *
     * @var bool
     */
    private $transformTargets;

    /**
     * Value written in place of anything is_numeric() rejects.
     *
     * @var mixed
     */
    private $nonNumericPlaceholder;

    /**
     * @param bool  $transformTargets      Convert targets as well as samples.
     * @param mixed $nonNumericPlaceholder Replacement for non-numeric values.
     */
    public function __construct(bool $transformTargets = false, $nonNumericPlaceholder = null)
    {
        $this->nonNumericPlaceholder = $nonNumericPlaceholder;
        $this->transformTargets = $transformTargets;
    }

    /**
     * No-op: the conversion does not depend on training data.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        //nothing to do
    }

    /**
     * Converts every feature of every sample in place and, when enabled and
     * targets were supplied as an array, every target as well.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        foreach ($samples as &$row) {
            foreach ($row as &$value) {
                $value = $this->convert($value);
            }
            unset($value);
        }
        unset($row);

        if (!$this->transformTargets || !is_array($targets)) {
            return;
        }

        foreach ($targets as &$label) {
            $label = $this->convert($label);
        }
        unset($label);
    }

    /**
     * Returns $value as float when it is numeric, the placeholder otherwise.
     *
     * @param mixed $value
     *
     * @return mixed
     */
    private function convert($value)
    {
        if (is_numeric($value)) {
            return (float) $value;
        }

        return $this->nonNumericPlaceholder;
    }
}
|
@ -0,0 +1,66 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
/**
 * One-hot encodes categorical features column by column.
 *
 * fit() records the distinct values seen in every column; transform() then
 * replaces each feature with a 0/1 vector holding one slot per recorded value
 * of its column and concatenates those vectors into one flat sample.
 */
final class OneHotEncoder implements Preprocessor
{
    /**
     * When true, a value not seen during fit() is encoded as all zeros instead of raising.
     *
     * @var bool
     */
    private $ignoreUnknown;

    /**
     * Column index => (category value => one-hot vector).
     *
     * @var array
     */
    private $categories = [];

    public function __construct(bool $ignoreUnknown = false)
    {
        $this->ignoreUnknown = $ignoreUnknown;
    }

    /**
     * Learns the categories of every column from $samples.
     *
     * Any previously learned categories are discarded first; otherwise a refit
     * would leave stale entries whose vectors have a different width. An empty
     * $samples array simply leaves the encoder without categories.
     *
     * $targets is accepted for interface compatibility and ignored.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        $this->categories = [];

        if ($samples === []) {
            return;
        }

        // transform() addresses features by position (array_values), so the
        // samples are re-indexed the same way before columns are extracted.
        $rows = array_map('array_values', $samples);

        // The number of columns is taken from the first sample.
        $columnCount = count(reset($rows));

        for ($column = 0; $column < $columnCount; ++$column) {
            $this->fitColumn($column, array_values(array_unique(array_column($rows, $column))));
        }
    }

    /**
     * Replaces every sample, in place, with its flat one-hot encoding.
     *
     * $targets is accepted for interface compatibility and left untouched.
     *
     * @throws InvalidArgumentException when a value was not seen during fit()
     *                                  and unknown values are not ignored
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        // Index-based assignment avoids leaving a dangling foreach reference behind.
        foreach ($samples as $index => $sample) {
            $samples[$index] = $this->transformSample(array_values($sample));
        }
    }

    /**
     * Stores, for each distinct value of a column, a vector with a single 1 at
     * the position of that value in $values.
     */
    private function fitColumn(int $column, array $values): void
    {
        $count = count($values);
        foreach ($values as $index => $value) {
            $map = array_fill(0, $count, 0);
            $map[$index] = 1;
            $this->categories[$column][$value] = $map;
        }
    }

    /**
     * Encodes one positionally indexed sample.
     *
     * @throws InvalidArgumentException when a value is unknown and unknown values are not ignored
     */
    private function transformSample(array $sample): array
    {
        $encoded = [];
        foreach ($sample as $column => $feature) {
            // A column that was never fitted has no categories at all.
            $known = $this->categories[$column] ?? [];

            if (isset($known[$feature])) {
                $encoded = array_merge($encoded, $known[$feature]);

                continue;
            }

            if (!$this->ignoreUnknown) {
                throw new InvalidArgumentException(sprintf('Missing category "%s" for column %s in trained encoder', $feature, $column));
            }

            // Unknown value: all zeros, as wide as the column's known categories.
            $encoded = array_merge($encoded, array_fill(0, count($known), 0));
        }

        return $encoded;
    }
}
|
@ -0,0 +1,64 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Tokenization;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
/**
 * Tokenizer producing word n-grams: every run of $n consecutive words, joined
 * by a single space, for each $n between minGram and maxGram (inclusive).
 */
class NGramWordTokenizer extends WordTokenizer
{
    /**
     * Smallest n-gram size to emit.
     *
     * @var int
     */
    private $minGram;

    /**
     * Largest n-gram size to emit.
     *
     * @var int
     */
    private $maxGram;

    /**
     * @throws InvalidArgumentException when either bound is below 1 or minGram exceeds maxGram
     */
    public function __construct(int $minGram = 1, int $maxGram = 2)
    {
        $boundsAreValid = $minGram >= 1 && $maxGram >= 1 && $minGram <= $maxGram;
        if (!$boundsAreValid) {
            throw new InvalidArgumentException(sprintf('Invalid (%s, %s) minGram and maxGram value combination', $minGram, $maxGram));
        }

        $this->minGram = $minGram;
        $this->maxGram = $maxGram;
    }

    /**
     * {@inheritdoc}
     */
    public function tokenize(string $text): array
    {
        // A word is any run of at least two word characters.
        preg_match_all('/\w\w+/u', $text, $matches);
        $words = $matches[0];

        // One list of n-grams per size, smallest size first; the constructor
        // guarantees minGram <= maxGram, so there is always at least one list.
        $perSize = [];
        for ($size = $this->minGram; $size <= $this->maxGram; ++$size) {
            $perSize[] = $this->getNgrams($words, $size);
        }

        return array_merge(...$perSize);
    }

    /**
     * Returns every window of $n consecutive entries of $match, joined by spaces,
     * in order of appearance.
     */
    private function getNgrams(array $match, int $n = 2): array
    {
        $ngrams = [];
        $lastStart = count($match) - $n;

        for ($start = 0; $start <= $lastStart; ++$start) {
            $ngrams[] = trim(implode(' ', array_slice($match, $start, $n)));
        }

        return $ngrams;
    }
}
|
@ -11,5 +11,5 @@ interface Transformer
|
||||
*/
|
||||
public function fit(array $samples, ?array $targets = null): void;
|
||||
|
||||
public function transform(array &$samples): void;
|
||||
public function transform(array &$samples, ?array &$targets = null): void;
|
||||
}
|
||||
|
@ -1,7 +0,0 @@
|
||||
Description of php-ml import into mlbackend_php.
|
||||
|
||||
The current version is de50490.
|
||||
|
||||
Procedure:
|
||||
* Remove everything except the src/ directory and LICENSE
|
||||
* Copy src/ and LICENSE into lib/mlbackend/php/phpml/
|
@ -4,7 +4,7 @@
|
||||
<location>phpml</location>
|
||||
<name>PHP-ML</name>
|
||||
<license>MIT</license>
|
||||
<version>0.8.0</version>
|
||||
<version>0.9.0</version>
|
||||
<licenseversion></licenseversion>
|
||||
</library>
|
||||
</libraries>
|
||||
|
Loading…
x
Reference in New Issue
Block a user