Neural networks perceptron classifier

Michał Żarnecki 2024-12-17 14:22:03 +01:00 committed by Brandon Johnson
parent 7c3f2f8b47
commit 6b52263d4a
7 changed files with 324 additions and 0 deletions


@@ -97,6 +97,15 @@
* [Problem8](./Maths/ProjectEuler/Problem8.php)
* [Problem9](./Maths/ProjectEuler/Problem9.php)
## NeuralNetworks
* PerceptronClassifier
* [NeuralNetworkPerceptronClassifier.php](NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php)
* [README.md](NeuralNetworks/PerceptronClassifier/README.md)
* chart
* [dataset.png](NeuralNetworks/PerceptronClassifier/chart/dataset.png)
* [linear-separated.png](NeuralNetworks/PerceptronClassifier/chart/linear-separated.png)
* [sigmoid.png](NeuralNetworks/PerceptronClassifier/chart/sigmoid.png)
## Searches
* [Binarysearch](./Searches/BinarySearch.php)
* [Exponentialsearch](./Searches/ExponentialSearch.php)


@@ -0,0 +1,192 @@
<?php
namespace NeuralNetworks\PerceptronClassifier;
/**
* This class implements a simple single-layer perceptron with one output neuron (no hidden layer).
* The network uses the sigmoid activation function and performs binary classification.
* (https://cw.fel.cvut.cz/b211/courses/be5b33rpz/labs/07_perceptron/start)
*
* @author Michał Żarnecki https://github.com/rzarno
*/
class NeuralNetworkPerceptronClassifier
{
/**
* Train the perceptron with batch gradient descent.
* @param array $X Training inputs of shape (number of features x number of samples)
* @param array $Y Training labels, one 0/1 value per sample
* @param int $iterations Number of gradient-descent iterations
* @param float $learningRate Step size used when updating the parameters
* @return array [$W, $b] Trained weights and bias
*/
public function trainModel(array $X, array $Y, int $iterations, float $learningRate): array
{
[$W, $b] = $this->initParams(count($X));
for ($i = 0; $i < $iterations; $i++) {
// Forward propagation
$A = $this->forwardPropagation($X, $W, $b);
// Compute cost
$cost = $this->computeCost($A, $Y);
// Backward propagation
[$dW, $db] = $this->backwardPropagation($A, $X, $Y);
// Update parameters
[$W, $b] = $this->updateParams($W, $b, $dW, $db, $learningRate);
if ($i % 100 == 0) {
echo "Iteration {$i} - Cost: {$cost}\n";
}
}
return [$W, $b];
}
/**
* Predict a 0/1 label for each sample (column) of $X.
* @param array $X Inputs of shape (number of features x number of samples)
* @param array $W Trained weight vector
* @param float $b Trained bias
* @return array Predicted label (0 or 1) for each sample
*/
public function predict(array $X, array $W, float $b): array
{
$A = $this->forwardPropagation($X, $W, $b);
return array_map(fn($a) => $a > 0.5 ? 1 : 0, $A);
}
/**
* Stage 1. Prepare dataset
* @return array[]
*/
public function generateTrainingSet(): array
{
$m = 50;
// Generate a 2 x m matrix with binary values (0 or 1)
$X = [];
for ($i = 0; $i < 2; $i++) {
for ($j = 0; $j < $m; $j++) {
$X[$i][$j] = rand(0, 1);
}
}
// Compute Y: label is 1 only when X[0] == 1 and X[1] == 0 (x1 AND NOT x2)
$Y = [];
for ($j = 0; $j < $m; $j++) {
$Y[$j] = ($X[0][$j] == 1 && $X[1][$j] == 0) ? 1 : 0;
}
return [$X, $Y];
}
/**
* Stage 2. Initialize model parameters
* @param int $n Number of features
* @return array [$W, $b] Weight and bias arrays
*/
private function initParams(int $n): array
{
$W = [];
for ($i = 0; $i < $n; $i++) {
$W[$i] = mt_rand() / mt_getrandmax(); // Random values in [0, 1]
}
$b = 0.0; // Bias initialized to zero
return [$W, $b];
}
/**
* Sigmoid Activation Function
* @param float $z
* @return float
*/
private function sigmoid(float $z): float
{
return 1 / (1 + exp(-$z));
}
/**
* Stage 3. Forward Propagation
* @param array $X
* @param array $W
* @param float $b
* @return array
*/
private function forwardPropagation(array $X, array $W, float $b): array
{
$Z = [];
for ($j = 0; $j < count($X[0]); $j++) {
$sum = $b;
for ($i = 0; $i < count($W); $i++) {
$sum += $W[$i] * $X[$i][$j];
}
$Z[$j] = $this->sigmoid($sum);
}
return $Z;
}
/**
* Stage 4. Compute Cost Function (Binary Cross-Entropy Loss)
* @param array $A
* @param array $Y
* @return float
*/
private function computeCost(array $A, array $Y): float
{
$m = count($Y);
$cost = 0.0;
for ($i = 0; $i < $m; $i++) {
$cost += -($Y[$i] * log($A[$i]) + (1 - $Y[$i]) * log(1 - $A[$i]));
}
return $cost / $m;
}
/**
* Stage 5. Backward Propagation
* @param array $A
* @param array $X
* @param array $Y
* @return array
*/
private function backwardPropagation(array $A, array $X, array $Y): array
{
$m = count($Y);
$dW = array_fill(0, count($X), 0.0);
$db = 0.0;
for ($j = 0; $j < $m; $j++) {
$dZ = $A[$j] - $Y[$j];
for ($i = 0; $i < count($X); $i++) {
$dW[$i] += $dZ * $X[$i][$j];
}
$db += $dZ;
}
// Average gradients
for ($i = 0; $i < count($dW); $i++) {
$dW[$i] /= $m;
}
$db /= $m;
return [$dW, $db];
}
/**
* Stage 6. Update Parameters
* @param array $W
* @param float $b
* @param array $dW
* @param float $db
* @param float $learningRate
* @return array
*/
private function updateParams(array $W, float $b, array $dW, float $db, float $learningRate): array
{
for ($i = 0; $i < count($W); $i++) {
$W[$i] -= $learningRate * $dW[$i];
}
$b -= $learningRate * $db;
return [$W, $b];
}
}


@@ -0,0 +1,100 @@
## Maths behind the single Perceptron Neural Network with Activation Function
This work is based on examples from the course https://www.coursera.org/learn/machine-learning-calculus by Luis Serrano.
Linear separation refers to binary classification problems in which the two classes of data points can be separated by a linear decision boundary.
If the data points can be separated by a line, a plane, or a flat hyperplane, they are said to be linearly separable.
In an $n$-dimensional space the data are linearly separable if there exists a hyperplane
$$w_1x_1 + w_2x_2 + \dots + w_nx_n + b = 0$$
that separates the two classes.
For two-dimensional input data, if there is a line with equation $$w_1x_1 + w_2x_2 + b = 0$$
that separates all samples of one class from the other, then the classification rule can be read off directly from the equation of that line.
Such classification problems are called "linearly separable", i.e. the classes can be separated by a linear combination of the inputs.
<img src="chart/linear-separated.png" />
The input layer contains two nodes $x_1$ and $x_2$. Weight vector $W = \begin{bmatrix} w_1 & w_2\end{bmatrix}$ and bias ($b$) are the parameters to be updated during the model training.
$$z^{(i)} = w_1x_1^{(i)} + w_2x_2^{(i)} + b = Wx^{(i)} + b.\tag{1}$$
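For instance, a minimal PHP sketch of equation (1) for a single observation (the weight, bias, and input values below are arbitrary illustration values):

```php
<?php
// Illustrative values only: two weights, a bias, and one observation (x1, x2).
$W = [0.7, -1.2];
$b = 0.1;
$x = [1, 0];

// z = w1*x1 + w2*x2 + b, as in equation (1).
$z = $W[0] * $x[0] + $W[1] * $x[1] + $b;
echo $z . PHP_EOL; // 0.8
```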
To perform classification we need a nonlinear element. This is achieved with the sigmoid activation function, which maps its input to a value close to $0$ or close to $1$ for most inputs, with intermediate values only in a narrow range around $z = 0$.
Sigmoid activation function is defined as
$$a = \sigma\left(z\right) = \frac{1}{1+e^{-z}}.\tag{2}$$
<img src="chart/sigmoid.png" />
A threshold value of $0.5$ can be used for predictions: $1$ (red) if $a > 0.5$ and $0$ (blue) otherwise.
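A minimal PHP sketch of the sigmoid and of the $0.5$ threshold; the standalone helper names `sigmoid` and `toLabel` are illustrative, while in the class this logic lives in `sigmoid()` and `predict()`:

```php
<?php
// Sigmoid squashes any real z into the open interval (0, 1), as in equation (2).
function sigmoid(float $z): float
{
    return 1 / (1 + exp(-$z));
}

// Threshold the activation at 0.5 to obtain the predicted class label.
function toLabel(float $a): int
{
    return $a > 0.5 ? 1 : 0;
}

echo sigmoid(2.0) . PHP_EOL;           // ~0.88, above the threshold
echo toLabel(sigmoid(-1.5)) . PHP_EOL; // 0
```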
The single perceptron neural network with sigmoid activation function can be expressed as:
\begin{align}
z^{(i)} &= W x^{(i)} + b,\\
a^{(i)} &= \sigma\left(z^{(i)}\right).\\\tag{3}
\end{align}
With $m$ training examples organised in the columns of the ($2 \times m$) matrix $X$, you can apply the activation function element-wise, so the model can be written as:
\begin{align}
Z &= W X + b,\\
A &= \sigma\left(Z\right),\\\tag{4}
\end{align}
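A PHP sketch of this forward pass, looping over the $m$ columns of `X`; it mirrors the class's private `forwardPropagation()` method, and the standalone name `forward` is illustrative:

```php
<?php
// Forward propagation for an (n x m) input matrix $X, weight vector $W and bias $b.
// Returns the m activations A = sigmoid(W X + b), as in equation (4).
function forward(array $X, array $W, float $b): array
{
    $A = [];
    $m = count($X[0]);
    for ($j = 0; $j < $m; $j++) {
        $z = $b;
        for ($i = 0; $i < count($W); $i++) {
            $z += $W[$i] * $X[$i][$j];
        }
        $A[$j] = 1 / (1 + exp(-$z));
    }
    return $A;
}

// Two features, three samples (all values illustrative): activations ~0.38, ~0.62, 0.5.
print_r(forward([[0, 1, 1], [1, 0, 1]], [0.5, -0.5], 0.0));
```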
When dealing with classification problems, the most commonly used cost function is the **log loss**, which is described by the following equation
$$\mathcal{L}\left(W, b\right) = \frac{1}{m}\sum_{i=1}^{m} L\left(W, b\right) = \frac{1}{m}\sum_{i=1}^{m} \large\left(\small -y^{(i)}\log\left(a^{(i)}\right) - (1-y^{(i)})\log\left(1- a^{(i)}\right) \large \right) \small,\tag{5}$$
where $y^{(i)} \in \{0,1\}$ are the original labels and $a^{(i)}$ are the continuous output values of the forward propagation step (elements of array $A$).
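A direct PHP translation of equation (5), performing the same computation as the class's `computeCost()`; the standalone name `logLoss` is illustrative:

```php
<?php
// Binary cross-entropy (log loss) averaged over the m examples, equation (5).
function logLoss(array $A, array $Y): float
{
    $m = count($Y);
    $cost = 0.0;
    for ($i = 0; $i < $m; $i++) {
        $cost -= $Y[$i] * log($A[$i]) + (1 - $Y[$i]) * log(1 - $A[$i]);
    }
    return $cost / $m;
}

// Confident, correct activations give a small loss; confidently wrong ones a large loss.
echo logLoss([0.9, 0.1], [1, 0]) . PHP_EOL; // ~0.105
echo logLoss([0.1, 0.9], [1, 0]) . PHP_EOL; // ~2.303
```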
We want to minimize the cost function during training. To implement gradient descent, calculate the partial derivatives using the chain rule:
\begin{align}
\frac{\partial \mathcal{L} }{ \partial w_1 } &=
\frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_1^{(i)},\\
\frac{\partial \mathcal{L} }{ \partial w_2 } &=
\frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_2^{(i)},\tag{7}\\
\frac{\partial \mathcal{L} }{ \partial b } &=
\frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right).
\end{align}
The equations above can be rewritten in matrix form:
\begin{align}
\frac{\partial \mathcal{L} }{ \partial W } &=
\begin{bmatrix} \frac{\partial \mathcal{L} }{ \partial w_1 } &
\frac{\partial \mathcal{L} }{ \partial w_2 }\end{bmatrix} = \frac{1}{m}\left(A - Y\right)X^T,\\
\frac{\partial \mathcal{L} }{ \partial b } &= \frac{1}{m}\left(A - Y\right)\mathbf{1}.
\tag{8}
\end{align}
where $\left(A - Y\right)$ is an array of shape ($1 \times m$), $X^T$ is an array of shape ($m \times 2$), and $\mathbf{1}$ is a ($m \times 1$) vector of ones.
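Equation (8) written as plain loops in PHP, equivalent to the class's `backwardPropagation()`; the function name and the numeric inputs are illustrative:

```php
<?php
// Gradients of the log loss with respect to W and b, averaged over the m examples.
function gradients(array $A, array $X, array $Y): array
{
    $m  = count($Y);
    $dW = array_fill(0, count($X), 0.0);
    $db = 0.0;
    for ($j = 0; $j < $m; $j++) {
        $dZ = $A[$j] - $Y[$j];              // (a - y) for one example
        foreach ($X as $i => $feature) {
            $dW[$i] += $dZ * $feature[$j];  // accumulate (a - y) * x_i
        }
        $db += $dZ;
    }
    return [array_map(fn ($g) => $g / $m, $dW), $db / $m];
}

[$dW, $db] = gradients([0.8, 0.3], [[1, 0], [0, 1]], [1, 0]);
print_r($dW);       // [-0.1, 0.15]
echo $db . PHP_EOL; // 0.05
```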
Then you can update the parameters:
\begin{align}
W &= W - \alpha \frac{\partial \mathcal{L} }{ \partial W },\\
b &= b - \alpha \frac{\partial \mathcal{L} }{ \partial b },
\tag{9}\end{align}
where $\alpha$ is the learning rate. Repeat the process in a loop until the cost function stops decreasing.
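The update rule (9) as a PHP sketch; this is the step the class performs in `updateParams()` inside its training loop, and the numeric inputs are illustrative:

```php
<?php
// One gradient-descent step: move W and b against their gradients, equation (9).
function updateStep(array $W, float $b, array $dW, float $db, float $alpha): array
{
    foreach ($W as $i => $w) {
        $W[$i] = $w - $alpha * $dW[$i];
    }
    return [$W, $b - $alpha * $db];
}

[$W, $b] = updateStep([0.7, -1.2], 0.1, [-0.1, 0.15], 0.05, 0.1);
print_r($W);       // [0.71, -1.215]
echo $b . PHP_EOL; // 0.095
```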
In the last step, apply the threshold to the activations to obtain the final predictions:
$$\hat{y} = \begin{cases} 1 & \mbox{if } a > 0.5 \\ 0 & \mbox{otherwise } \end{cases}\tag{10}$$
### Dataset
As a dataset we will generate $m=50$ data points $(x_1, x_2)$, where $x_1, x_2 \in \{0,1\}$, and save them in the array `X` of shape $(2 \times m)$. The labels ($0$: blue, $1$: red) will be calculated so that $y = 1$ if $x_1 = 1$ and $x_2 = 0$; in all other cases $y=0$. The labels will be saved in the array `Y` of shape $(1 \times m)$.
<img src="chart/dataset.png" />

Binary file not shown. Size: 20 KiB

Binary file not shown. Size: 17 KiB

Binary file not shown. Size: 18 KiB


@@ -0,0 +1,23 @@
<?php
namespace NeuralNetworks\PerceptronClassifier;
require_once __DIR__ . '/../../../vendor/autoload.php';
require_once __DIR__ . '/../../../NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php';
use PHPUnit\Framework\TestCase;
class NeuralNetworkPerceptronClassifierTest extends TestCase
{
public function testNeuralNetworkPerceptronClassification()
{
$nnClassifier = new NeuralNetworkPerceptronClassifier();
[$X, $Y] = $nnClassifier->generateTrainingSet();
// Train the model
[$W, $b] = $nnClassifier->trainModel($X, $Y, 1000, 0.1);
// Make predictions
$predictions = $nnClassifier->predict([[0, 0, 1, 1], [0, 1, 1, 0]], $W, $b);
$this->assertEquals([0, 0, 0, 1], $predictions);
}
}