.NET: Neural Network, Supervised Deep Machine Learning Example in C#
Supervised learning is a machine learning paradigm for problems where the available data consists of labeled examples, meaning that each data point contains features (covariates) and an associated label. The goal of supervised learning algorithms is to learn a function that maps feature vectors (inputs) to labels (outputs) based on example input-output pairs. In the MNIST task below, for example, the features are the 784 pixel intensities of a 28x28 image of a hand-written digit, and the label is the digit (0-9) it depicts.
See the unsupervised learning version. Also, see the convolutional neural network example.
Learn about feedforward neural networks.
See this code in action here and here.
Download these files, which include example code showing how to train the network and the MNIST image files of hand-written digits with their labels: NEURALNETWORKMNIST.zip. Experiment with the number of neurons and layers. The example usage code requires SixLabors.ImageSharp v3.0.1 (NuGet package).
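To give a feel for the API before the full listing, here is a minimal usage sketch. This is not the example code from the download; the LoadMnist helper and the 784-100-10 layer sizes are illustrative assumptions.

// Usage.cs - a minimal sketch of constructing, training and evaluating the network
using System;
using System.Collections.Generic;
using ML;

public static class Usage
{
    public static void Main()
    {
        // 784 inputs, one hidden layer of 100 neurons, 10 outputs
        var activation = new ActivationReLUSoftmax();
        var network = new NeuralNetwork(new[] { 784, 100, 10 }, activation,
            Randomization.GlorotXavier, clipThreshold: 5.0);

        // hypothetical helper: load the images and labels from the download
        // into 784x1 input matrices and 10x1 one-hot label matrices
        (List<Matrix> inputs, List<Matrix> labels) = LoadMnist();

        const int epochCount = 10;
        const double initialLearningRate = 0.1, lambda = 1e-5;
        for (int epoch = 0; epoch < epochCount; epoch++)
        {
            // decay the learning rate using the schedule suggested in the notes in the listing
            double learningRate = Math.Pow(0.1, epoch / (double)epochCount) * initialLearningRate;
            network.Train(inputs, labels, activation, learningRate, lambda);
        }

        // report the loss on the first training example
        Console.WriteLine(network.Calculate(inputs[0], labels[0], activation));
    }

    private static (List<Matrix>, List<Matrix>) LoadMnist() =>
        throw new NotImplementedException(); // load the MNIST files from the download here
}

Note that Train's threadCount parameter defaults to Environment.ProcessorCount, so the call above trains one mini-batch per logical processor.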
The neural network code:
// NeuralNetwork.cs
// Compatible with .NET Core 3.1 and later
using System;
using System.Collections.Generic;
using System.Threading;
namespace ML
{
public class NeuralNetwork
{
public List<Matrix> Weights { get; set; }
public List<Matrix>? Biases { get; set; }
public int LayerCount { get; set; }
public double ClipThreshold { get; set; }
public string ActivationName { get; set; }
public NeuralNetwork()
{
Weights = new List<Matrix>();
ActivationName = string.Empty;
}
// NOTE: a clipThreshold may be needed when training on lots of data, such as larger image-recognition tasks with a CNN, for example
public NeuralNetwork(int[] neuronCounts, IActivationMethods activationObject, Randomization weightsBiasesInit, double clipThreshold = 0, bool biases = true)
{
if (biases)
{
Biases = new List<Matrix>();
for (int i = 1; i < neuronCounts.Length; i++)
Biases.Add(new Matrix(neuronCounts[i], 1));
}
Weights = new List<Matrix>();
for (int i = 0; i < neuronCounts.Length - 1; i++)
Weights.Add(new Matrix(neuronCounts[i + 1], neuronCounts[i]));
LayerCount = neuronCounts.Length;
ClipThreshold = clipThreshold;
ActivationName = activationObject.ActivationName;
Randomize(weightsBiasesInit);
}
// NOTE: lambda is the coefficient of the regularization term and should be a very small fraction (between zero and one) to help prevent overfitting and exploding gradients;
// at zero it provides no regularization and risks exploding gradients, so use a clipThreshold, such as 5.0
// NOTE: training might be slowed by using multiple threads, because each thread's mini-batch of training data is smaller and therefore has less to learn from; consider decreasing the number of threads as the epoch count increases, and experiment with the learning rate
// NOTE: learningRate can be decayed over the course of training with a schedule such as: Math.Pow(0.1, epoch / (double)epochCount) * initial_learning_rate
public void Train(List<Matrix> givenInputs, List<Matrix> desiredOutputs, IActivationMethods activationObject, double learningRate, double lambda, int threadCount = 0)
{
if (ActivationName != activationObject.ActivationName)
throw new ArgumentException($"activationObject argument incorrect: {activationObject.ActivationName}");
if (givenInputs.Count != desiredOutputs.Count)
throw new ArgumentException("\"givenInputs\" count must match \"desiredOutputs\" count.");
if (threadCount < 1)
threadCount = Environment.ProcessorCount;
int mini_batch_size = givenInputs.Count / threadCount;
if (mini_batch_size > 0)
{
var bps = new List<BatchParams>();
for (int x = 0; x < threadCount; x++)
{
var bp = new BatchParams()
{
network = this,
givenInputs = givenInputs.GetRange(x * mini_batch_size, mini_batch_size),
desiredOutputs = desiredOutputs.GetRange(x * mini_batch_size, mini_batch_size),
local_weights = new List<Matrix>(),
local_biases = Biases == null ? null : new List<Matrix>(),
activationObject = activationObject,
learningRate = learningRate,
lambda = lambda,
delta_gradient_w = new Matrix[Weights.Count],
delta_gradient_b = Biases == null ? null : new Matrix[Biases.Count],
thread = new Thread(TrainMiniBatch) { IsBackground = true },
threadCount = threadCount
};
foreach (var weight in Weights)
bp.local_weights.Add(new Matrix(weight));
if(Biases != null)
foreach (var bias in Biases)
bp.local_biases.Add(new Matrix(bias));
bps.Add(bp);
bp.thread.Start(bp);
}
// wait for every mini-batch thread to finish
foreach (var bp in bps)
{
bp.thread?.Join();
bp.thread = null;
}
for (int x = 0; x < Weights.Count; x++)
{
var weights = new List<Matrix>();
for (int i = 0; i < bps.Count; i++)
weights.Add(bps[i].local_weights[x]);
ParameterAveraging(Weights[x], weights);
}
if (Biases != null)
{
for (int x = 0; x < Biases.Count; x++)
{
var biases = new List<Matrix>();
for (int i = 0; i < bps.Count; i++)
biases.Add(bps[i].local_biases[x]);
ParameterAveraging(Biases[x], biases);
}
}
}
int end = threadCount * mini_batch_size + givenInputs.Count % threadCount;
for (int x = threadCount * mini_batch_size; x < end; x++)
Train(givenInputs[x], desiredOutputs[x], activationObject, learningRate, lambda);
}
// NOTE: lambda is the coefficient of the regularization term and should be a very small fraction (between zero and one) to help prevent overfitting and exploding gradients;
// at zero it provides no regularization and risks exploding gradients, so use a clipThreshold, such as 5.0
public void Train(Matrix givenInputs, Matrix desiredOutputs, IActivationMethods activationObject, double learningRate, double lambda)
{
if (ActivationName != activationObject.ActivationName)
throw new ArgumentException($"activationObject argument incorrect: {activationObject.ActivationName}");
var delta_gradient_w = new Matrix[Weights.Count];
var delta_gradient_b = Biases == null ? null : new Matrix[Biases.Count];
BackPropagate(this, Weights, Biases, givenInputs, desiredOutputs, activationObject, delta_gradient_w, delta_gradient_b);
var new_weights = new List<Matrix>();
var new_biases = Biases == null ? null : new List<Matrix>();
for (int i = 0; i < delta_gradient_w.Length; i++)
{
for (int j = 0; j < delta_gradient_w[i].rows; j++)
for (int k = 0; k < delta_gradient_w[i].columns; k++)
{
double w = Weights[i].GetValue(j, k);
double nw = delta_gradient_w[i].GetValue(j, k);
delta_gradient_w[i].SetValue(j, k, (1 - learningRate * lambda) * w - learningRate * nw);
}
new_weights.Add(delta_gradient_w[i]);
}
if (delta_gradient_b != null)
{
for (int i = 0; i < delta_gradient_b.Length; i++)
{
for (int j = 0; j < delta_gradient_b[i].rows; j++)
{
double b = Biases[i].GetValue(j, 0);
double nb = delta_gradient_b[i].GetValue(j, 0);
delta_gradient_b[i].SetValue(j, 0, b - learningRate * nb);
}
new_biases.Add(delta_gradient_b[i]);
}
}
Weights = new_weights;
Biases = new_biases;
}
public double Calculate(Matrix givenInputs, Matrix desiredOutputs, IActivationMethods activationObject)
{
if (givenInputs.rows != desiredOutputs.rows && givenInputs.columns != desiredOutputs.columns)
throw new ArgumentException("The givenInputs and desiredOutputs arguments are not of the same dimensions.");
Matrix ff = FeedForward(givenInputs, activationObject);
if (ff.rows == 1 && ff.columns == 1)
return ff.GetValue(0, 0); // a binary output: just return the feed-forward value, which will be between 0 and 1
return CategoricalCrossEntropy(ff, desiredOutputs); // otherwise, use the Categorical Cross Entropy function to calculate the loss
}
#region Private_Decl
private static void TrainMiniBatch(object? o)
{
if (!(o is BatchParams bp))
throw new ArgumentException("Expected a BatchParams instance.", nameof(o));
for (int x = 0; x < bp.givenInputs.Count; x++)
{
BackPropagate(bp.network, bp.local_weights, bp.local_biases, bp.givenInputs[x], bp.desiredOutputs[x], bp.activationObject, bp.delta_gradient_w, bp.delta_gradient_b);
for (int i = 0; i < bp.delta_gradient_w.Length; i++)
{
for (int row = 0; row < bp.delta_gradient_w[i].rows; row++)
for (int column = 0; column < bp.delta_gradient_w[i].columns; column++)
{
double w = bp.local_weights[i].GetValue(row, column);
double nw = bp.delta_gradient_w[i].GetValue(row, column);
bp.local_weights[i].SetValue(row, column, (1 - bp.learningRate * bp.lambda) * w - (bp.learningRate / bp.threadCount) * nw);
}
}
if (bp.delta_gradient_b != null)
{
for (int i = 0; i < bp.delta_gradient_b.Length; i++)
{
for (int row = 0; row < bp.delta_gradient_b[i].rows; row++)
{
double b = bp.local_biases[i].GetValue(row, 0);
double nb = bp.delta_gradient_b[i].GetValue(row, 0);
bp.local_biases[i].SetValue(row, 0, b - (bp.learningRate / bp.threadCount) * nb);
}
}
}
}
}
private static void ParameterAveraging(Matrix globalParameters, List<Matrix> localParametersOfThreads)
{
// Initialize a temporary matrix to hold the sum of local parameters
Matrix sumOfLocalParams = new Matrix(globalParameters.rows, globalParameters.columns);
for (int threadId = 0; threadId < localParametersOfThreads.Count; threadId++)
for (int row = 0; row < localParametersOfThreads[threadId].rows; row++)
for (int column = 0; column < localParametersOfThreads[threadId].columns; column++)
sumOfLocalParams.SetValue(row, column, sumOfLocalParams.GetValue(row, column) + localParametersOfThreads[threadId].GetValue(row, column));
// Update the global parameter matrix using parameter averaging formula
for (int row = 0; row < globalParameters.rows; ++row)
for (int column = 0; column < globalParameters.columns; ++column)
globalParameters.SetValue(row, column, sumOfLocalParams.GetValue(row, column) / localParametersOfThreads.Count);
}
private static void BackPropagate(NeuralNetwork network, List<Matrix> Weights, List<Matrix>? Biases, Matrix givenInputs, Matrix desiredOutputs, IActivationMethods activationObject, Matrix[] delta_gradient_w, Matrix[]? delta_gradient_b) // uses Stochastic Gradient Descent
{
Matrix activation = givenInputs;
List<Matrix> activations = new List<Matrix> { activation };
List<Matrix> zs = new List<Matrix>();
// feed forward
for (int i = 0; i < network.LayerCount - 1; i++)
{
Matrix? z;
Matrix.Multiply(Weights[i], activation, out z);
if (z == null)
throw new ArgumentException("Cannot multiply matrices.");
if (Biases != null) // add bias
{
for (int j = 0; j < z.rows; j++)
for (int k = 0; k < z.columns; k++)
z.SetValue(j, k, z.GetValue(j, k) + Biases[i].GetValue(j, 0));
}
zs.Add(new Matrix(z));
if (i < network.LayerCount - 2)
activationObject.ActivationMethod(z);
else
activationObject.OutputActivationMethod(z);
activation = z;
activations.Add(activation);
}
// backward pass
Matrix delta = new Matrix(activations[^1].rows, activations[^1].columns);
if (activations[^1].rows == 1 && activations[^1].columns == 1 && desiredOutputs.rows == 1 && desiredOutputs.columns == 1)
delta.SetValue(0, 0, BinaryCrossEntropy(activations[^1], desiredOutputs)); // a binary output, so use the Binary Cross Entropy function
else
Matrix.Delta(activations[^1], desiredOutputs, delta); // otherwise subtract the one-hot target (the output delta for Categorical Cross Entropy with Softmax)
if(delta_gradient_b != null)
delta_gradient_b[^1] = new Matrix(delta);
Matrix transposed = new Matrix(activations[^2]);
transposed.Transpose();
Matrix? temp;
Matrix.Multiply(delta, transposed, out temp);
if (temp == null)
throw new ArgumentException("Cannot multiply matrices.");
delta_gradient_w[^1] = temp;
for (int i = 2; i < network.LayerCount; i++)
{
var t = network.LayerCount - i;
transposed = new Matrix(Weights[t]);
transposed.Transpose();
Matrix.Multiply(transposed, delta, out temp);
if (temp == null)
throw new ArgumentException("Cannot multiply matrices.");
// multiply "temp" elementwise by the activation derivative
Matrix z = zs[^i];
for (int j = 0; j < temp.rows; j++)
for (int k = 0; k < temp.columns; k++)
temp.SetValue(j, k, temp.GetValue(j, k) * activationObject.Derivative(z.GetValue(j, 0)));
delta.Copy(temp);
if (delta_gradient_b != null)
delta_gradient_b[^i] = temp;
t = network.LayerCount - i - 1;
transposed = new Matrix(activations[t]);
transposed.Transpose();
Matrix.Multiply(delta, transposed, out temp);
if (temp == null)
throw new ArgumentException("Cannot multiply matrices.");
delta_gradient_w[^i] = temp;
}
if (network.ClipThreshold > 0) // a threshold greater than zero guards against exploding gradients (gradient norm clipping)
{
double gradients_norm, scale_factor;
if (delta_gradient_b != null) // biases
{
gradients_norm = 0;
for (int i = 0; i < delta_gradient_b.Length; i++)
for (int j = 0; j < delta_gradient_b[i].rows; j++)
for (int k = 0; k < delta_gradient_b[i].columns; k++)
gradients_norm += delta_gradient_b[i].GetValue(j, k) * delta_gradient_b[i].GetValue(j, k);
gradients_norm = Math.Sqrt(gradients_norm);
if (gradients_norm > network.ClipThreshold)
{
scale_factor = network.ClipThreshold / gradients_norm;
for (int i = 0; i < delta_gradient_b.Length; i++)
for (int j = 0; j < delta_gradient_b[i].rows; j++)
for (int k = 0; k < delta_gradient_b[i].columns; k++)
delta_gradient_b[i].SetValue(j, k, delta_gradient_b[i].GetValue(j, k) * scale_factor);
}
}
// weights
gradients_norm = 0;
for (int i = 0; i < delta_gradient_w.Length; i++)
for (int j = 0; j < delta_gradient_w[i].rows; j++)
for (int k = 0; k < delta_gradient_w[i].columns; k++)
gradients_norm += delta_gradient_w[i].GetValue(j, k) * delta_gradient_w[i].GetValue(j, k);
gradients_norm = Math.Sqrt(gradients_norm);
if (gradients_norm > network.ClipThreshold)
{
scale_factor = network.ClipThreshold / gradients_norm;
for (int i = 0; i < delta_gradient_w.Length; i++)
for (int j = 0; j < delta_gradient_w[i].rows; j++)
for (int k = 0; k < delta_gradient_w[i].columns; k++)
delta_gradient_w[i].SetValue(j, k, delta_gradient_w[i].GetValue(j, k) * scale_factor);
}
}
}
private Matrix FeedForward(Matrix givenInputs, IActivationMethods activationObject)
{
if (ActivationName != activationObject.ActivationName)
throw new ArgumentException($"activationObject argument incorrect: {activationObject.ActivationName}");
for (int i = 0; i < LayerCount - 1; i++)
{
Matrix? temp;
Matrix.Multiply(Weights[i], givenInputs, out temp);
if (temp == null)
throw new ArgumentException("Cannot multiply matrices.");
if (Biases != null) // add bias
{
for (int j = 0; j < temp.rows; j++)
for (int k = 0; k < temp.columns; k++)
temp.SetValue(j, k, temp.GetValue(j, k) + Biases[i].GetValue(j, 0));
}
if (i < LayerCount - 2)
activationObject.ActivationMethod(temp);
else
activationObject.OutputActivationMethod(temp);
givenInputs = temp;
}
return givenInputs;
}
private void Randomize(Randomization initialization)
{
var rand = new Random();
for (int a = 0; a < Weights.Count; a++)
{
double init;
switch (initialization)
{
case Randomization.HeNormal: // good for ReLU (supposedly)
init = Math.Sqrt(2.0 / Weights[a].columns);
break;
case Randomization.GlorotXavier: // for general use
init = Math.Sqrt(6.0 / (Weights[a].columns + Weights[a].rows));
break;
default:
init = 1;
break;
}
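// draw each weight uniformly from [-init/2, +init/2)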
for (int i = 0; i < Weights[a].rows; i++)
for (int j = 0; j < Weights[a].columns; j++)
Weights[a].SetValue(i, j, rand.NextDouble() * init - init * 0.5);
}
if (Biases != null)
{
for (int a = 0; a < Biases.Count; a++)
{
double init;
switch (initialization)
{
case Randomization.HeNormal: // good for ReLU (supposedly)
init = Math.Sqrt(2.0 / Biases[a].columns);
break;
case Randomization.GlorotXavier: // for general use
init = Math.Sqrt(6.0 / (Biases[a].columns + Biases[a].rows));
break;
default:
init = 1;
break;
}
for (int i = 0; i < Biases[a].rows; i++)
Biases[a].SetValue(i, 0, rand.NextDouble() * init - init * 0.5);
}
}
}
private static double CategoricalCrossEntropy(Matrix feedForwardOutputs, Matrix desiredOutput) // Categorical Cross Entropy, desiredOutput should be a one-hot vector
{
int iO = indexMax(feedForwardOutputs), iL = indexMax(desiredOutput);
double a = (iO == iL) ? feedForwardOutputs.GetValue(iO, 0) : 0;
if (a == 0)
return 1; // the predicted class is wrong, so cap the loss at 1
if (a == 1)
return 0; // a perfect prediction has zero loss
return -Math.Log(a);
}
private static double BinaryCrossEntropy(Matrix feedForwardOutputs, Matrix desiredOutput) // Binary Cross Entropy, both parameters should be a single input: [1, 1] matrix
{
const double epsilon = 1e-15;
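// clamp the prediction away from exactly 0 and 1 so Math.Log never returns negative infinity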
double y_true = desiredOutput.GetValue(0, 0);
double y_pred = Math.Max(epsilon, Math.Min(1 - epsilon, feedForwardOutputs.GetValue(0, 0)));
return -(y_true * Math.Log(y_pred) + (1 - y_true) * Math.Log(1 - y_pred));
}
private static int indexMax(Matrix m) // only pass single-column (vector) matrices
{
int index = -1;
double a, max = double.MinValue;
for (int i = 0; i < m.rows; i++)
{
a = m.GetValue(i, 0);
if (a > max)
{
max = a;
index = i;
}
}
return index;
}
public string ToJson()
{
return System.Text.Json.JsonSerializer.Serialize(this);
}
public static NeuralNetwork? FromJson(string json)
{
return System.Text.Json.JsonSerializer.Deserialize<NeuralNetwork>(json);
}
#endregion
}
public enum Randomization // play with learning rate when switching between these randomizations
{
HeNormal, // for ReLU (supposedly)
GlorotXavier // general use
}
internal class BatchParams
{
public NeuralNetwork network;
public List<Matrix> givenInputs, desiredOutputs, local_weights;
public List<Matrix>? local_biases;
public IActivationMethods activationObject;
public Matrix[] delta_gradient_w;
public Matrix[]? delta_gradient_b;
public double learningRate;
public double lambda;
public Thread? thread;
public int threadCount;
}
public class Matrix
{
public double[] data { get; set; }
public int rows { get; set; }
public int columns { get; set; }
public Matrix()
{
data = new double[0];
}
public Matrix(int rows, int columns)
{
this.rows = rows;
this.columns = columns;
data = new double[rows * columns];
}
public Matrix(int rows, int columns, double[] values, bool transpose = false)
{
this.rows = rows;
this.columns = columns;
data = new double[rows * columns];
if (transpose)
{
// "values" is laid out as a columns x rows matrix; store its transpose here
// (the previous row/column mix-up indexed out of bounds for non-square, non-column-vector shapes)
for (var r = 0; r < rows; r++)
for (var c = 0; c < columns; c++)
SetValue(r, c, values[c * rows + r]);
}
else
Array.Copy(values, data, rows * columns);
}
public Matrix(Matrix m)
{
rows = m.rows;
columns = m.columns;
data = new double[rows * columns];
Array.Copy(m.data, data, rows * columns);
}
public void Transpose()
{
var result = new Matrix(columns, rows);
for (var c = 0; c < columns; c++)
for (var r = 0; r < rows; r++)
result.SetValue(c, r, GetValue(r, c));
Copy(result);
}
public static void Add(Matrix M, double V)
{
for (int i = 0; i < M.rows; i++)
for (int j = 0; j < M.columns; j++)
M.SetValue(i, j, M.GetValue(i, j) + V);
}
public static void Multiply(Matrix A, Matrix B, out Matrix? C) // matrix multiplication: C = A x B (rows of A dotted with columns of B)
{
C = null;
if (A.columns == B.rows)
{
int m = A.rows, p = B.columns, n = A.columns;
C = new Matrix(m, p);
for (int i = 0; i < C.rows; i++)
for (int j = 0; j < C.columns; j++)
{
for (int k = 0; k < n; k++)
{
double d = A.GetValue(i, k) * B.GetValue(k, j);
double v = C.GetValue(i, j);
C.SetValue(i, j, v + d);
}
}
}
}
public void Dropout(double dropoutRate) // apply a dropout to the matrix
{
var rand = new Random();
for (int i = 0; i < rows; i++)
for (int j = 0; j < columns; j++)
if (rand.NextDouble() < dropoutRate)
SetValue(i, j, 0.0);
}
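// NOTE: Dropout is provided for experimentation; nothing in this listing calls it automatically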
public static void Delta(Matrix outputs, Matrix desiredOutput, Matrix delta)
{
for (int i = 0; i < delta.rows; i++)
delta.SetValue(i, 0, outputs.GetValue(i, 0) - desiredOutput.GetValue(i, 0));
}
public void SetValuesAndTranspose(int rows, int columns, double[] values)
{
this.rows = rows;
this.columns = columns;
data = new double[rows * columns];
// "values" is laid out as a columns x rows matrix; store its transpose here
for (var r = 0; r < rows; r++)
for (var c = 0; c < columns; c++)
SetValue(r, c, values[c * rows + r]);
}
public double GetValue(int row, int column)
{
return data[row * columns + column];
}
public void SetValue(int row, int column, double value)
{
data[row * columns + column] = value;
}
public void SetValues(int rows, int columns, double[] values)
{
if (rows * columns != this.rows * this.columns)
data = new double[rows * columns];
this.rows = rows;
this.columns = columns;
Array.Copy(values, data, rows * columns);
}
public void Copy(Matrix m)
{
if (rows * columns != m.rows * m.columns)
data = new double[m.rows * m.columns];
rows = m.rows;
columns = m.columns;
Array.Copy(m.data, data, rows * columns);
}
public void Clear()
{
rows = columns = 0;
data = new double[0];
}
public void Print()
{
for (int i = 0; i < rows; i++)
{
for (int j = 0; j < columns; j++)
{
Console.Write(GetValue(i, j) + " ");
}
Console.WriteLine();
}
Console.WriteLine();
}
}
public interface IActivationMethods
{
public void ActivationMethod(Matrix outputs);
public void OutputActivationMethod(Matrix outputs);
public double Derivative(double input);
public string ActivationName { get; }
}
public class ActivationReLUSoftmax : IActivationMethods
{
public void ActivationMethod(Matrix outputs) // Rectified Linear Unit function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.ReLU(outputs.GetValue(i, j)));
}
public void OutputActivationMethod(Matrix outputs)
{
Functions.Softmax(outputs);
}
public double Derivative(double input)
{
return Functions.ReLUPrime(input);
}
public string ActivationName { get { return "ReLU/Softmax"; } }
}
public class ActivationELUSoftmax : IActivationMethods
{
private readonly double alpha;
public ActivationELUSoftmax(double alpha = 0.25)
{
this.alpha = alpha;
}
public void ActivationMethod(Matrix outputs) // Exponential Linear Unit function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.ELU(outputs.GetValue(i, j), alpha));
}
public void OutputActivationMethod(Matrix outputs)
{
Functions.Softmax(outputs);
}
public double Derivative(double input)
{
return Functions.ELUPrime(input, alpha);
}
public string ActivationName { get { return "ELU/Softmax"; } }
}
public class ActivationLeakyReLUSoftmax : IActivationMethods
{
private readonly double alpha;
public ActivationLeakyReLUSoftmax(double alpha = 0.25)
{
this.alpha = alpha;
}
public void ActivationMethod(Matrix outputs) // Leaky Rectified Linear Unit function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.LeakyReLU(outputs.GetValue(i, j), alpha));
}
public void OutputActivationMethod(Matrix outputs)
{
Functions.Softmax(outputs);
}
public double Derivative(double input)
{
return Functions.LeakyReLUPrime(input, alpha);
}
public string ActivationName { get { return "LeakyReLU/Softmax"; } }
}
public class ActivationReLUSigmoid : IActivationMethods // NOTE: useful for networks with binary output (single neuron output)
{
public void ActivationMethod(Matrix outputs) // Rectified Linear Unit function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.ReLU(outputs.GetValue(i, j)));
}
public void OutputActivationMethod(Matrix outputs) // Used for binary outputs, 0 to 1
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.Sigmoid(outputs.GetValue(i, j)));
}
public double Derivative(double input)
{
return Functions.ReLUPrime(input);
}
public string ActivationName { get { return "ReLU/Sigmoid"; } }
}
public class ActivationELUSigmoid : IActivationMethods // NOTE: useful for networks with binary output (single neuron output)
{
private readonly double alpha;
public ActivationELUSigmoid(double alpha = 0.25)
{
this.alpha = alpha;
}
public void ActivationMethod(Matrix outputs) // Exponential Linear Unit function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.ELU(outputs.GetValue(i, j), alpha));
}
public void OutputActivationMethod(Matrix outputs) // Used for binary outputs, 0 to 1
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.Sigmoid(outputs.GetValue(i, j)));
}
public double Derivative(double input)
{
return Functions.ELUPrime(input, alpha);
}
public string ActivationName { get { return "ELU/Sigmoid"; } }
}
public class ActivationLeakyReLUSigmoid : IActivationMethods // NOTE: useful for networks with binary output (single neuron output)
{
private readonly double alpha;
public ActivationLeakyReLUSigmoid(double alpha = 0.25)
{
this.alpha = alpha;
}
public void ActivationMethod(Matrix outputs) // Leaky Rectified Linear Unit function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.LeakyReLU(outputs.GetValue(i, j), alpha));
}
public void OutputActivationMethod(Matrix outputs) // Used for binary outputs, 0 to 1
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.Sigmoid(outputs.GetValue(i, j)));
}
public double Derivative(double input)
{
return Functions.LeakyReLUPrime(input, alpha);
}
public string ActivationName { get { return "LeakyReLU/Sigmoid"; } }
}
public class ActivationTanhSigmoid : IActivationMethods
{
public void ActivationMethod(Matrix outputs) // Tanh function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.Tanh(outputs.GetValue(i, j)));
}
public void OutputActivationMethod(Matrix outputs)
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.Sigmoid(outputs.GetValue(i, j)));
}
public double Derivative(double input)
{
return Functions.TanhPrime(input);
}
public string ActivationName { get { return "Tanh/Sigmoid"; } }
}
public class ActivationTanhSoftmax : IActivationMethods
{
public void ActivationMethod(Matrix outputs) // Tanh function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.Tanh(outputs.GetValue(i, j)));
}
public void OutputActivationMethod(Matrix outputs)
{
Functions.Softmax(outputs);
}
public double Derivative(double input)
{
return Functions.TanhPrime(input);
}
public string ActivationName { get { return "Tanh/Softmax"; } }
}
public class ActivationTanh : IActivationMethods // Good for regression problems where the goal is to predict continuous values
{
public void ActivationMethod(Matrix outputs) // Tanh function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.Tanh(outputs.GetValue(i, j)));
}
public void OutputActivationMethod(Matrix outputs)
{
ActivationMethod(outputs);
}
public double Derivative(double input)
{
return Functions.TanhPrime(input);
}
public string ActivationName { get { return "Tanh"; } }
}
public class ActivationSigmoid : IActivationMethods
{
public void ActivationMethod(Matrix outputs) // Sigmoid function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.Sigmoid(outputs.GetValue(i, j)));
}
public void OutputActivationMethod(Matrix outputs)
{
ActivationMethod(outputs);
}
public double Derivative(double input)
{
return Functions.SigmoidPrime(input);
}
public string ActivationName { get { return "Sigmoid"; } }
}
public class ActivationLinear : IActivationMethods
{
public void ActivationMethod(Matrix outputs) // Linear (identity) function applied to whole matrix
{
for (int i = 0; i < outputs.rows; i++)
for (int j = 0; j < outputs.columns; j++)
outputs.SetValue(i, j, Functions.Linear(outputs.GetValue(i, j)));
}
public void OutputActivationMethod(Matrix outputs)
{
ActivationMethod(outputs);
}
public double Derivative(double input)
{
return Functions.LinearPrime();
}
public string ActivationName { get { return "Linear"; } }
}
public static class Functions
{
public static double Linear(double x) // Linear function
{
return x;
}
public static double LinearPrime() // derivative of Linear function (the line's slope)
{
return 1;
}
// NOTE: a common alpha default is 0.01, but this can be modified, bigger or smaller; TensorFlow uses 0.2 while Keras uses 0.3
public static double LeakyReLU(double x, double alpha) // Rectified Linear Unit function (Leaky variant)
{
return x >= 0 ? x : (alpha * x);
}
public static double LeakyReLUPrime(double x, double alpha) // derivative of Leaky ReLU function
{
return x >= 0 ? 1 : alpha;
}
public static double ReLU(double x) // Rectified Linear Unit function
{
return x > 0 ? x : 0;
}
public static double ReLUPrime(double x) // derivative of ReLU function
{
return x > 0 ? 1 : 0;
}
public static double ELU(double x, double alpha) // Exponential Linear Unit function
{
return x >= 0 ? x : (alpha * (Math.Exp(x) - 1));
}
public static double ELUPrime(double x, double alpha) // derivative of ELU function
{
return x >= 0 ? 1 : (alpha * Math.Exp(x));
}
public static double Tanh(double x)
{
return Math.Tanh(x); // Math.Tanh avoids the overflow the explicit exponential form suffers for large |x|
}
public static double TanhPrime(double x) // derivative of Tanh function
{
double t = Math.Tanh(x);
return 1 - t * t; // this is simply: 1 - (tanh(x) * tanh(x))
}
public static double Sigmoid(double x)
{
return 1.0 / (1 + Math.Exp(-x));
}
public static double SigmoidPrime(double x) // derivative of Sigmoid function
{
double s = Sigmoid(x);
return s * (1.0 - s); // this is simply: Sigmoid(x) * (1.0 - Sigmoid(x))
}
public static void Softmax(Matrix input)
{
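// subtract the maximum value before exponentiating - the standard "stable softmax" trick that prevents overflow in Math.Exp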
double max = double.MinValue;
for (int i = 0; i < input.rows; i++)
max = Math.Max(max, input.GetValue(i, 0));
double val, sum = 0;
for (int i = 0; i < input.rows; i++)
{
val = Math.Exp(input.GetValue(i, 0) - max);
input.SetValue(i, 0, val);
sum += val;
}
for (int i = 0; i < input.rows; i++)
input.SetValue(i, 0, input.GetValue(i, 0) / sum);
}
}
}
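Because the network's state lives entirely in public properties (Weights, Biases, LayerCount, ClipThreshold, ActivationName), a trained network round-trips through System.Text.Json via ToJson and FromJson. A minimal persistence sketch, assuming "network" is a trained NeuralNetwork built with ActivationReLUSoftmax and (input, label) is a test pair from your data set:

// SaveLoad.cs - save a trained network to disk and restore it later
using System;
using System.IO;
using ML;

public static class Persistence
{
    public static void SaveAndReload(NeuralNetwork network, Matrix input, Matrix label)
    {
        File.WriteAllText("network.json", network.ToJson());
        NeuralNetwork? restored = NeuralNetwork.FromJson(File.ReadAllText("network.json"));
        if (restored != null)
        {
            // the restored network remembers its activation pairing (ActivationName),
            // so Calculate and Train throw if passed a mismatched IActivationMethods
            Console.WriteLine(restored.Calculate(input, label, new ActivationReLUSoftmax()));
        }
    }
}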