PROWAREtech
C++: Neural Network, Supervised Deep Machine Learning Example
Supervised learning is a machine learning paradigm for problems where the available data consists of labeled examples, meaning that each data point contains features (covariates) and an associated label. The goal of a supervised learning algorithm is to learn a function that maps feature vectors (inputs) to labels (outputs), based on example input-output pairs.
This neural network code is available in a C# library. This C++ library runs about 15% faster than the C# library compiled with .NET 8, which says a lot about how far .NET has come.
See the unsupervised learning version. Also, see the convolutional neural network example.
Learn about feedforward neural networks. Learn more about the lambda parameter.
Visit the playground for related examples.
This network supports both Categorical Cross-Entropy (CCE) and Sparse Categorical Cross-Entropy (SCCE). For CCE, supply a one-hot vector as the desired output; for SCCE, supply the index of the correct output neuron instead. Just be sure to use a linear output neuron activation for SCCE and SoftMax for CCE.
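For example, here is a minimal sketch of how the desired-output Matrix differs between the two modes; the 784/64/10 layer sizes and the class index 3 are illustrative assumptions, and the header below is assumed to be saved as cce_neural_network.h (as the DLL code later on this page does):
#include "cce_neural_network.h"
// sketch only: construct one desired-output Matrix per training sample
unsigned int layers[] = { 784, 64, 10 };
// CCE: SoftMax output activation and a one-hot desired output (one row per output neuron)
ML::ActivationReLUSoftMax cce;
ML::NeuralNetwork* netCCE = ML::NeuralNetwork::CreateNeuralNetwork(layers, 3, &cce);
float oneHot[10] = { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }; // the true class is index 3
ML::Matrix* desiredCCE = new ML::Matrix(10, 1, oneHot);
// SCCE: Linear output activation and a single-value desired output holding the index of the true class
ML::ActivationReLULinear scce;
ML::NeuralNetwork* netSCCE = ML::NeuralNetwork::CreateNeuralNetwork(layers, 3, &scce);
float classIndex[1] = { 3.0f };
ML::Matrix* desiredSCCE = new ML::Matrix(1, 1, classIndex);
In practice the desired-output matrices are added to a MatrixArray (which reference-counts and frees them) alongside a parallel MatrixArray of input matrices, and both are passed to Train.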
Download these files, which include example code showing how to train the network as well as the MNIST image files of hand-written digits with their labels: NEURALNETWORK.zip. Experiment with the number of neurons and layers. The example usage code requires SixLabors.ImageSharp (NuGet package).
The neural network code, followed by some C# code for creating a confusion matrix chart:
// NOTE: This network only supports forms of Categorical Cross-Entropy loss a.k.a. SoftMax loss for multi-class classification.
// NOTE: This network supports "Sparse Categorical Cross-Entropy" (SCCE) which requires a Linear final layer activation.
// NOTE: SCCE requires much less memory for networks with a large number of outputs, like NLP networks, than having to
// create a one-hot array for each output. SCCE networks are more efficient with resources.
#ifndef _CCE_NEURAL_NETWORK_H
#define _CCE_NEURAL_NETWORK_H
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <thread>
#include <chrono>
#include <cstdlib>
#include <float.h>
#include <string>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <random>
#include <cmath>
#include <cctype>
#include <limits>
#define MACRO_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
#define MACRO_MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
namespace ML
{
class Matrix
{
private:
float* data; // "data" is the name of the json object key
int references;
unsigned rows, columns; // "rows" and "columns" are the names of the json object keys
void copy_array(float* values)
{
for (unsigned int i = 0; i < rows * columns; i++)
data[i] = values[i];
}
Matrix() : data(nullptr), references(0), rows(0), columns(0) {}
Matrix(const Matrix& m) : data(nullptr), references(0), rows(0), columns(0) {}
public:
~Matrix()
{
delete[] data;
data = nullptr;
}
Matrix(int rows, int columns) : references(0)
{
this->rows = rows;
this->columns = columns;
unsigned int len = rows * columns;
data = new float[len];
for (unsigned int i = 0; i < len; i++)
data[i] = 0.0f;
}
Matrix(int rows, int columns, float* values) : references(0)
{
this->rows = rows;
this->columns = columns;
data = new float[rows * columns];
copy_array(values);
}
Matrix(Matrix* m) : references(0)
{
rows = m->rows;
columns = m->columns;
data = new float[rows * columns];
copy_array(m->data);
}
void AddReference()
{
references++;
}
int RemoveReference()
{
int ret = --references;
if (ret < 1)
delete this;
return ret;
}
const float* Data() const
{
return data;
}
unsigned int Rows() const
{
return rows;
}
unsigned int Columns() const
{
return columns;
}
void Transpose()
{
Matrix result(columns, rows);
for (unsigned int c = 0; c < columns; c++)
{
for (unsigned int r = 0; r < rows; r++)
result.SetValue(c, r, GetValue(r, c));
}
Copy(&result);
}
static void Add(Matrix* M, float V)
{
for (unsigned int i = 0; i < M->rows; i++)
{
for (unsigned int j = 0; j < M->columns; j++)
M->SetValue(i, j, M->GetValue(i, j) + V);
}
}
static void Multiply(Matrix* A, Matrix* B, Matrix** C) // this is part of the dot product
{
*C = nullptr;
const unsigned int m = A->rows, p = B->columns, n = A->columns;
if (n == B->rows) // dimensions are compatible, so the matrices can be multiplied
{
double a, b;
float c;
*C = new Matrix(m, p);
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < p; j++)
for (unsigned int k = 0; k < n; k++)
{
a = A->GetValue(i, k);
b = B->GetValue(k, j);
c = (*C)->SetValue(i, j, (float)((*C)->GetValue(i, j) + a * b));
if (std::isinf(c) || std::isnan(c)) // then inf or NaN, delete and nullify (*C);
{
delete *C;
*C = nullptr;
return;
}
}
}
}
void Dropout(float dropoutRate) // apply a dropout to the matrix
{
for (unsigned int i = 0; i < rows; i++)
{
for (unsigned int j = 0; j < columns; j++)
{
if (rand() / (float)RAND_MAX < dropoutRate)
SetValue(i, j, 0.0f);
}
}
}
float GetValue(unsigned int row, unsigned int column)
{
return data[row * columns + column];
}
float SetValue(unsigned int row, unsigned int column, float value)
{
return data[row * columns + column] = value;
}
void Copy(Matrix* m)
{
if (rows * columns != m->rows * m->columns)
{
delete[] data;
data = new float[m->rows * m->columns];
}
rows = m->rows;
columns = m->columns;
copy_array(m->data);
}
};
class MatrixArray
{
private:
unsigned int arraySize, nextIndex;
Matrix** matrices;
void nullify()
{
matrices = new Matrix * [arraySize];
for (unsigned int i = 0; i < arraySize; i++)
matrices[i] = nullptr;
}
MatrixArray() : arraySize(1000), nextIndex(0)
{
nullify();
}
MatrixArray(unsigned int count) : arraySize(count), nextIndex(count)
{
nullify();
}
MatrixArray(const MatrixArray* matrixArray) : arraySize(matrixArray->arraySize), nextIndex(0)
{
nullify();
for (unsigned int i = 0; i < matrixArray->nextIndex; i++)
Add(new Matrix(matrixArray->GetMatrix(i)));
}
public:
static MatrixArray* CreateMatrixArray()
{
return new MatrixArray();
}
static MatrixArray* CreateMatrixArray(unsigned int count)
{
return new MatrixArray(count);
}
static MatrixArray* CreateMatrixArray(const MatrixArray* matrixArray)
{
return new MatrixArray(matrixArray);
}
~MatrixArray()
{
for (unsigned int i = 0; i < nextIndex; i++)
{
if (matrices[i])
if (!matrices[i]->RemoveReference())
matrices[i] = nullptr;
}
delete[] matrices;
matrices = nullptr;
}
void Add(Matrix* matrix)
{
if (nextIndex == arraySize)
{
unsigned int size = arraySize + 1000;
Matrix** newMatrices = new Matrix * [size];
for (unsigned int i = 0; i < size; i++)
newMatrices[i] = (i < arraySize ? matrices[i] : nullptr);
delete[] matrices;
matrices = newMatrices;
newMatrices = nullptr;
arraySize += 1000;
}
if (matrix)
matrix->AddReference();
if (matrices[nextIndex])
matrices[nextIndex]->RemoveReference();
matrices[nextIndex] = matrix;
nextIndex++;
}
unsigned int Count() const { return nextIndex; }
Matrix* GetMatrix(const unsigned int index) const
{
return matrices[index];
}
void Set(Matrix* matrix, unsigned int position)
{
if (matrices[position])
matrices[position]->RemoveReference();
if (matrix)
matrix->AddReference();
matrices[position] = matrix;
}
MatrixArray* GetRange(const unsigned int index, const unsigned int length) const
{
MatrixArray* newRange = MatrixArray::CreateMatrixArray();
for (unsigned int i = 0; i < length; i++)
newRange->Add(matrices[index + i]);
return newRange;
}
static void ShuffleParallelArrays(MatrixArray* array1, MatrixArray* array2 = nullptr)
{
static std::random_device rng;
static std::mt19937 gen(rng());
Matrix* value;
for (unsigned int n = array1->Count(); n > 1;)
{
n--;
unsigned int k = gen() % (n + 1);
value = array1->matrices[k];
array1->matrices[k] = array1->matrices[n];
array1->matrices[n] = value;
if (array2)
{
value = array2->matrices[k];
array2->matrices[k] = array2->matrices[n];
array2->matrices[n] = value;
}
}
}
};
class Randomization // play with learning rate when switching between these randomizations
{
private:
static float GetDouble()
{
static std::random_device rng;
static std::mt19937 gen(rng());
static std::uniform_real_distribution<float> dist(0.0f, 1.0f);
return dist(gen);
}
public:
static void RandomizeHeNormal(MatrixArray* Weights, MatrixArray* Biases)
{
for (unsigned int a = 0; a < Weights->Count(); a++)
{
float init = (float)sqrt(2.0 / Weights->GetMatrix(a)->Columns()); // He-style scale, sqrt(2/fan_in), sampled uniformly here; good for ReLU activation
for (unsigned int i = 0; i < Weights->GetMatrix(a)->Rows(); i++)
for (unsigned int j = 0; j < Weights->GetMatrix(a)->Columns(); j++)
Weights->GetMatrix(a)->SetValue(i, j, GetDouble() * init - init * 0.5f);
}
for (unsigned int a = 0; a < Biases->Count(); a++)
{
for (unsigned int i = 0; i < Biases->GetMatrix(a)->Rows(); i++)
Biases->GetMatrix(a)->SetValue(i, 0, GetDouble() * 0.5f - 0.25f);
}
}
static void RandomizeGlorotXavier(MatrixArray* Weights, MatrixArray* Biases)
{
for (unsigned int a = 0; a < Weights->Count(); a++)
{
float init = (float)sqrt(6.0 / (Weights->GetMatrix(a)->Columns() + Weights->GetMatrix(a)->Rows())); // Glorot/Xavier-style scale, sqrt(6/(fan_in+fan_out)), sampled uniformly here; good for Tanh/Sigmoid activation
for (unsigned int i = 0; i < Weights->GetMatrix(a)->Rows(); i++)
for (unsigned int j = 0; j < Weights->GetMatrix(a)->Columns(); j++)
Weights->GetMatrix(a)->SetValue(i, j, GetDouble() * init - init * 0.5f);
}
for (unsigned int a = 0; a < Biases->Count(); a++)
{
for (unsigned int i = 0; i < Biases->GetMatrix(a)->Rows(); i++)
Biases->GetMatrix(a)->SetValue(i, 0, GetDouble() * 0.5f - 0.25f);
}
}
};
struct Functions
{
static unsigned int GetIndexMax(Matrix* m) // only pass 1 dimension matrices
{
unsigned int index = 0;
float a, maximum = m->GetValue(0, 0);
for (unsigned int i = 1; i < m->Rows(); i++)
{
a = m->GetValue(i, 0);
if (a > maximum)
{
maximum = a;
index = i;
}
}
return index;
}
static float Linear(float x) // Linear function
{
return x;
}
static float LinearPrime() // derivative of Linear function (the line's slope)
{
return 1;
}
// alpha default might be 0.01, but this can be modified, bigger or smaller; tensorflow uses 0.2 while keras uses 0.3
static float LeakyReLU(float x, float alpha) // Rectified Linear Unit function (Leaky variant)
{
return x >= 0.0f ? x : (alpha * x);
}
static float LeakyReLUPrime(float x, float alpha) // derivative of Leaky ReLU function
{
return x >= 0.0f ? 1.0f : alpha;
}
static float ReLU(float x) // Rectified Linear Unit function
{
return x > 0.0f ? x : 0.0f;
}
static float ReLUPrime(float x) // derivative of ReLU function
{
return x > 0.0f ? 1.0f : 0.0f;
}
static float ELU(float x, float alpha) // Exponential Linear Unit function
{
return x >= 0.0f ? x : (alpha * (exp(x) - 1));
}
static float ELUPrime(float x, float alpha) // derivative of ELU function
{
return x >= 0.0f ? 1.0f : (alpha * exp(x));
}
static float Tanh(float x)
{
return (exp(x) - exp(-x)) / (exp(x) + exp(-x));
}
static float TanhPrime(float x)
{
return 1.0f - ((exp(x) - exp(-x)) / (exp(x) + exp(-x))) * ((exp(x) - exp(-x)) / (exp(x) + exp(-x))); // this is simply: 1 - (tanh(x) * tanh(x))
}
static float Sigmoid(float x)
{
return 1.0f / (1 + exp(-x));
}
static float SigmoidPrime(float x) // derivative of Sigmoid function
{
return (1.0f / (1 + exp(-x))) * (1.0f - (1.0f / (1 + exp(-x)))); // this is simply: Sigmoid(x) * (1.0 - Sigmoid(x))
}
static void SoftMax(Matrix* input)
{
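// subtract the row maximum before exponentiating (the standard log-sum-exp trick) to avoid overflow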
float maximum = input->GetValue(0, 0);
for (unsigned int i = 1; i < input->Rows(); i++)
if (input->GetValue(i, 0) > maximum)
maximum = input->GetValue(i, 0);
double sum = 0;
for (unsigned int i = 0; i < input->Rows(); i++)
sum += input->SetValue(i, 0, exp(input->GetValue(i, 0) - maximum));
for (unsigned int i = 0; i < input->Rows(); i++)
input->SetValue(i, 0, (float)(input->GetValue(i, 0) / sum));
}
};
class IActivationMethods
{
public:
virtual void ActivationMethod(Matrix* outputs) const = 0;
virtual void OutputActivationMethod(Matrix* outputs) const = 0;
virtual float Derivative(float input) const = 0;
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const = 0;
};
class ActivationReLUSoftMax : public IActivationMethods
{
public:
void ActivationMethod(Matrix* outputs) const override // Rectified Linear Unit function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::ReLU(outputs->GetValue(i, j)));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
Functions::SoftMax(outputs);
}
float Derivative(float input) const override
{
return Functions::ReLUPrime(input);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeHeNormal(Weights, Biases);
}
};
class ActivationELUSoftMax : public IActivationMethods
{
private:
float alpha;
public:
ActivationELUSoftMax(float alpha = 1.0f) : alpha(alpha) {}
void ActivationMethod(Matrix* outputs) const override // Exponential Linear Unit function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::ELU(outputs->GetValue(i, j), alpha));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
Functions::SoftMax(outputs);
}
float Derivative(float input) const override
{
return Functions::ELUPrime(input, alpha);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeHeNormal(Weights, Biases);
}
};
class ActivationLeakyReLUSoftMax : public IActivationMethods
{
private:
float alpha;
public:
ActivationLeakyReLUSoftMax(float alpha = 0.2f) : alpha(alpha) {}
void ActivationMethod(Matrix* outputs) const override // Leaky Rectified Linear Unit function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::LeakyReLU(outputs->GetValue(i, j), alpha));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
Functions::SoftMax(outputs);
}
float Derivative(float input) const override
{
return Functions::LeakyReLUPrime(input, alpha);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeHeNormal(Weights, Biases);
}
};
class ActivationTanhSoftMax : public IActivationMethods
{
public:
void ActivationMethod(Matrix* outputs) const override // Tanh function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::Tanh(outputs->GetValue(i, j)));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
Functions::SoftMax(outputs);
}
float Derivative(float input) const override
{
return Functions::TanhPrime(input);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeGlorotXavier(Weights, Biases);
}
};
class ActivationSigmoidSoftMax : public IActivationMethods
{
public:
void ActivationMethod(Matrix* outputs) const override // Sigmoid function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::Sigmoid(outputs->GetValue(i, j)));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
Functions::SoftMax(outputs);
}
float Derivative(float input) const override
{
return Functions::SigmoidPrime(input);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeGlorotXavier(Weights, Biases);
}
};
class ActivationReLULinear : public IActivationMethods
{
public:
void ActivationMethod(Matrix* outputs) const override // Rectified Linear Unit function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::ReLU(outputs->GetValue(i, j)));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
// Linear
}
float Derivative(float input) const override
{
return Functions::ReLUPrime(input);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeHeNormal(Weights, Biases);
}
};
class ActivationELULinear : public IActivationMethods
{
private:
float alpha;
public:
ActivationELULinear(float alpha = 1.0f) : alpha(alpha) {}
void ActivationMethod(Matrix* outputs) const override // Exponential Linear Unit function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::ELU(outputs->GetValue(i, j), alpha));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
// Linear
}
float Derivative(float input) const override
{
return Functions::ELUPrime(input, alpha);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeHeNormal(Weights, Biases);
}
};
class ActivationLeakyReLULinear : public IActivationMethods
{
private:
float alpha;
public:
ActivationLeakyReLULinear(float alpha = 0.2f) : alpha(alpha) {}
void ActivationMethod(Matrix* outputs) const override // Leaky Rectified Linear Unit function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::LeakyReLU(outputs->GetValue(i, j), alpha));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
// Linear
}
float Derivative(float input) const override
{
return Functions::LeakyReLUPrime(input, alpha);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeHeNormal(Weights, Biases);
}
};
class ActivationTanhLinear : public IActivationMethods
{
public:
void ActivationMethod(Matrix* outputs) const override // Tanh function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::Tanh(outputs->GetValue(i, j)));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
// Linear
}
float Derivative(float input) const override
{
return Functions::TanhPrime(input);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeGlorotXavier(Weights, Biases);
}
};
class ActivationSigmoidLinear : public IActivationMethods
{
public:
void ActivationMethod(Matrix* outputs) const override // Sigmoid function applied to whole matrix
{
for (unsigned int i = 0; i < outputs->Rows(); i++)
{
for (unsigned int j = 0; j < outputs->Columns(); j++)
outputs->SetValue(i, j, Functions::Sigmoid(outputs->GetValue(i, j)));
}
}
void OutputActivationMethod(Matrix* outputs) const override
{
// Linear
}
float Derivative(float input) const override
{
return Functions::SigmoidPrime(input);
}
virtual void Randomize(MatrixArray* Weights, MatrixArray* Biases) const override
{
Randomization::RandomizeGlorotXavier(Weights, Biases);
}
};
struct Cost
{
static void Delta(Matrix* outputs, Matrix* desiredOutputs, Matrix* deltaValue)
{
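// the gradient of cross-entropy loss with a SoftMax output simplifies to (predicted - desired), so no separate SoftMax derivative is needed here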
if (desiredOutputs->Rows() == 1 && deltaValue->Rows() != desiredOutputs->Rows()) // this is the same as "Sparse Categorical Cross-Entropy" (SCCE) and requires a Linear final activation; it requires that the calling program FeedForward(givenInputs, activationObject), SoftMax(feedForward) and then find the index of the maximum of the feedForward.
{
deltaValue->Copy(outputs);
unsigned int desiredIndex = (unsigned int)desiredOutputs->GetValue(0, 0);
deltaValue->SetValue(desiredIndex, 0, deltaValue->GetValue(desiredIndex, 0) - 1);
}
else // this is "Categorical Cross-Entropy" (CCE) and it requires a one-hot array for the desired outputs; it is not memory efficient.
{
for (unsigned int i = 0; i < deltaValue->Rows(); i++)
deltaValue->SetValue(i, 0, outputs->GetValue(i, 0) - desiredOutputs->GetValue(i, 0));
}
}
};
class NeuralNetwork;
struct BatchParams
{
NeuralNetwork* network;
MatrixArray* givenInputsBatch;
MatrixArray* desiredOutputsBatch;
MatrixArray* local_weights;
MatrixArray* local_biases;
MatrixArray* delta_gradient_w;
MatrixArray* delta_gradient_b;
IActivationMethods* activationObject;
float learningRate, lambda, clipThreshold;
unsigned int threadCount;
std::thread threadObj;
bool threadActive, errorFound;
~BatchParams();
BatchParams(NeuralNetwork* network,
MatrixArray* givenInputs,
MatrixArray* desiredOutputs,
IActivationMethods* activationObject,
float learningRate,
float lambda,
unsigned int threadCount,
float clipThreshold);
};
class NeuralNetwork
{
private:
const unsigned int layerCount;
NeuralNetwork() : layerCount(0), Weights(nullptr), Biases(nullptr) {}
NeuralNetwork(const NeuralNetwork& nn) : layerCount(0), Weights(nullptr), Biases(nullptr) {}
public:
MatrixArray* Weights;
MatrixArray* Biases;
NeuralNetwork(MatrixArray* Weights, MatrixArray* Biases, const unsigned int LayerCount) : Weights(Weights), Biases(Biases), layerCount(LayerCount) {}
~NeuralNetwork()
{
delete Weights;
Weights = nullptr;
delete Biases;
Biases = nullptr;
};
const unsigned int LayerCount() const { return layerCount; }
// clipThreshold may be needed when working with lots of data, such as somewhat large image recognition with a CNN, but using a clip threshold makes the network learn more slowly
// HeNormal initialization is generally used for networks with ReLU activations, as it considers the non-linearities introduced by ReLUs.
// If using non-ReLU activations, GlorotXavier initialization is used, which is designed for Sigmoid and Tanh functions.
static NeuralNetwork* CreateNeuralNetwork(const unsigned int* neuronLayers, const unsigned int neuronLayersLength, IActivationMethods* activationObject, bool biases = true)
{
MatrixArray* Biases = MatrixArray::CreateMatrixArray();
if (biases)
{
for (unsigned int i = 1; i < neuronLayersLength; i++)
{
if (!neuronLayers[i])
{
delete Biases;
return nullptr;
}
Biases->Add(new Matrix(neuronLayers[i], 1));
}
}
MatrixArray* Weights = MatrixArray::CreateMatrixArray();
for (unsigned int i = 0; i < neuronLayersLength - 1; i++)
{
if (!neuronLayers[i + 1] || !neuronLayers[i])
{
delete Weights;
delete Biases;
return nullptr;
}
Weights->Add(new Matrix(neuronLayers[i + 1], neuronLayers[i]));
}
activationObject->Randomize(Weights, Biases);
return new NeuralNetwork(Weights, Biases, neuronLayersLength);
}
// lambda is for the L2 regularization term, and should be a very small fraction (between zero and one) to help prevent overfitting and exploding gradients
// at zero, it provides no regularization and risks exploding gradients; in that case, use a clipThreshold, such as 5.0
// training might be slowed with multiple threads because the batch of training data of each thread is smaller and therefore has less to learn from; consider decreasing the number of threads as the epoch count increases and experimenting with the learning rate
// learningRate can decrease by using an algorithm such as: 0.1 ^ (epoch / (float)epochCount) * initialLearningRate
// NOTE: For the C++ developer, this returns false when a mismatched activation is used, the input and output counts do not match, or a matrix multiplication error occurs
bool Train(MatrixArray* givenInputs, MatrixArray* desiredOutputs, IActivationMethods* activationObject, float learningRate, float lambda, unsigned int threadCount = 0, float clipThreshold = 0.0f)
{
if (givenInputs->Count() != desiredOutputs->Count())
return false;
threadCount = threadCount ? threadCount : std::thread::hardware_concurrency();
unsigned int mini_batch_size = givenInputs->Count() / threadCount;
if (mini_batch_size > 0)
{
BatchParams** bps = new BatchParams * [threadCount];
for (unsigned int x = 0; x < threadCount; x++)
bps[x] = new BatchParams(this, givenInputs->GetRange(x * mini_batch_size, mini_batch_size), desiredOutputs->GetRange(x * mini_batch_size, mini_batch_size), activationObject, learningRate, lambda, threadCount, clipThreshold);
while (ActiveThreads(bps, threadCount))
std::this_thread::sleep_for(std::chrono::milliseconds(250)); // Sleep 250ms
for (unsigned int x = 0; x < threadCount; x++)
{
if (bps[x]->errorFound) // either could not multiply matrices because the network is incompatible or NaN was encountered
{
for (unsigned int y = 0; y < threadCount; y++)
{
delete bps[y];
bps[y] = nullptr;
}
delete[] bps;
bps = nullptr;
return false;
}
}
for (unsigned int x = 0; x < Weights->Count(); x++)
{
MatrixArray* weights = MatrixArray::CreateMatrixArray();
for (unsigned int i = 0; i < threadCount; i++)
weights->Add(bps[i]->local_weights->GetMatrix(x));
ParameterAveraging(Weights->GetMatrix(x), weights);
delete weights;
}
for (unsigned int x = 0; x < Biases->Count(); x++)
{
MatrixArray* biases = MatrixArray::CreateMatrixArray();
for (unsigned int i = 0; i < threadCount; i++)
biases->Add(bps[i]->local_biases->GetMatrix(x));
ParameterAveraging(Biases->GetMatrix(x), biases);
delete biases;
}
for (unsigned int x = 0; x < threadCount; x++)
{
delete bps[x];
bps[x] = nullptr;
}
delete[] bps;
bps = nullptr;
}
for (unsigned int x = threadCount * mini_batch_size; x < givenInputs->Count(); x++)
{
if (!Train(givenInputs->GetMatrix(x), desiredOutputs->GetMatrix(x), activationObject, learningRate, lambda))
return false;
}
return true;
}
// lambda is for the L2 regularization term, and should be a very small fraction (between zero and one) to help prevent overfitting and exploding gradients
// at zero, it provides no regularization and risks exploding gradients; in that case, use a clipThreshold, such as 5.0
// learningRate can decrease by using an algorithm such as: 0.1 ^ (epoch / (float)epochCount) * initialLearningRate
// NOTE: For the C++ developer, this returns false when a mismatched activation is used or a matrix multiplication error occurs
bool Train(Matrix* givenInput, Matrix* desiredOutput, IActivationMethods* activationObject, float learningRate, float lambda, float clipThreshold = 0.0f)
{
MatrixArray* delta_gradient_w = MatrixArray::CreateMatrixArray(Weights->Count());
MatrixArray* delta_gradient_b = MatrixArray::CreateMatrixArray(Biases->Count());
if(!BackPropagate(this, Weights, Biases, givenInput, desiredOutput, activationObject, delta_gradient_w, delta_gradient_b, clipThreshold))
return false;
MatrixArray* new_weights = MatrixArray::CreateMatrixArray();
MatrixArray* new_biases = MatrixArray::CreateMatrixArray();
for (unsigned int i = 0; i < delta_gradient_w->Count(); i++)
{
for (unsigned int j = 0; j < delta_gradient_w->GetMatrix(i)->Rows(); j++)
{
for (unsigned int k = 0; k < delta_gradient_w->GetMatrix(i)->Columns(); k++)
{
float w = Weights->GetMatrix(i)->GetValue(j, k);
float nw = delta_gradient_w->GetMatrix(i)->GetValue(j, k);
delta_gradient_w->GetMatrix(i)->SetValue(j, k, (1 - learningRate * lambda) * w - learningRate * nw);
}
}
new_weights->Add(delta_gradient_w->GetMatrix(i));
}
for (unsigned int i = 0; i < delta_gradient_b->Count(); i++)
{
for (unsigned int j = 0; j < delta_gradient_b->GetMatrix(i)->Rows(); j++)
{
float b = Biases->GetMatrix(i)->GetValue(j, 0);
float nb = delta_gradient_b->GetMatrix(i)->GetValue(j, 0);
delta_gradient_b->GetMatrix(i)->SetValue(j, 0, b - learningRate * nb);
}
new_biases->Add(delta_gradient_b->GetMatrix(i));
}
delete Weights;
Weights = new_weights;
delete Biases;
Biases = new_biases;
delete delta_gradient_b;
delete delta_gradient_w;
return true;
}
unsigned int TrueIndex(Matrix* desiredOutputs)
{
if (desiredOutputs->Rows() == 1) // SCCE (w/Linear output)
return (unsigned int)desiredOutputs->GetValue(0, 0);
else // CCE
return Functions::GetIndexMax(desiredOutputs);
}
unsigned int PredictedIndex(Matrix* givenInputs, IActivationMethods* activationObject)
{
Matrix* ff = FeedForward(givenInputs, activationObject);
unsigned int index = Functions::GetIndexMax(ff);
delete ff;
return index;
}
double CalculateLoss(Matrix* givenInputs, Matrix* desiredOutputs, IActivationMethods* activationObject, unsigned int* predictedIndex, unsigned int* trueIndex)
{
unsigned int iPredicted, iTrue;
Matrix* ff = FeedForward(givenInputs, activationObject);
if (desiredOutputs->Rows() == 1 && givenInputs->Rows() != desiredOutputs->Rows()) // Calculate SCCE (Linear)
{
Functions::SoftMax(ff);
iPredicted = Functions::GetIndexMax(ff);
iTrue = (unsigned int)desiredOutputs->GetValue(0, 0); // holds the index of the true element
}
else
{
iPredicted = Functions::GetIndexMax(ff);
iTrue = Functions::GetIndexMax(desiredOutputs);
}
double loss, a = (iPredicted == iTrue) ? ff->GetValue(iPredicted, 0) : 0;
if (a == 0)
loss = 1.0;
else if (a == 1)
loss = 0.0;
else
loss = -log(a);
if (predictedIndex)
*predictedIndex = iPredicted;
if (trueIndex)
*trueIndex = iTrue;
delete ff;
return loss;
}
static void TrainMiniBatch(void* v)
{
BatchParams* bp = (BatchParams*)v;
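// each thread's updates are dampened by 1/sqrt(threadCount); the thread-local weights and biases are averaged into the global parameters after all threads finish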
double invSqrtThreadCount = 1.0 / sqrt(bp->threadCount);
for (unsigned int x = 0; x < bp->givenInputsBatch->Count(); x++)
{
if (!BackPropagate(bp->network, bp->local_weights, bp->local_biases, bp->givenInputsBatch->GetMatrix(x), bp->desiredOutputsBatch->GetMatrix(x), bp->activationObject, bp->delta_gradient_w, bp->delta_gradient_b, bp->clipThreshold))
{
bp->errorFound = true;
bp->threadActive = false;
return;
}
for (unsigned int i = 0; i < bp->delta_gradient_w->Count(); i++)
{
for (unsigned int row = 0; row < bp->delta_gradient_w->GetMatrix(i)->Rows(); row++)
{
for (unsigned int column = 0; column < bp->delta_gradient_w->GetMatrix(i)->Columns(); column++)
{
float w = bp->local_weights->GetMatrix(i)->GetValue(row, column);
float nw = bp->delta_gradient_w->GetMatrix(i)->GetValue(row, column);
bp->local_weights->GetMatrix(i)->SetValue(row, column, (float)((1 - bp->learningRate * bp->lambda) * w - bp->learningRate * invSqrtThreadCount * nw));
}
}
}
for (unsigned int i = 0; i < bp->delta_gradient_b->Count(); i++)
{
for (unsigned int row = 0; row < bp->delta_gradient_b->GetMatrix(i)->Rows(); row++)
{
float b = bp->local_biases->GetMatrix(i)->GetValue(row, 0);
float nb = bp->delta_gradient_b->GetMatrix(i)->GetValue(row, 0);
bp->local_biases->GetMatrix(i)->SetValue(row, 0, (float)(b - bp->learningRate * invSqrtThreadCount * nb));
}
}
}
bp->threadActive = false;
}
Matrix* FeedForward(Matrix* givenInputs, IActivationMethods* activationObject) const
{
Matrix* given = givenInputs;
for (unsigned int i = 0; i < layerCount - 1; i++)
{
Matrix* temp;
Matrix::Multiply(Weights->GetMatrix(i), given, &temp);
if (temp == nullptr) // Cannot multiply matrices
{
if (given != givenInputs)
{
delete given;
given = nullptr;
}
return nullptr;
}
if (Biases->Count() > 0) // add bias
{
for (unsigned int j = 0; j < temp->Rows(); j++)
for (unsigned int k = 0; k < temp->Columns(); k++)
temp->SetValue(j, k, temp->GetValue(j, k) + Biases->GetMatrix(i)->GetValue(j, 0));
}
if (i < layerCount - 2)
activationObject->ActivationMethod(temp);
else
activationObject->OutputActivationMethod(temp);
if (given != givenInputs)
{
delete given;
given = nullptr;
}
given = temp;
}
return (given == givenInputs ? nullptr : given);
}
private:
static bool ActiveThreads(BatchParams** bps, const unsigned int threadCount)
{
for (unsigned int i = 0; i < threadCount; i++)
if(bps[i]->threadActive)
return true;
return false;
}
static void ParameterAveraging(Matrix* globalParameters, MatrixArray* localParametersOfThreads)
{
// Initialize a temporary matrix of doubles to hold the sum of local parameters
double** sumOfLocalParams = new double* [globalParameters->Rows()];
for(unsigned int i = 0; i < globalParameters->Rows(); i++)
sumOfLocalParams[i] = new double[globalParameters->Columns()];
for(unsigned int i = 0; i < globalParameters->Rows(); i++)
for (unsigned int j = 0; j < globalParameters->Columns(); j++)
sumOfLocalParams[i][j] = 0.0;
for (unsigned int threadId = 0; threadId < localParametersOfThreads->Count(); threadId++)
for (unsigned int row = 0; row < localParametersOfThreads->GetMatrix(threadId)->Rows(); row++)
for (unsigned int column = 0; column < localParametersOfThreads->GetMatrix(threadId)->Columns(); column++)
sumOfLocalParams[row][column] += localParametersOfThreads->GetMatrix(threadId)->GetValue(row, column);
// Update the global parameter matrix using parameter averaging formula
for (unsigned int row = 0; row < globalParameters->Rows(); row++)
for (unsigned int column = 0; column < globalParameters->Columns(); column++)
globalParameters->SetValue(row, column, (float)(sumOfLocalParams[row][column] / localParametersOfThreads->Count()));
// clean up
for (unsigned int i = 0; i < globalParameters->Rows(); i++)
delete[] sumOfLocalParams[i];
delete[] sumOfLocalParams;
}
// returns false when it cannot multiply matrices, which is due to an invalid network configuration
static bool BackPropagate(NeuralNetwork* network, MatrixArray* Weights, MatrixArray* Biases, Matrix* givenInputs, Matrix* desiredOutputs, IActivationMethods* activationObject, MatrixArray* delta_gradient_w, MatrixArray* delta_gradient_b, float clipThreshold) // uses Stochastic Gradient Descent
{
Matrix* activation = givenInputs;
MatrixArray* activations = MatrixArray::CreateMatrixArray();
activations->Add(activation);
MatrixArray* zs = MatrixArray::CreateMatrixArray();
// feed forward
for (unsigned int i = 0; i < network->layerCount - 1; i++)
{
Matrix* z;
Matrix::Multiply(Weights->GetMatrix(i), activation, &z);
if (z == nullptr) // this means it cannot multiply the matrices and cannot proceed
return false;
if (Biases->Count() > 0) // add bias
{
for (unsigned int j = 0; j < z->Rows(); j++)
for (unsigned int k = 0; k < z->Columns(); k++)
z->SetValue(j, k, z->GetValue(j, k) + Biases->GetMatrix(i)->GetValue(j, 0));
}
zs->Add(new Matrix(z));
if (i < network->layerCount - 2)
activationObject->ActivationMethod(z);
else
activationObject->OutputActivationMethod(z);
activation = z;
activations->Add(activation);
}
// backward pass
Matrix* act = activations->GetMatrix(activations->Count() - 1);
Matrix* delta = new Matrix(act->Rows(), act->Columns());
Cost::Delta(act, desiredOutputs, delta);
if (delta_gradient_b->Count() > 0)
delta_gradient_b->Set(new Matrix(delta), delta_gradient_b->Count() - 1); // this will replace the Matrix at the last position
Matrix* transposed = new Matrix(activations->GetMatrix(activations->Count() - 2));
transposed->Transpose();
Matrix* temp;
Matrix::Multiply(delta, transposed, &temp);
delete transposed;
transposed = nullptr;
if (temp == nullptr)
{
delete delta;
delta = nullptr;
return false;
}
delta_gradient_w->Set(temp, delta_gradient_w->Count() - 1);
for (unsigned int i = 2; i < network->layerCount; i++)
{
transposed = new Matrix(Weights->GetMatrix(network->layerCount - i));
transposed->Transpose();
Matrix::Multiply(transposed, delta, &temp);
delete transposed;
transposed = nullptr;
if (temp == nullptr)
{
delete delta;
delta = nullptr;
return false;
}
// multiply the derivative function on "temp"
Matrix* z = zs->GetMatrix(zs->Count() - i);
for (unsigned int j = 0; j < temp->Rows(); j++)
{
for (unsigned int k = 0; k < temp->Columns(); k++)
temp->SetValue(j, k, temp->GetValue(j, k) * activationObject->Derivative(z->GetValue(j, 0)));
}
delta->Copy(temp);
if (delta_gradient_b->Count() > 0)
delta_gradient_b->Set(temp, delta_gradient_b->Count() - i);
transposed = new Matrix(activations->GetMatrix(network->layerCount - i - 1));
transposed->Transpose();
Matrix::Multiply(delta, transposed, &temp);
delete transposed;
transposed = nullptr;
if (temp == nullptr)
{
delete delta;
delta = nullptr;
return false;
}
delta_gradient_w->Set(temp, delta_gradient_w->Count() - i);
}
delete delta;
delta = nullptr;
if (clipThreshold > 0) // if greater than zero then will take care of exploding gradients, but may hamper the network's ability to learn
{
double gradients_norm, scale_factor;
// biases
gradients_norm = 0;
for (unsigned int i = 0; i < delta_gradient_b->Count(); i++)
for (unsigned int j = 0; j < delta_gradient_b->GetMatrix(i)->Rows(); j++)
for (unsigned int k = 0; k < delta_gradient_b->GetMatrix(i)->Columns(); k++)
gradients_norm += delta_gradient_b->GetMatrix(i)->GetValue(j, k) * delta_gradient_b->GetMatrix(i)->GetValue(j, k);
gradients_norm = sqrt(gradients_norm);
if (gradients_norm > clipThreshold)
{
scale_factor = clipThreshold / gradients_norm;
for (unsigned int i = 0; i < delta_gradient_b->Count(); i++)
for (unsigned int j = 0; j < delta_gradient_b->GetMatrix(i)->Rows(); j++)
for (unsigned int k = 0; k < delta_gradient_b->GetMatrix(i)->Columns(); k++)
delta_gradient_b->GetMatrix(i)->SetValue(j, k, (float)(delta_gradient_b->GetMatrix(i)->GetValue(j, k) * scale_factor));
}
// weights
gradients_norm = 0;
for (unsigned int i = 0; i < delta_gradient_w->Count(); i++)
for (unsigned int j = 0; j < delta_gradient_w->GetMatrix(i)->Rows(); j++)
for (unsigned int k = 0; k < delta_gradient_w->GetMatrix(i)->Columns(); k++)
gradients_norm += delta_gradient_w->GetMatrix(i)->GetValue(j, k) * delta_gradient_w->GetMatrix(i)->GetValue(j, k);
gradients_norm = sqrt(gradients_norm);
if (gradients_norm > clipThreshold)
{
scale_factor = clipThreshold / gradients_norm;
for (unsigned int i = 0; i < delta_gradient_w->Count(); i++)
for (unsigned int j = 0; j < delta_gradient_w->GetMatrix(i)->Rows(); j++)
for (unsigned int k = 0; k < delta_gradient_w->GetMatrix(i)->Columns(); k++)
delta_gradient_w->GetMatrix(i)->SetValue(j, k, (float)(delta_gradient_w->GetMatrix(i)->GetValue(j, k) * scale_factor));
}
}
delete zs;
delete activations;
return true;
}
};
class NeuralNetworkJsonProcessor
{
private:
static const std::string floatToString(const float val)
{
std::stringstream tmp;
tmp << val;
return tmp.str();
}
static const std::string createJsonFromFloatArray(const float* values, const unsigned int length)
{
std::string json = "[";
for (unsigned int i = 0; i < length; i++)
{
if (i > 0)
json += ",";
json += floatToString(values[i]);
}
json += "]";
return json;
}
static const std::string createJsonFromMatrix(const Matrix* matrix)
{
std::string json = "{\"data\":";
json += createJsonFromFloatArray(matrix->Data(), matrix->Rows() * matrix->Columns());
json += ",\"rows\":";
json += std::to_string(matrix->Rows());
json += ",\"columns\":";
json += std::to_string(matrix->Columns());
json += "}";
return json;
}
static void createMatrix(int& i, const char* str, Matrix** ppMatrix)
{
*ppMatrix = nullptr;
unsigned int rows = 0, columns = 0, size = 1000, idx = 0;
float* data = new float[size];
std::string tmp;
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] != '{')
goto escape_func;
i++;
while (str[i] && str[i] != '}')
{
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] != '"')
goto escape_func;
i++;
for (tmp = ""; str[i] && str[i] != '"'; i++)
tmp += str[i];
if (str[i] != '"')
goto escape_func;
i++;
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] != ':')
goto escape_func;
i++;
for (; str[i] && std::isspace(str[i]); i++);
if (tmp == "data")
{
if (str[i] != '[')
goto escape_func;
i++;
while (str[i] && str[i] != ']')
{
for (; str[i] && std::isspace(str[i]); i++);
tmp = "";
for (tmp = ""; str[i] && (str[i] == 'E' || str[i] == 'e' || str[i] == '.' || str[i] == '-' || str[i] == '+' || isdigit(str[i])); i++)
tmp += str[i];
if (!str[i] || tmp.length() == 0)
goto escape_func;
if (idx == size)
{
size = size + 1000;
float* d = new float[size];
for (unsigned int j = 0; j < idx; j++)
d[j] = data[j];
delete[] data;
data = d;
}
data[idx++] = std::stof(tmp);
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] == ',')
i++;
}
if (str[i] != ']')
goto escape_func;
i++;
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] == ',')
i++;
else if (!str[i])
goto escape_func;
}
else if (tmp == "rows")
{
for (tmp = ""; str[i] && isdigit(str[i]); i++)
tmp += str[i];
rows = std::stoul(tmp);
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] == ',')
i++;
else if (!str[i])
goto escape_func;
}
else if (tmp == "columns")
{
for (tmp = ""; str[i] && isdigit(str[i]); i++)
tmp += str[i];
columns = std::stoul(tmp);
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] == ',')
i++;
else if (!str[i])
goto escape_func;
}
else
goto escape_func;
}
if (str[i] != '}')
goto escape_func;
i++;
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] == ',')
i++;
*ppMatrix = new Matrix(rows, columns, data);
escape_func:
;
}
public:
static NeuralNetwork* CreateNeuralNetwork(const char* json)
{
MatrixArray* Weights = MatrixArray::CreateMatrixArray();
MatrixArray* Biases = MatrixArray::CreateMatrixArray();
unsigned int LayerCount = 0;
std::string tmp;
const char* str = json;
int i = 0;
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] != '{')
return nullptr;
i++;
while (str[i])
{
if (str[i] == '}') // then end of object found
return new NeuralNetwork(Weights, Biases, LayerCount);
if (str[i] == ',' || std::isspace(str[i]))
{
i++;
continue;
}
if (str[i] == '"')
{
i++;
for (tmp = ""; str[i] && str[i] != '"'; i++)
tmp += str[i];
if (str[i] != '"' || tmp.length() == 0)
break;
i++;
if (tmp == "Weights" || tmp == "Biases")
{
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] == ':')
i++;
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] != '[')
break;
i++;
while (str[i] && str[i] != ']')
{
for (; str[i] && std::isspace(str[i]); i++);
while (str[i] && str[i] == '{')
{
Matrix* m;
createMatrix(i, str, &m);
if (!m)
break;
(tmp == "Weights" ? Weights : Biases)->Add(m);
for (; str[i] && std::isspace(str[i]); i++);
}
}
if (str[i] == ']')
i++;
else
break;
}
else if (tmp == "LayerCount")
{
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] != ':')
break;
i++;
for (; str[i] && std::isspace(str[i]); i++);
for (tmp = ""; str[i] && str[i] != ',' && str[i] != '}' && std::isspace(str[i]) == false; i++)
tmp += str[i];
LayerCount = std::stoul(tmp);
}
else
{
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] != ':')
break;
i++;
for (; str[i] && std::isspace(str[i]); i++);
if (str[i] == '"')
{
i++;
for (; str[i] && str[i] != '"'; i++);
if (str[i] != '"')
break;
if (str[i] == '"')
i++;
}
else if (str[i] == '[')
{
i++;
for (unsigned int count = 1; str[i] && count > 0; i++)
{
if (str[i] == '[')
count++;
else if (str[i] == ']')
count--;
}
}
else if (str[i] == '{')
{
i++;
for (unsigned int count = 1; str[i] && count > 0; i++)
{
if (str[i] == '{')
count++;
else if (str[i] == '}')
count--;
}
}
else if(isdigit(str[i]))
{
i++;
for (; str[i] && (isdigit(str[i]) || str[i] == '.'); i++);
}
else
continue;
}
}
else
break;
if (str[i] == '"' || str[i] == '}')
continue;
i++;
}
return nullptr;
}
static const std::string CreateJsonFromMatrixArray(const MatrixArray* matrixArray)
{
std::string json = "[";
for (unsigned int i = 0; i < matrixArray->Count(); i++)
{
if (i > 0)
json += ",";
json += createJsonFromMatrix(matrixArray->GetMatrix(i));
}
json += "]";
return json;
}
static MatrixArray* CreateMatrixArray(const char *str, int* len)
{
int i;
for (i = 0; str[i] && std::isspace(str[i]); i++);
if (str[i] != '[')
return nullptr;
*len = i;
return nullptr; // NOTE: all code paths must return a value; full parsing of a stand-alone MatrixArray is not implemented here
}
static const std::string CreateJson(const NeuralNetwork *network)
{
std::string json = "{\"Weights\":";
json += CreateJsonFromMatrixArray(network->Weights);
json += ",\"Biases\":";
json += CreateJsonFromMatrixArray(network->Biases);
json += ",\"LayerCount\":";
json += std::to_string(network->LayerCount());
json += "}";
return json;
}
};
BatchParams::~BatchParams()
{
if(threadObj.joinable())
threadObj.join();
delete givenInputsBatch;
delete desiredOutputsBatch;
delete local_weights;
delete local_biases;
delete delta_gradient_w;
delete delta_gradient_b;
}
BatchParams::BatchParams(NeuralNetwork* network, MatrixArray* givenInputsBatch, MatrixArray* desiredOutputsBatch, IActivationMethods* activationObject, float learningRate, float lambda, unsigned int threadCount, float clipThreshold)
: network(network),
givenInputsBatch(givenInputsBatch),
desiredOutputsBatch(desiredOutputsBatch),
activationObject(activationObject),
learningRate(learningRate),
lambda(lambda),
threadCount(threadCount),
clipThreshold(clipThreshold),
local_weights(MatrixArray::CreateMatrixArray(network->Weights)),
local_biases(MatrixArray::CreateMatrixArray(network->Biases)),
delta_gradient_w(MatrixArray::CreateMatrixArray(network->Weights->Count())),
delta_gradient_b(MatrixArray::CreateMatrixArray(network->Biases->Count())),
threadObj(NeuralNetwork::TrainMiniBatch, this),
threadActive(true), errorFound(false) {}
}
#endif // !_CCE_NEURAL_NETWORK_H
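Before the confusion matrix code, here is a minimal training-loop sketch against this header. It is a hedged example: loading the input and target samples into parallel MatrixArray objects is assumed, and the layer sizes, epoch count, learning rate, and lambda are illustrative (the decay schedule follows the comment above Train).
#include "cce_neural_network.h"
void TrainExample(ML::MatrixArray* inputs, ML::MatrixArray* targets) // parallel arrays: one input Matrix and one desired-output Matrix per sample
{
    unsigned int layers[] = { 784, 100, 10 };
    ML::ActivationLeakyReLUSoftMax activation(0.1f); // SoftMax output, so targets are one-hot vectors (CCE)
    ML::NeuralNetwork* network = ML::NeuralNetwork::CreateNeuralNetwork(layers, 3, &activation);
    const unsigned int epochCount = 30;
    const float initialLearningRate = 0.05f, lambda = 0.0001f;
    for (unsigned int epoch = 0; epoch < epochCount; epoch++)
    {
        // learning-rate decay suggested in the comments: 0.1 ^ (epoch / epochCount) * initialLearningRate
        float learningRate = (float)(pow(0.1, epoch / (double)epochCount) * initialLearningRate);
        ML::MatrixArray::ShuffleParallelArrays(inputs, targets); // shuffle the samples every epoch
        if (!network->Train(inputs, targets, &activation, learningRate, lambda))
            break; // incompatible dimensions or NaN encountered
    }
    unsigned int predicted = network->PredictedIndex(inputs->GetMatrix(0), &activation); // index of the strongest output neuron for the first sample
    (void)predicted;
    delete network;
}
Train uses std::thread::hardware_concurrency() threads by default; pass a threadCount of 1 to train on a single thread, and a clipThreshold such as 5.0f if gradients explode.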
The confusion matrix C# code:
using System;
using System.Collections.Generic;
namespace ML
{
public class Confusion
{
public List<Dictionary<string, string>> Samples { get; set; }
public IEnumerable<string> Categories { get; set; }
private Confusion()
{
Samples = new List<Dictionary<string, string>>();
Categories = Array.Empty<string>();
}
public Confusion(IEnumerable<string> categories)
{
Samples = new List<Dictionary<string, string>>();
Categories = categories;
}
public string ToJson()
{
return System.Text.Json.JsonSerializer.Serialize(this);
}
public void AddSample(string truth, string? label = null)
{
Samples.Add(new Dictionary<string, string>
{
{ "t", truth }, // t for true value
{ "l", label ?? "?" } // l for label
});
}
public void Reset()
{
Samples.Clear();
}
public string GetHtmlPage()
{
return $"<!DOCTYPE html><html lang='en'><head><meta charset='UTF-8' /><meta name='viewport' content='width=device-width, initial-scale=1.0' /><title>Confusion Matrix Chart</title><style>body{{margin:0;padding:0;}}</style></head><body><div id='confusion-container'></div><script src='data:text/javascript;base64,ZnVuY3Rpb24gX3R5cGVvZihuKXtyZXR1cm4gX3R5cGVvZj0iZnVuY3Rpb24iPT10eXBlb2YgU3ltYm9sJiYic3ltYm9sIj09dHlwZW9mIFN5bWJvbC5pdGVyYXRvcj9mdW5jdGlvbihuKXtyZXR1cm4gdHlwZW9mIG59OmZ1bmN0aW9uKG4pe3JldHVybiBuJiYiZnVuY3Rpb24iPT10eXBlb2YgU3ltYm9sJiZuLmNvbnN0cnVjdG9yPT09U3ltYm9sJiZuIT09U3ltYm9sLnByb3RvdHlwZT8ic3ltYm9sIjp0eXBlb2Ygbn0sX3R5cGVvZihuKX1mdW5jdGlvbiBfdG9Db25zdW1hYmxlQXJyYXkobil7cmV0dXJuIF9hcnJheVdpdGhvdXRIb2xlcyhuKXx8X2l0ZXJhYmxlVG9BcnJheShuKXx8X3Vuc3VwcG9ydGVkSXRlcmFibGVUb0FycmF5KG4pfHxfbm9uSXRlcmFibGVTcHJlYWQoKX1mdW5jdGlvbiBfbm9uSXRlcmFibGVTcHJlYWQoKXt0aHJvdyBuZXcgVHlwZUVycm9yKCJJbnZhbGlkIGF0dGVtcHQgdG8gc3ByZWFkIG5vbi1pdGVyYWJsZSBpbnN0YW5jZS5cbkluIG9yZGVyIHRvIGJlIGl0ZXJhYmxlLCBub24tYXJyYXkgb2JqZWN0cyBtdXN0IGhhdmUgYSBbU3ltYm9sLml0ZXJhdG9yXSgpIG1ldGhvZC4iKTt9ZnVuY3Rpb24gX2l0ZXJhYmxlVG9BcnJheShuKXtpZih0eXBlb2YgU3ltYm9sIT0idW5kZWZpbmVkIiYmbltTeW1ib2wuaXRlcmF0b3JdIT1udWxsfHxuWyJAQGl0ZXJhdG9yIl0hPW51bGwpcmV0dXJuIEFycmF5LmZyb20obil9ZnVuY3Rpb24gX2FycmF5V2l0aG91dEhvbGVzKG4pe2lmKEFycmF5LmlzQXJyYXkobikpcmV0dXJuIF9hcnJheUxpa2VUb0FycmF5KG4pfWZ1bmN0aW9uIF9jcmVhdGVGb3JPZkl0ZXJhdG9ySGVscGVyKG4sdCl7dmFyIGk9dHlwZW9mIFN5bWJvbCE9InVuZGVmaW5lZCImJm5bU3ltYm9sLml0ZXJhdG9yXXx8blsiQEBpdGVyYXRvciJdLHIsdSxmLGUsbztpZighaSl7aWYoQXJyYXkuaXNBcnJheShuKXx8KGk9X3Vuc3VwcG9ydGVkSXRlcmFibGVUb0FycmF5KG4pKXx8dCYmbiYmdHlwZW9mIG4ubGVuZ3RoPT0ibnVtYmVyIilyZXR1cm4gaSYmKG49aSkscj0wLHU9ZnVuY3Rpb24oKXt9LHtzOnUsbjpmdW5jdGlvbigpe3JldHVybiByPj1uLmxlbmd0aD97ZG9uZTohMH06e2RvbmU6ITEsdmFsdWU6bltyKytdfX0sZTpmdW5jdGlvbihuKXt0aHJvdyBuO30sZjp1fTt0aHJvdyBuZXcgVHlwZUVycm9yKCJJbnZhbGlkIGF0dGVtcHQgdG8gaXRlcmF0ZSBub24taXRlcmFibGUgaW5zdGFuY2UuXG5JbiBvcmRlciB0byBiZSBpdGVyYWJsZSwgbm9uLWFycmF5IG9iamVjdHMgbXVzdCBoYXZlIGEgW1N5bWJvbC5pdGVyYXRvcl0oKSBtZXRob2QuIik7fXJldHVybiBmPSEwLGU9ITEse3M6ZnVuY3Rpb24oKXtpPWkuY2FsbChuKX0sbjpmdW5jdGlvbigpe3ZhciBuPWkubmV4dCgpO3JldHVybiBmPW4uZG9uZSxufSxlOmZ1bmN0aW9uKG4pe2U9ITA7bz1ufSxmOmZ1bmN0aW9uKCl7dHJ5e2Z8fGkucmV0dXJuPT1udWxsfHxpLnJldHVybigpfWZpbmFsbHl7aWYoZSl0aHJvdyBvO319fX1mdW5jdGlvbiBfdW5zdXBwb3J0ZWRJdGVyYWJsZVRvQXJyYXkobix0KXtpZihuKXtpZih0eXBlb2Ygbj09InN0cmluZyIpcmV0dXJuIF9hcnJheUxpa2VUb0FycmF5KG4sdCk7dmFyIGk9T2JqZWN0LnByb3RvdHlwZS50b1N0cmluZy5jYWxsKG4pLnNsaWNlKDgsLTEpO3JldHVybihpPT09Ik9iamVjdCImJm4uY29uc3RydWN0b3ImJihpPW4uY29uc3RydWN0b3IubmFtZSksaT09PSJNYXAifHxpPT09IlNldCIpP0FycmF5LmZyb20obik6aT09PSJBcmd1bWVudHMifHwvXig/OlVpfEkpbnQoPzo4fDE2fDMyKSg/OkNsYW1wZWQpP0FycmF5JC8udGVzdChpKT9fYXJyYXlMaWtlVG9BcnJheShuLHQpOnZvaWQgMH19ZnVuY3Rpb24gX2FycmF5TGlrZVRvQXJyYXkobix0KXsodD09bnVsbHx8dD5uLmxlbmd0aCkmJih0PW4ubGVuZ3RoKTtmb3IodmFyIGk9MCxyPW5ldyBBcnJheSh0KTtpPHQ7aSsrKXJbaV09bltpXTtyZXR1cm4gcn1mdW5jdGlvbiBfY2xhc3NDYWxsQ2hlY2sobix0KXtpZighKG4gaW5zdGFuY2VvZiB0KSl0aHJvdyBuZXcgVHlwZUVycm9yKCJDYW5ub3QgY2FsbCBhIGNsYXNzIGFzIGEgZnVuY3Rpb24iKTt9ZnVuY3Rpb24gX2RlZmluZVByb3BlcnRpZXMobix0KXtmb3IodmFyIGkscj0wO3I8dC5sZW5ndGg7cisrKWk9dFtyXSxpLmVudW1lcmFibGU9aS5lbnVtZXJhYmxlfHwhMSxpLmNvbmZpZ3VyYWJsZT0hMCwidmFsdWUiaW4gaSYmKGkud3JpdGFibGU9ITApLE9iamVjdC5kZWZpbmVQcm9wZXJ0eShuLF90b1Byb3BlcnR5S2V5KGkua2V5KSxpKX1mdW5jdGlvbiBfY3JlYXRlQ2xhc3Mobix0LGkpe3JldHVybiB0JiZfZGVmaW5lUHJvcGVydGllcyhuLnByb3RvdHlwZSx0KSxpJiZfZGVmaW5lUHJvcGVydGllcyhuLGkpLE9iamVjdC5kZWZpbmVQcm9wZXJ0eShuLCJwcm90b3R5cGUiLHt3cml0YWJsZTohMX0pLG59ZnVuY3Rpb24gX3RvUHJvcGVydHlLZXkobil7dmFyIHQ9X3Rv
UHJpbWl0aXZlKG4sInN0cmluZyIpO3JldHVybiJzeW1ib2wiPT1fdHlwZW9mKHQpP3Q6dCsiIn1mdW5jdGlvbiBfdG9QcmltaXRpdmUobix0KXt2YXIgaSxyO2lmKCJvYmplY3QiIT1fdHlwZW9mKG4pfHwhbilyZXR1cm4gbjtpZihpPW5bU3ltYm9sLnRvUHJpbWl0aXZlXSx2b2lkIDAhPT1pKXtpZihyPWkuY2FsbChuLHR8fCJkZWZhdWx0IiksIm9iamVjdCIhPV90eXBlb2YocikpcmV0dXJuIHI7dGhyb3cgbmV3IFR5cGVFcnJvcigiQEB0b1ByaW1pdGl2ZSBtdXN0IHJldHVybiBhIHByaW1pdGl2ZSB2YWx1ZS4iKTt9cmV0dXJuKCJzdHJpbmciPT09dD9TdHJpbmc6TnVtYmVyKShuKX1BcnJheS5wcm90b3R5cGUuZmxhdHx8KEFycmF5LnByb3RvdHlwZS5mbGF0PWZ1bmN0aW9uKCl7dmFyIG47aWYodGhpcz09bnVsbCl0aHJvdyBuZXcgRXJyb3IoInRoaXMgaXMgbnVsbCBvciBub3QgZGVmaW5lZCIpO3ZhciB0PU9iamVjdCh0aGlzKSxyPXQubGVuZ3RoPj4+MCxpPVtdO2ZvcihuPTA7bjxyO24rKylBcnJheS5pc0FycmF5KHRbbl0pP2kuY29uY2F0KHQuZmxhdCgpKTppLnB1c2godFtuXSk7cmV0dXJuIGl9KTt2YXIgQ29uZnVzaW9uTWF0cml4Q2hhcnQ9ZnVuY3Rpb24oKXtmdW5jdGlvbiBuKHQsaSxyKXt2YXIgZix1O19jbGFzc0NhbGxDaGVjayh0aGlzLG4pO3ZhciBzPXIubGVuZ3RoKzEsZT1NYXRoLm1pbih3aW5kb3cuaW5uZXJXaWR0aCx3aW5kb3cuaW5uZXJIZWlnaHQpLyhzKzEpLG89ZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgidGFibGUiKTtvLnN0eWxlLmJvcmRlckNvbGxhcHNlPSJjb2xsYXBzZSI7by5zdHlsZS50ZXh0QWxpZ249ImNlbnRlciI7by5zdHlsZS5tYXJnaW5MZWZ0PWUrInB4IjtvLnN0eWxlLm1hcmdpblRvcD1lKyJweCI7dC5hcHBlbmRDaGlsZChvKTtmPWRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoImRpdiIpO2YuaW5uZXJIVE1MPSJwcmVkaWN0ZWQgY2F0ZWdvcnkiO2Yuc3R5bGUucG9zaXRpb249ImFic29sdXRlIjtmLnN0eWxlLmZvbnRTaXplPSJ4LWxhcmdlIjtmLnN0eWxlLnRvcD0iMHB4IjtmLnN0eWxlLmxlZnQ9ZSoxLjUrInB4IjtmLnN0eWxlLmhlaWdodD1lKyJweCI7Zi5zdHlsZS5kaXNwbGF5PSJmbGV4IjtmLnN0eWxlLmFsaWduSXRlbXM9ImNlbnRlciI7Zi5zdHlsZS5tYXJnaW5MZWZ0PWUvMisicHgiO3QuYXBwZW5kQ2hpbGQoZik7dT1kb2N1bWVudC5jcmVhdGVFbGVtZW50KCJkaXYiKTt1LmlubmVySFRNTD0idHJ1ZSBjYXRlZ29yeSI7dS5zdHlsZS5wb3NpdGlvbj0iYWJzb2x1dGUiO3Uuc3R5bGUuZm9udFNpemU9IngtbGFyZ2UiO3Uuc3R5bGUudG9wPWUqMisicHgiO3Uuc3R5bGUubGVmdD0iMHB4Ijt1LnN0eWxlLnRyYW5zZm9ybT0idHJhbnNsYXRlKC01MCUpIHJvdGF0ZSgtOTBkZWcpIjt1LnN0eWxlLmhlaWdodD1lKyJweCI7dS5zdHlsZS5kaXNwbGF5PSJmbGV4Ijt1LnN0eWxlLmFsaWduSXRlbXM9ImNlbnRlciI7dS5zdHlsZS5tYXJnaW5MZWZ0PWUvMisicHgiO3QuYXBwZW5kQ2hpbGQodSk7dGhpcy5nbyhpLHMscixvLGUpfXJldHVybiBfY3JlYXRlQ2xhc3Mobixbe2tleToiZ28iLHZhbHVlOmZ1bmN0aW9uKG4sdCxpLHIsdSl7Zm9yKHZhciBjLGUsZixzLGE9ZnVuY3Rpb24obix0LGkpe2Zvcih2YXIgZSxzLG8sZix1PVtdLHI9MDtyPG47cisrKWZvcih1W3JdPVtdLGY9MDtmPG47ZisrKXVbcl1bZl09MDtlPV9jcmVhdGVGb3JPZkl0ZXJhdG9ySGVscGVyKHQpO3RyeXtmb3IoZS5zKCk7IShzPWUubigpKS5kb25lOylvPXMudmFsdWUsdVtpLmluZGV4T2Yoby50KSsxXVtpLmluZGV4T2Yoby5sKSsxXSsrfWNhdGNoKGgpe2UuZShoKX1maW5hbGx5e2UuZigpfWZvcihyPTE7cjxuO3IrKylmb3IoZj0xO2Y8bjtmKyspdVswXVtmXSs9dVtyXVtmXSx1W3JdWzBdKz11W3JdW2ZdO2ZvcihyPTE7cjxuO3IrKyl1WzBdW3JdLT11W3JdWzBdLHVbMF1bcl0+MCYmKHVbMF1bcl09IisiK3VbMF1bcl0pO3JldHVybiB1WzBdWzBdPSIiLHV9LHY9ZnVuY3Rpb24obix0LGkpe3JldHVybihpLW4pLyh0LW4pfSxoPWEodCxuLGkpLGw9aC5zbGljZSgxKS5tYXAoZnVuY3Rpb24obil7cmV0dXJuIG4uc2xpY2UoMSl9KS5mbGF0KCkseT1NYXRoLm1pbi5hcHBseShNYXRoLF90b0NvbnN1bWFibGVBcnJheShsKSkscD1NYXRoLm1heC5hcHBseShNYXRoLF90b0NvbnN1bWFibGVBcnJheShsKSksbz0wO288dDtvKyspZm9yKGM9ZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgidHIiKSxyLmFwcGVuZENoaWxkKGMpLGU9MDtlPHQ7ZSsrKXtpZihmPWRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoInRkIiksZi5zdHlsZS53aWR0aD11KyJweCIsZi5zdHlsZS5oZWlnaHQ9dSsicHgiLGYuc3R5bGUucGFkZGluZz0iMCIsbz09MCYmZT4wKXtmLmFwcGVuZENoaWxkKGRvY3VtZW50LmNyZWF0ZVRleHROb2RlKGlbZS0xXSkpO2Yuc3R5bGUub3ZlcmZsb3c9ImhpZGRlbiI7Zi5zdHlsZS52ZXJ0aWNhbEFsaWduPSJjZW50ZXIiO2Yuc3R5bGUudGV4dEFsaWduPSJjZW50ZXIiO3ZhciBzPTIqaFtvXVtlXS9oW2VdW29dLHc9cz49MD9zKjI1NTowLGI9czw9MD8tcyoyNTU6MDtmLnN0eWxlLmNvbG9yPSJyZ2IoIit3KyIsIitiKyIsMCkifWU9PTAmJm8+MCYmKGYuYXBwZW5kQ2hpbGQoZG9jdW1lbnQuY3JlYXRlVGV4dE5vZGUoaVtvLTFdKSksZi5zdHlsZS5vdmVyZmxvdz0iaGlkZGVuIixmLnN0eWxlLnZ
lcnRpY2FsQWxpZ249ImNlbnRlciIsZi5zdHlsZS50ZXh0QWxpZ249ImNlbnRlciIpO2YuYXBwZW5kQ2hpbGQoZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgiYnIiKSk7Zi5hcHBlbmRDaGlsZChkb2N1bWVudC5jcmVhdGVUZXh0Tm9kZShoW29dW2VdKSk7bz4wJiZlPjAmJihzPXYoeSxwLGhbb11bZV0pLGYuc3R5bGUuYmFja2dyb3VuZENvbG9yPW89PWU/InJnYmEoMCwyNTUsMCwiK3MrIikiOiJyZ2JhKDI1NSwwLDAsIitzKyIpIik7Yy5hcHBlbmRDaGlsZChmKX19fV0pfSgpOw=='></script><script type='text/javascript'>var obj=JSON.parse('{ToJson()}');new ConfusionMatrixChart(document.getElementById('confusion-container'),obj.Samples,obj.Categories);</script></body></html>";
}
}
}
Here are the C# P/Invoke declarations for the functions exported by the DLL.
using System.Runtime.InteropServices;
namespace ML
{
static class libCceNeuralNetwork
{
private const string dllName = "libCceNeuralNetwork.dll";
[DllImport(dllName, EntryPoint = "WinCreateNetwork", CallingConvention = CallingConvention.StdCall)]
public static extern IntPtr CreateNetwork(uint[] neuronLayers, uint layerCount, uint activationObjectIndex, int useBiases);
[DllImport(dllName, EntryPoint = "WinCreateNetworkFromJson", CallingConvention = CallingConvention.StdCall)]
public static extern IntPtr CreateNetworkFromJson(string json);
[DllImport(dllName, EntryPoint = "WinCreateJsonFileFromNetwork", CallingConvention = CallingConvention.StdCall)]
public static extern uint CreateJsonFileFromNetwork(IntPtr pNeuralNetwork, string jsonFileName);
[DllImport(dllName, EntryPoint = "WinCreateMatrixArray", CallingConvention = CallingConvention.StdCall)]
public static extern IntPtr CreateMatrixArray();
[DllImport(dllName, EntryPoint = "WinGetMatrixArrayCount", CallingConvention = CallingConvention.StdCall)]
public static extern uint GetMatrixArrayCount(IntPtr pMatrixArray);
[DllImport(dllName, EntryPoint = "WinGetMatrix", CallingConvention = CallingConvention.StdCall)]
public static extern IntPtr GetMatrix(IntPtr pMatrixArray, uint index);
[DllImport(dllName, EntryPoint = "WinGetMatrixValue", CallingConvention = CallingConvention.StdCall)]
public static extern float GetMatrixValue(IntPtr pMatrix, uint row, uint column = 0);
[DllImport(dllName, EntryPoint = "WinAddMatrixArrayData", CallingConvention = CallingConvention.StdCall)]
public static extern void AddMatrixArrayData(IntPtr pMatrixArray, float[] pValues, uint rows, uint columns = 1);
[DllImport(dllName, EntryPoint = "WinGetActivationObjectIndex", CallingConvention = CallingConvention.StdCall)]
public static extern uint GetActivationObjectIndex(string activationName);
[DllImport(dllName, EntryPoint = "WinShuffleParallelArrays", CallingConvention = CallingConvention.StdCall)]
public static extern void ShuffleParallelArrays(IntPtr matrixArray1, IntPtr matrixArray2);
[DllImport(dllName, EntryPoint = "WinTrain", CallingConvention = CallingConvention.StdCall)]
public static extern int Train(IntPtr pNeuralNetwork, IntPtr pInputsMatrixArray, IntPtr pDesiredOutputsMatrixArray, uint activationObjectIndex, float learningRate, float lambda, uint threadCount, float clipThreshold = 0f);
[DllImport(dllName, EntryPoint = "WinTrueIndex", CallingConvention = CallingConvention.StdCall)]
public static extern uint TrueIndex(IntPtr pNeuralNetwork, IntPtr pDesiredOutputsMatrix);
[DllImport(dllName, EntryPoint = "WinPredictedIndex", CallingConvention = CallingConvention.StdCall)]
public static extern uint PredictedIndex(IntPtr pNeuralNetwork, IntPtr pGivenInputsMatrix, uint activationObjectIndex);
[DllImport(dllName, EntryPoint = "WinCalculateLoss", CallingConvention = CallingConvention.StdCall)]
public static extern double CalculateLoss(IntPtr pNeuralNetwork, IntPtr pGivenInputsMatrix, IntPtr pDesiredOutputsMatrix, uint activationObjectIndex, out uint predictedIndex, out uint trueIndex);
[DllImport(dllName, EntryPoint = "WinFreeMatrixArray", CallingConvention = CallingConvention.StdCall)]
public static extern void FreeMatrixArray(IntPtr pMatrixArray);
[DllImport(dllName, EntryPoint = "WinFreeNetwork", CallingConvention = CallingConvention.StdCall)]
public static extern void FreeNetwork(IntPtr pNeuralNetwork);
}
}
The following code is specifically for building the Windows DLL, if so desired.
#include <windows.h>
#include "cce_neural_network.h"
#include <fstream>
#include <string>
#define MAX_ACTIVATIONS 8
#define ACTIVATION_LEAKY_RELU_SOFTMAX 0
#define ACTIVATION_RELU_SOFTMAX 1
#define ACTIVATION_TANH_SOFTMAX 2
#define ACTIVATION_SIGMOID_SOFTMAX 3
#define ACTIVATION_LEAKY_RELU_LINEAR 4
#define ACTIVATION_RELU_LINEAR 5
#define ACTIVATION_TANH_LINEAR 6
#define ACTIVATION_SIGMOID_LINEAR 7
ML::IActivationMethods* activationObjects[MAX_ACTIVATIONS];
BOOL APIENTRY DllMain( HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved )
{
switch (ul_reason_for_call)
{
case DLL_PROCESS_ATTACH:
// SoftMax final layer activation for CCE
activationObjects[ACTIVATION_LEAKY_RELU_SOFTMAX] = new ML::ActivationLeakyReLUSoftMax(0.1f);
activationObjects[ACTIVATION_RELU_SOFTMAX] = new ML::ActivationReLUSoftMax();
activationObjects[ACTIVATION_TANH_SOFTMAX] = new ML::ActivationTanhSoftMax();
activationObjects[ACTIVATION_SIGMOID_SOFTMAX] = new ML::ActivationSigmoidSoftMax();
// Linear final layer activation for SCCE
activationObjects[ACTIVATION_LEAKY_RELU_LINEAR] = new ML::ActivationLeakyReLULinear(0.1f);
activationObjects[ACTIVATION_RELU_LINEAR] = new ML::ActivationReLULinear();
activationObjects[ACTIVATION_TANH_LINEAR] = new ML::ActivationTanhLinear();
activationObjects[ACTIVATION_SIGMOID_LINEAR] = new ML::ActivationSigmoidLinear();
break;
case DLL_THREAD_ATTACH:
break;
case DLL_THREAD_DETACH:
break;
case DLL_PROCESS_DETACH:
for (int i = 0; i < MAX_ACTIVATIONS; i++)
delete activationObjects[i];
break;
}
return TRUE;
}
extern "C"
{
ML::NeuralNetwork* __stdcall WinCreateNetwork(const unsigned int* neuronLayers, unsigned int layerCount, unsigned int activationObjectIndex, int useBiases)
{
if(activationObjectIndex < MAX_ACTIVATIONS)
return ML::NeuralNetwork::CreateNeuralNetwork(neuronLayers, layerCount, activationObjects[activationObjectIndex], (bool)useBiases);
return nullptr;
}
ML::NeuralNetwork* __stdcall WinCreateNetworkFromJson(const char* json)
{
return ML::NeuralNetworkJsonProcessor::CreateNeuralNetwork(json);
}
unsigned int __stdcall WinCreateJsonFileFromNetwork(const ML::NeuralNetwork* pNeuralNetwork, const char* jsonFileName)
{
std::ofstream jsonFile(jsonFileName);
if (jsonFile.is_open())
{
jsonFile << ML::NeuralNetworkJsonProcessor::CreateJson(pNeuralNetwork);
jsonFile.close();
return 1;
}
return 0;
}
ML::MatrixArray* __stdcall WinCreateMatrixArray()
{
return ML::MatrixArray::CreateMatrixArray();
}
unsigned int __stdcall WinGetMatrixArrayCount(ML::MatrixArray* pMatrixArray)
{
return pMatrixArray->Count();
}
ML::Matrix* __stdcall WinGetMatrix(ML::MatrixArray* pMatrixArray, unsigned int index)
{
return pMatrixArray->GetMatrix(index);
}
float __stdcall WinGetMatrixValue(ML::Matrix* pMatrix, unsigned int row, unsigned int column)
{
return pMatrix->GetValue(row, column);
}
void __stdcall WinAddMatrixArrayData(ML::MatrixArray* pMatrixArray, float* pValues, unsigned int rows, unsigned int columns)
{
pMatrixArray->Add(new ML::Matrix(rows, columns, pValues));
}
unsigned int __stdcall WinGetActivationObjectIndex(char* activationName)
{
std::string name = activationName;
if (name == "LeakyReLUSoftMax")
return ACTIVATION_LEAKY_RELU_SOFTMAX;
if (name == "ReLUSoftMax")
return ACTIVATION_RELU_SOFTMAX;
if (name == "TanhSoftMax")
return ACTIVATION_TANH_SOFTMAX;
if (name == "SigmoidSoftMax")
return ACTIVATION_SIGMOID_SOFTMAX;
if (name == "LeakyReLULinear")
return ACTIVATION_LEAKY_RELU_LINEAR;
if (name == "ReLULinear")
return ACTIVATION_RELU_LINEAR;
if (name == "TanhLinear")
return ACTIVATION_TANH_LINEAR;
if (name == "SigmoidLinear")
return ACTIVATION_SIGMOID_LINEAR;
return 0xFFFFFFFF;
}
void __stdcall WinShuffleParallelArrays(ML::MatrixArray* matrixArray1, ML::MatrixArray* matrixArray2)
{
ML::MatrixArray::ShuffleParallelArrays(matrixArray1, matrixArray2);
}
int __stdcall WinTrain(ML::NeuralNetwork* pNeuralNetwork, ML::MatrixArray* inputsMatrixArray, ML::MatrixArray* desiredOutputsMatrixArray, unsigned int activationObjectIndex, float learningRate, float lambda, unsigned int threadCount, float clipThreshold)
{
if (activationObjectIndex < MAX_ACTIVATIONS)
return pNeuralNetwork->Train(inputsMatrixArray, desiredOutputsMatrixArray, activationObjects[activationObjectIndex], learningRate, lambda, threadCount, clipThreshold);
return 0;
}
unsigned int __stdcall WinTrueIndex(ML::NeuralNetwork* pNeuralNetwork, ML::Matrix* desiredOutputsMatrix)
{
return pNeuralNetwork->TrueIndex(desiredOutputsMatrix);
}
unsigned int __stdcall WinPredictedIndex(ML::NeuralNetwork* pNeuralNetwork, ML::Matrix* givenInputsMatrix, unsigned int activationObjectIndex)
{
if (activationObjectIndex < MAX_ACTIVATIONS)
return pNeuralNetwork->PredictedIndex(givenInputsMatrix, activationObjects[activationObjectIndex]);
return 0xFFFFFFFF;
}
double __stdcall WinCalculateLoss(ML::NeuralNetwork* pNeuralNetwork, ML::Matrix* givenInputsMatrix, ML::Matrix* desiredOutputsMatrix, unsigned int activationObjectIndex, unsigned int* predictedIndex, unsigned int* trueIndex)
{
if (activationObjectIndex < MAX_ACTIVATIONS)
return pNeuralNetwork->CalculateLoss(givenInputsMatrix, desiredOutputsMatrix, activationObjects[activationObjectIndex], predictedIndex, trueIndex);
return 0xFFFFFFFF;
}
void __stdcall WinFreeMatrixArray(ML::MatrixArray* pMatrixArray)
{
delete pMatrixArray;
}
void __stdcall WinFreeNetwork(ML::NeuralNetwork* pNeuralNetwork)
{
delete pNeuralNetwork;
}
}
And the DEF file that exports the functions from the Windows DLL.
LIBRARY libCceNeuralNetwork
EXPORTS
WinCreateNetwork @1
WinCreateNetworkFromJson @2
WinCreateJsonFileFromNetwork @3
WinCreateMatrixArray @4
WinGetMatrixArrayCount @5
WinGetMatrix @6
WinGetMatrixValue @7
WinAddMatrixArrayData @8
WinGetActivationObjectIndex @9
WinShuffleParallelArrays @10
WinTrain @11
WinTrueIndex @12
WinPredictedIndex @13
WinCalculateLoss @14
WinFreeMatrixArray @15
WinFreeNetwork @16