machine_learning/neural_network.cpp
/*******************************************************
* Copyright (c) 2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* https://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <arrayfire.h>
#include <math.h>
#include <stdio.h>
#include <af/util.h>
#include <iostream>
#include <string>
#include <vector>
#include "mnist_common.h"
using namespace af;
using std::vector;
std::string toStr(const dtype dt) {
switch (dt) {
case f32: return "f32";
case f16: return "f16";
default: return "N/A";
}
}
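// Classification accuracy: compare the argmax of each prediction row against
// the argmax of the corresponding one-hot target row and report the
// percentage of matches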
float accuracy(const array &predicted, const array &target) {
array val, plabels, tlabels;
max(val, tlabels, target, 1);
max(val, plabels, predicted, 1);
return 100 * count<float>(plabels == tlabels) / tlabels.elements();
}
// Derivative of the sigmoid activation, written in terms of its output:
// if out = sigmoid(x), then d(out)/dx = out * (1 - out)
array deriv(const array &out) { return out * (1 - out); }
// Cost function: L2 norm of the difference between output and target
double error(const array &out, const array &pred) {
array dif = (out - pred);
return sqrt((double)(sum<float>(dif * dif)));
}
class ann {
private:
int num_layers;
vector<array> weights;
dtype datatype;
    // Prepend a constant bias column to the output from the previous layer
array add_bias(const array &in);
vector<array> forward_propagate(const array &input);
    void back_propagate(const vector<array> &signal, const array &target,
                        const double &alpha);
public:
// Create a network with given parameters
ann(vector<int> layers, double range, dtype dt = f32);
// Output after single pass of forward propagation
array predict(const array &input);
// Method to train the neural net
double train(const array &input, const array &target, double alpha = 1.0,
int max_epochs = 300, int batch_size = 100,
double maxerr = 1.0, bool verbose = false);
};
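// Weight layout: weights[i] connects layer i to layer i + 1 and has shape
// (layers[i] + 1) x layers[i + 1]. The extra leading row holds the bias
// weights that pair with the constant-1 column prepended by add_bias(), so
// each forward step computes signal[i + 1] = sigmoid([1 | signal[i]] * weights[i]).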
array ann::add_bias(const array &in) {
    // Prepend a constant-1 column so the first row of each weight matrix
    // acts as a bias term
    return join(1, constant(1, in.dims(0), 1, datatype), in);
}
vector<array> ann::forward_propagate(const array &input) {
    // Cache the activations of every layer; back_propagate() needs them all
vector<array> signal(num_layers);
signal[0] = input;
for (int i = 0; i < num_layers - 1; i++) {
array in = add_bias(signal[i]);
array out = matmul(in, weights[i]);
signal[i + 1] = sigmoid(out);
}
return signal;
}
void ann::back_propagate(const vector<array> &signal, const array &target,
                         const double &alpha) {
// Get error for output layer
array out = signal[num_layers - 1];
array err = (out - target);
int m = target.dims(0);
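    // Walk the layers in reverse. The error signal at the current layer is
    // sigmoid'(out) .* err, and the weight update is the squared-error
    // gradient averaged over the m samples:
    //     -(alpha / m) * in^T * (sigmoid'(out) .* err)
    // Note that the error is then propagated back through the freshly
    // updated weights.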
for (int i = num_layers - 2; i >= 0; i--) {
array in = add_bias(signal[i]);
array delta = (deriv(out) * err).T();
// Adjust weights
array tg = alpha * matmul(delta, in);
array grad = -(tg) / m;
weights[i] += grad.T();
// Input to current layer is output of previous
out = signal[i];
err = matmulTT(delta, weights[i]);
        // Drop the bias column's error before propagating further back
err = err(span, seq(1, out.dims(1)));
}
}
ann::ann(vector<int> layers, double range, dtype dt)
: num_layers(layers.size()), weights(layers.size() - 1), datatype(dt) {
    std::cout << "Initializing weights, drawn uniformly at random between "
              << -range / 2 << " and " << range / 2 << " at precision "
              << toStr(datatype) << std::endl;
for (int i = 0; i < num_layers - 1; i++) {
weights[i] = range * randu(layers[i] + 1, layers[i + 1]) - range / 2;
if (datatype != f32) weights[i] = weights[i].as(datatype);
}
}
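// A minimal usage sketch (hypothetical shapes, assuming MNIST's 28 x 28 = 784
// input features and 10 classes, mirroring ann_demo() below):
//
//     std::vector<int> layers = {784, 100, 50, 10};
//     ann net(layers, 0.05);   // weights drawn from [-0.025, 0.025)
//     net.train(X, Y);         // X: samples x 784, Y: samples x 10 one-hot
//     af::array probs = net.predict(X);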
array ann::predict(const array &input) {
vector<array> signal = forward_propagate(input);
array out = signal[num_layers - 1];
return out;
}
double ann::train(const array &input, const array &target, double alpha,
int max_epochs, int batch_size, double maxerr, bool verbose) {
const int num_samples = input.dims(0);
const int num_batches = num_samples / batch_size;
double err = 0;
// Training the entire network
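    // The last batch of each epoch is held out as a small validation set,
    // so only the first (num_batches - 1) batches produce weight updates.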
for (int i = 0; i < max_epochs; i++) {
for (int j = 0; j < num_batches - 1; j++) {
int st = j * batch_size;
int en = st + batch_size - 1;
array x = input(seq(st, en), span);
array y = target(seq(st, en), span);
// Propagate the inputs forward
vector<array> signals = forward_propagate(x);
array out = signals[num_layers - 1];
// Propagate the error backward
back_propagate(signals, y, alpha);
}
// Validate with last batch
int st = (num_batches - 1) * batch_size;
int en = num_samples - 1;
array out = predict(input(seq(st, en), span));
err = error(out, target(seq(st, en), span));
        // Check if the convergence criterion has been met
if (err < maxerr) {
printf("Converged on Epoch: %4d\n", i + 1);
return err;
}
if (verbose) {
if ((i + 1) % 10 == 0)
printf("Epoch: %4d, Error: %0.4f\n", i + 1, err);
}
}
return err;
}
int ann_demo(bool console, int perc, const dtype dt) {
printf("** ArrayFire ANN Demo **\n\n");
array train_images, test_images;
array train_target, test_target;
int num_classes, num_train, num_test;
    // Load MNIST data
    float frac = (float)(perc) / 100.0f;
setup_mnist<true>(&num_classes, &num_train, &num_test, train_images,
test_images, train_target, test_target, frac);
if (dt != f32) {
train_images = train_images.as(dt);
test_images = test_images.as(dt);
train_target = train_target.as(dt);
}
int feature_size = train_images.elements() / num_train;
// Reshape images into feature vectors
array train_feats = moddims(train_images, feature_size, num_train).T();
array test_feats = moddims(test_images, feature_size, num_test).T();
train_target = train_target.T();
test_target = test_target.T();
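    // After the transposes, rows index samples: train_feats is
    // num_train x feature_size and train_target is num_train x num_classes,
    // the layout ann::train() expects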
// Network parameters
vector<int> layers;
layers.push_back(train_feats.dims(1));
layers.push_back(100);
layers.push_back(50);
layers.push_back(num_classes);
// Create network: architecture, range, datatype
ann network(layers, 0.05, dt);
// Train network
timer::start();
network.train(train_feats, train_target,
2.0, // learning rate / alpha
250, // max epochs
100, // batch size
0.5, // max error
true); // verbose
double train_time = timer::stop();
// Run the trained network and test accuracy.
array train_output = network.predict(train_feats);
array test_output = network.predict(test_feats);
// Benchmark prediction
timer::start();
for (int i = 0; i < 100; i++) { network.predict(test_feats); }
double test_time = timer::stop() / 100;
printf("\nTraining set:\n");
printf("Accuracy on training data: %2.2f\n",
accuracy(train_output, train_target));
printf("\nTest set:\n");
printf("Accuracy on testing data: %2.2f\n",
accuracy(test_output, test_target));
printf("\nTraining time: %4.4lf s\n", train_time);
printf("Prediction time: %4.4lf s\n\n", test_time);
if (!console) {
// Get 20 random test images.
test_output = test_output.T();
display_results<true>(test_images, test_output, test_target.T(), 20);
}
return 0;
}
int main(int argc, char **argv) {
    // usage: neural_network_xxx (device) (console on/off)
    //        (percentage of training/test set) (f32|f16)
int device = argc > 1 ? atoi(argv[1]) : 0;
bool console = argc > 2 ? argv[2][0] == '-' : false;
int perc = argc > 3 ? atoi(argv[3]) : 60;
if (perc < 0 || perc > 100) {
std::cerr << "Bad perc arg: " << perc << std::endl;
return EXIT_FAILURE;
}
std::string dts = argc > 4 ? argv[4] : "f32";
dtype dt = f32;
if (dts == "f16")
dt = f16;
else if (dts != "f32") {
std::cerr << "Unsupported datatype " << dts << ". Supported: f32 or f16"
<< std::endl;
return EXIT_FAILURE;
}
if (dts == "f16" && !af::isHalfAvailable(device)) {
std::cerr << "Half not available for device " << device << std::endl;
return EXIT_FAILURE;
}
try {
af::setDevice(device);
return ann_demo(console, perc, dt);
    } catch (af::exception &ae) {
        std::cerr << ae.what() << std::endl;
    }
    return EXIT_FAILURE;
}