A high-performance general-purpose compute library
machine_learning/softmax_regression.cpp
/*******************************************************
* Copyright (c) 2014, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <arrayfire.h>
#include <af/util.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <string>
#include <vector>
#include "mnist_common.h"
using namespace af;
float accuracy(const array &predicted, const array &target) {
array val, plabels, tlabels;
max(val, tlabels, target, 1);
max(val, plabels, predicted, 1);
return 100 * count<float>(plabels == tlabels) / tlabels.elements();
}
float abserr(const array &predicted, const array &target) {
return 100 * sum<float>(abs(predicted - target)) / predicted.elements();
}
// Element-wise quotient a / b; passed as the callback to batchFunc() in
// predict().
array divide(const array &a, const array &b) {
    array quotient = a / b;
    return quotient;
}
// Predict based on given parameters.
//
// Computes the row-wise softmax of matmul(X, Weights): every row of the
// product is exponentiated and then divided by the sum of that row.
//
// X       : input features, multiplied on the left of Weights
// Weights : model parameters
// Returns an array with the same dimensions as matmul(X, Weights).
array predict(const array &X, const array &Weights) {
    array Z = matmul(X, Weights);
    // Shift every row by its own maximum before exponentiating. The shift
    // cancels out in the normalisation below, so the result is mathematically
    // unchanged, but exp() can no longer overflow to inf (which would turn
    // the quotient into NaN) when the raw scores are large.
    const int class_dim = 1;
    array Zmax = max(Z, class_dim);
    array shifted = Z - tile(Zmax, 1, static_cast<unsigned>(Z.dims(1)));
    array EZ = exp(shifted);
    array nrm = sum(EZ, class_dim);
    // nrm has a single column; batchFunc applies the division to every
    // column of EZ.
    return batchFunc(EZ, nrm, divide);
}
// Evaluate the regularized cost and its gradient for the given weights.
//
// J       : (output) cost, i.e. misprediction cost plus regularization cost,
//           divided by the number of samples
// dJ      : (output) gradient of the cost with respect to Weights
// Weights : current model parameters
// X, Y    : input features and their targets
// lambda  : regularization strength
void cost(array &J, array &dJ, const array &Weights, const array &X,
          const array &Y, double lambda = 1.0) {
    // Regularization strength for every weight; the first row is zeroed so
    // that the bias weights are not regularized.
    array reg = constant(lambda, Weights.dims());
    reg(0, span) = 0;

    // Sample count, taken from the first dimension of Y
    const int num_samples = Y.dims(0);

    // Model output for the current weights
    const array H = predict(X, Weights);

    // Misprediction term and regularization term of the cost
    const array data_cost = -sum(Y * log(H));
    const array reg_cost = 0.5 * sum(reg * Weights * Weights);
    J = (data_cost + reg_cost) / num_samples;

    // Gradient: X^T * (H - Y) plus the regularization contribution
    const array residual = H - Y;
    dJ = (matmulTN(X, residual) + reg * Weights) / num_samples;
}
// Fit the weights with plain gradient descent.
//
// X, Y    : training features and targets
// alpha   : step size of each gradient descent update
// lambda  : regularization strength forwarded to cost()
// maxerr  : training stops once the largest absolute cost entry is below this
// maxiter : upper bound on the number of iterations
// verbose : when true, report the error on every 10th iteration
// Returns the weights at convergence, or after maxiter iterations.
array train(const array &X, const array &Y, double alpha = 0.1,
            double lambda = 1.0, double maxerr = 0.01, int maxiter = 1000,
            bool verbose = false) {
    // All parameters start at 0
    array Weights = constant(0, X.dims(1), Y.dims(1));
    array J, dJ;

    int iter = 0;  // 1-based once incremented inside the loop
    while (iter < maxiter) {
        // Cost and gradient for the current weights
        cost(J, dJ, Weights, X, Y, lambda);
        const float err = max<float>(abs(J));
        ++iter;

        const bool converged = err < maxerr;
        if (converged) {
            printf("Iteration %4d Err: %.4f\n", iter, err);
            printf("Training converged\n");
            return Weights;
        }

        const bool report = verbose && (iter % 10 == 0);
        if (report) { printf("Iteration %4d Err: %.4f\n", iter, err); }

        // Gradient descent step
        Weights = Weights - alpha * dJ;
    }

    printf("Training stopped after %d iterations\n", maxiter);
    return Weights;
}
void benchmark_softmax_regression(const array &train_feats,
const array &train_targets,
const array test_feats) {
timer::start();
array Weights = train(train_feats, train_targets, 0.1, 1.0, 0.01, 1000);
printf("Training time: %4.4lf s\n", timer::stop());
timer::start();
const int iter = 100;
for (int i = 0; i < iter; i++) {
array test_outputs = predict(test_feats, Weights);
test_outputs.eval();
}
printf("Prediction time: %4.4lf s\n", timer::stop() / iter);
}
// Demo of one vs all logistic regression
int logit_demo(bool console, int perc) {
array train_images, train_targets;
array test_images, test_targets;
int num_train, num_test, num_classes;
// Load mnist data
float frac = (float)(perc) / 100.0;
setup_mnist<true>(&num_classes, &num_train, &num_test, train_images,
test_images, train_targets, test_targets, frac);
// Reshape images into feature vectors
int feature_length = train_images.elements() / num_train;
array train_feats = moddims(train_images, feature_length, num_train).T();
array test_feats = moddims(test_images, feature_length, num_test).T();
train_targets = train_targets.T();
test_targets = test_targets.T();
// Add a bias that is always 1
train_feats = join(1, constant(1, num_train, 1), train_feats);
test_feats = join(1, constant(1, num_test, 1), test_feats);
// Train logistic regression parameters
array Weights =
train(train_feats, train_targets,
0.1, // learning rate (aka alpha)
1.0, // regularization constant (aka weight decay, aka lamdba)
0.01, // maximum error
1000, // maximum iterations
true); // verbose
// Predict the results
array train_outputs = predict(train_feats, Weights);
array test_outputs = predict(test_feats, Weights);
printf("Accuracy on training data: %2.2f\n",
accuracy(train_outputs, train_targets));
printf("Accuracy on testing data: %2.2f\n",
accuracy(test_outputs, test_targets));
printf("Maximum error on testing data: %2.2f\n",
abserr(test_outputs, test_targets));
benchmark_softmax_regression(train_feats, train_targets, test_feats);
if (!console) {
test_outputs = test_outputs.T();
// Get 20 random test images.
display_results<true>(test_images, test_outputs, test_targets.T(), 20);
}
return 0;
}
int main(int argc, char **argv) {
int device = argc > 1 ? atoi(argv[1]) : 0;
bool console = argc > 2 ? argv[2][0] == '-' : false;
int perc = argc > 3 ? atoi(argv[3]) : 60;
try {
af::setDevice(device);
return logit_demo(console, perc);
} catch (af::exception &ae) { std::cerr << ae.what() << std::endl; }
return 0;
}
A multi dimensional data container.
Definition: array.h:37
dim4 dims() const
Get dimensions of the array.
void eval() const
Evaluate any JIT expressions to generate data for the array.
array T() const
Get the transposed array.
dim_t elements() const
Get the total number of elements across all dimensions of the array.
An ArrayFire exception class.
Definition: exception.h:22
virtual const char * what() const
Returns an error message for the exception in a string format.
Definition: exception.h:46
AFAPI array abs(const array &in)
C++ Interface to calculate the absolute value.
AFAPI array exp(const array &in)
C++ Interface to evaluate the exponential.
AFAPI array log(const array &in)
C++ Interface to evaluate the natural logarithm.
AFAPI array matmulTN(const array &lhs, const array &rhs)
C++ Interface to multiply two matrices.
AFAPI array matmul(const array &lhs, const array &rhs, const matProp optLhs=AF_MAT_NONE, const matProp optRhs=AF_MAT_NONE)
C++ Interface to multiply two matrices.
array constant(T val, const dim4 &dims, const dtype ty=(af_dtype) dtype_traits< T >::ctype)
C++ Interface to generate an array with elements set to a specified value.
AFAPI void info()
AFAPI void setDevice(const int device)
Sets the current device.
AFAPI void sync(const int device=-1)
Blocks until the device is finished processing.
AFAPI array join(const int dim, const array &first, const array &second)
C++ Interface to join 2 arrays along a dimension.
AFAPI array moddims(const array &in, const dim4 &dims)
C++ Interface to modify the dimensions of an input array to a specified shape.
AFAPI array sum(const array &in, const int dim=-1)
C++ Interface to sum array elements over a given dimension.
Definition: algorithm.h:15
AFAPI array batchFunc(const array &lhs, const array &rhs, batchFunc_t func)