Examples¶

Image Rendering¶

CPU¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>

// Width and height, in pixels, shared by the window, the forge::Image and
// the CPU-side bitmap that main() creates.
const unsigned DIMX = 512;
const unsigned DIMY = 512;

// Plain holder for a heap-allocated pixel buffer and its dimensions.
// createBitmap() allocates four bytes per pixel; destroyBitmap() releases it.
struct Bitmap {
    // Pixel storage (4 * width * height bytes when made by createBitmap()).
    unsigned char* ptr;
    // Number of pixel columns.
    unsigned width;
    // Number of pixel rows.
    unsigned height;
};

// Allocates a w x h bitmap (4 bytes per pixel).
Bitmap createBitmap(unsigned w, unsigned h);
// Frees the pixel buffer held by bmp.
void destroyBitmap(Bitmap& bmp);
// Fills every pixel of bmp with the fractal image.
void kernel(Bitmap& bmp);
// Returns 1 when the pixel's orbit stays bounded for all iterations, else 0.
int julia(int x, int y, int width, int height);

// Renders a Julia-set fractal on the CPU and displays it in a Forge window
// until the window is closed.
int main(void) {
    Bitmap bmp = createBitmap(DIMX, DIMY);

    /*
     * The first Forge call should create a window so that the
     * OpenGL context required by every other forge::* object
     * exists before those objects are created.
     */
    forge::Window wnd(DIMX, DIMY, "Fractal Demo");
    wnd.makeCurrent();

    /* Create a font object, load a system font into it and hand
     * it to the window so that it can be used for rendering text.
     *
     * NOTE: THIS STEP IS OPTIONAL; BY DEFAULT THE WINDOW ALREADY
     * HAS A FONT SET UP. */
    forge::Font fnt;
#if defined(OS_WIN)
    fnt.loadSystemFont("Calibri");
#else
    fnt.loadSystemFont("Vera");
#endif
    wnd.setFont(&fnt);

    /* Create an image object, which creates the textures and
     * pixel buffer objects needed to hold the image.
     */
    forge::Image img(DIMX, DIMY, FG_RGBA, forge::u8);
    /* Compute the image into the CPU bitmap. It is then copied
     * into the pixel buffer object exposed by forge::Image before
     * rendering. Forge ships copy-helper headers to move data
     * from compute memory to display memory.
     */
    kernel(bmp);

    GfxHandle* handle = 0;

    // create the interop handle for the image's pixel buffer
    createGLBuffer(&handle, img.pixels(), FORGE_IMAGE_BUFFER);

    // copy the data from compute buffer to graphics buffer
    copyToGLBuffer(handle, (ComputeResourceHandle)bmp.ptr, img.size());

    // redraw the (static) image until the user closes the window
    do { wnd.draw(img); } while (!wnd.close());

    // release the interop handle, then the CPU bitmap
    releaseGLBuffer(handle);
    destroyBitmap(bmp);
    return 0;
}

/* Build a Bitmap of w x h pixels backed by a freshly allocated,
 * uninitialized buffer of four bytes per pixel. The caller releases
 * the buffer with destroyBitmap(). */
Bitmap createBitmap(unsigned w, unsigned h) {
    Bitmap bmp;
    bmp.ptr    = new unsigned char[4 * w * h];
    bmp.width  = w;
    bmp.height = h;
    return bmp;
}

/* Release the pixel buffer owned by bmp.
 *
 * The pointer and the dimensions are reset afterwards, so calling this
 * twice on the same Bitmap is a harmless no-op (delete[] of a null
 * pointer) instead of a double free, and a later kernel(bmp) iterates
 * over zero pixels instead of writing through a dangling pointer. */
void destroyBitmap(Bitmap& bmp) {
    delete[] bmp.ptr;
    bmp.ptr    = 0;
    bmp.width  = 0;
    bmp.height = 0;
}

/* Paint the fractal into bmp, one RGBA pixel at a time in row-major order.
 * The red channel is 255 where julia() reports 1 and 0 elsewhere; green and
 * blue are always 0 and alpha is always 255. */
void kernel(Bitmap& bmp) {
    // Walks the buffer in the same order the pixels are visited.
    unsigned char* px = bmp.ptr;

    for (unsigned row = 0; row < bmp.height; ++row) {
        for (unsigned col = 0; col < bmp.width; ++col, px += 4) {
            const int inside = julia(col, row, bmp.width, bmp.height);

            px[0] = 255 * inside;
            px[1] = 0;
            px[2] = 0;
            px[3] = 255;
        }
    }
}

/* Classify pixel (x, y) of a width x height image.
 *
 * The pixel is mapped to a complex start value with both components scaled
 * by 1.5 around the image centre, then z = z*z + c is iterated up to 200
 * times with c = (-0.8, 0.156).
 *
 * Returns 0 as soon as |z| exceeds 1000, and 1 if that never happens. */
int julia(int x, int y, int width, int height) {
    const float scale = 1.5;
    const float halfW = width / 2.0f;
    const float halfH = height / 2.0f;

    const float jx = scale * (float)(halfW - x) / halfW;
    const float jy = scale * (float)(halfH - y) / halfH;

    const std::complex<float> c(-0.8f, 0.156f);
    std::complex<float> z(jx, jy);

    int step = 0;
    while (step < 200) {
        z = z * z + c;
        if (std::abs(z) > 1000) return 0;
        ++step;
    }

    return 1;
}

CUDA¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <cuComplex.h>
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>

// Width and height, in pixels, of the window and of the rendered image.
const unsigned DIMX   = 512;
const unsigned DIMY   = 512;
// Size in bytes of the device output buffer: four bytes per pixel.
const size_t TOT_SIZE = DIMX * DIMY * 4;

// Host-side launcher: runs the fractal kernel over dev_out.
void kernel(unsigned char* dev_out);

// Renders a Julia-set fractal with CUDA and displays it in a Forge window
// until the window is closed.
int main(void) {
    unsigned char* dev_out;

    /*
     * The first Forge call should create a window so that the
     * OpenGL context required by every other forge::* object
     * exists before those objects are created.
     */
    forge::Window wnd(DIMX, DIMY, "Fractal Demo");
    wnd.makeCurrent();

    /* Create an image object, which creates the textures and
     * pixel buffer objects needed to hold the image.
     */
    forge::Image img(DIMX, DIMY, FG_RGBA, forge::u8);

    GfxHandle* handle = 0;

    // create the interop handle for the image's pixel buffer
    createGLBuffer(&handle, img.pixels(), FORGE_IMAGE_BUFFER);

    /* Compute the image into a device buffer. It is then copied
     * into the pixel buffer object exposed by forge::Image before
     * rendering. Forge ships copy-helper headers to move data
     * from compute memory to display memory.
     */
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dev_out, TOT_SIZE));
    kernel(dev_out);

    // copy the data from compute buffer to graphics buffer
    copyToGLBuffer(handle, (ComputeResourceHandle)dev_out, img.size());

    // redraw the (static) image until the user closes the window
    do { wnd.draw(img); } while (!wnd.close());

    // release the interop handle, then the device buffer
    releaseGLBuffer(handle);
    FORGE_CUDA_CHECK(cudaFree(dev_out));
    return 0;
}

/* Device helper: classify pixel (x, y) of the DIMX x DIMY image.
 *
 * The pixel is mapped to a complex start value with both components scaled
 * by 1.5 around the image centre, then z = z*z + c is iterated up to 200
 * times with c = (-0.8, 0.156).
 *
 * Returns 0 as soon as |z| exceeds 1000, and 1 if that never happens. */
__device__ int julia(int x, int y) {
    const float scale = 1.5;
    const float halfW = DIMX / 2.0f;
    const float halfH = DIMY / 2.0f;

    const float jx = scale * (float)(halfW - x) / halfW;
    const float jy = scale * (float)(halfH - y) / halfH;

    const cuFloatComplex c = make_cuFloatComplex(-0.8f, 0.156f);
    cuFloatComplex z       = make_cuFloatComplex(jx, jy);

    int step = 0;
    while (step < 200) {
        z = cuCaddf(cuCmulf(z, z), c);
        if (cuCabsf(z) > 1000.0f) return 0;
        ++step;
    }

    return 1;
}

/* Kernel: one thread per pixel of the DIMX x DIMY image.
 *
 * Expects a 2D launch; the thread's (x, y) grid position is the pixel it
 * writes, and threads positioned outside the image return without writing.
 * Each pixel is four bytes in `out`: byte 2 is 255 where the device julia()
 * helper returns 1 and 0 elsewhere, bytes 0 and 1 are 0, byte 3 is 255. */
__global__ void julia(unsigned char* out) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks; skip the overhang.
    if (col >= DIMX || row >= DIMY) return;

    const int offset     = col + row * DIMX;
    unsigned char* texel = out + offset * 4;

    texel[0] = 0;
    texel[1] = 0;
    texel[2] = 255 * julia(col, row);
    texel[3] = 255;
}

// Integer division of a by b, rounded up (for non-negative a and positive b).
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}

/* Launch the fractal kernel over the whole DIMX x DIMY image.
 *
 * dev_out must be a device buffer of at least DIMX * DIMY * 4 bytes.
 * The grid uses 8x8 thread blocks and is rounded up to cover the image;
 * the kernel itself discards threads that fall outside it.
 *
 * A kernel launch does not return an error code, so an invalid launch
 * configuration would otherwise go unnoticed. cudaGetLastError() is
 * checked right after the launch to surface it. */
void kernel(unsigned char* dev_out) {
    static const dim3 threads(8, 8);
    dim3 blocks(divup(DIMX, threads.x), divup(DIMY, threads.y));

    // clang-format off
    julia<<<blocks, threads>>>(dev_out);
    // clang-format on

    FORGE_CUDA_CHECK(cudaGetLastError());
}

OpenCL¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>

#include "cl_helpers.h"

#include <algorithm>
#include <iostream>
#include <mutex>
#include <sstream>

using namespace cl;
using namespace std;

// Width and height, in pixels, of the window and of the rendered image.
const unsigned DIMX     = 512;
const unsigned DIMY     = 512;
// Size in bytes of the device output buffer: four bytes per pixel.
const unsigned IMG_SIZE = DIMX * DIMY * 4;

#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>

// clang-format off
// OpenCL C program source, built at run time by kernel() below.
// It defines the `julia` kernel, which takes the output buffer and the image
// width and height, and for every in-range work-item writes one 4-byte pixel:
// byte 1 is 255 where pixel() returns 1 and 0 elsewhere, bytes 0 and 2 are 0,
// byte 3 is 255.
static const std::string fractal_ocl_kernel =
R"EOK(
float magnitude(float2 a) {
    return sqrt(a.s0*a.s0+a.s1*a.s1);
}
float2 mul(float2 a, float2 b) {
    return (float2)(a.s0*b.s0-a.s1*b.s1, a.s1*b.s0+a.s0*b.s1);
}
float2 add(float2 a, float2 b) {
    return (float2)(a.s0+b.s0, a.s1+b.s1);
}
int pixel(int x, int y, int width, int height) {
    const float scale = 1.5;
    float jx = scale * (float)(width/2.0f - x)/(width/2.0f);
    float jy = scale * (float)(height/2.0f - y)/(height/2.0f);
    float2 c = (float2)(-0.8f, 0.156f);
    float2 a = (float2)(jx, jy);

    for (int i=0; i<200; i++) {
        a = add(mul(a, a), c);
        if (magnitude(a) > 1000.0f)
            return 0;
    }
    return 1;
}

kernel
void julia(global unsigned char* out, const unsigned w, const unsigned h) {
    int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
    int y = get_group_id(1) * get_local_size(1) + get_local_id(1);
    if (x<w && y<h) {
        int offset        = x + y * w;
        int juliaValue    = pixel(x, y, w, h);
        out[offset*4 + 1] = 255 * juliaValue;
        out[offset*4 + 0] = 0;
        out[offset*4 + 2] = 0;
        out[offset*4 + 3] = 255;
    }
}
)EOK";
// clang-format on

// Integer division of a by b, rounded up (for non-negative a and positive b).
inline int divup(int a, int b) {
    const int numerator = a + (b - 1);
    return numerator / b;
}

/* Enqueue the `julia` OpenCL kernel over the whole DIMX x DIMY image.
 *
 * The program is built and the kernel object created only on the first
 * call (guarded by std::call_once), using the context of the queue passed
 * to that first call. Work-groups are 8x8 and the global range is rounded
 * up to a multiple of the work-group size. */
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue) {
    static std::once_flag buildOnce;
    static cl::Program program;
    static cl::Kernel juliaKernel;

    std::call_once(buildOnce, [queue]() {
        const cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
        program     = cl::Program(ctx, fractal_ocl_kernel, true);
        juliaKernel = cl::Kernel(program, "julia");
    });

    static const NDRange local(8, 8);
    const int groupsX = divup(DIMX, (int)(local[0]));
    const int groupsY = divup(DIMY, (int)(local[1]));
    NDRange global(local[0] * groupsX, local[1] * groupsY);

    cl::KernelFunctor<Buffer, unsigned, unsigned> juliaOp(juliaKernel);
    juliaOp(EnqueueArgs(queue, global, local), devOut, DIMX, DIMY);
}

// Renders a Julia-set fractal with OpenCL and displays it in a Forge window
// until the window is closed. Forge and OpenCL errors are printed to stdout.
int main(void) {
    try {
        /*
         * The first Forge call should create a window so that the
         * OpenGL context required by every other forge::* object
         * exists before those objects are created.
         */
        forge::Window wnd(DIMX, DIMY, "Fractal Demo");
        wnd.makeCurrent();

        /* Create an image object, which creates the textures and
         * pixel buffer objects needed to hold the image.
         */
        forge::Image img(DIMX, DIMY, FG_RGBA, forge::u8);

        /*
         * Helper function that creates a CL-GL interop context.
         * It checks whether the extension is available and
         * creates the context on the appropriate device.
         * Note: context and queue are defined in cl_helpers.h
         */
        context       = createCLGLContext(wnd);
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue         = CommandQueue(context, device);

        /* Compute the image into a device buffer. It is then copied
         * into the pixel buffer object exposed by forge::Image before
         * rendering. Forge ships copy-helper headers to move data
         * from compute memory to display memory.
         */
        cl::Buffer devOut(context, CL_MEM_READ_WRITE, IMG_SIZE);

        kernel(devOut, queue);

        GfxHandle* handle = 0;

        // create the interop handle for the image's pixel buffer
        createGLBuffer(&handle, img.pixels(), FORGE_IMAGE_BUFFER);

        // copy the data from compute buffer to graphics buffer
        copyToGLBuffer(handle, (ComputeResourceHandle)devOut(), img.size());

        // redraw the (static) image until the user closes the window
        do { wnd.draw(img); } while (!wnd.close());

        // release the interop handle
        releaseGLBuffer(handle);

    } catch (forge::Error err) {
        // report the Forge error message and code, then fall through to exit
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    } catch (cl::Error err) {
        // report the OpenCL error message and code, then fall through to exit
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }

    return 0;
}

Bubblechart with Transparency¶

CPU¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <algorithm>
#include <cmath>
#include <complex>
#include <functional>
#include <iostream>
#include <iterator>
#include <random>
#include <vector>

// Window width and height, in pixels.
const unsigned DIMX = 1000;
const unsigned DIMY = 800;

// Half-open x range [FRANGE_START, FRANGE_END) that is sampled: 0 to 2*pi.
const float FRANGE_START = 0.f;
const float FRANGE_END   = 2.f * 3.1415926f;

using namespace std;
/* Sample `map` over [range_start, range_end) in steps of dx and append the
 * samples to `vec` as interleaved pairs: x, map(x), x, map(x), ...
 *
 * Nothing is appended when the step is not strictly positive (including
 * NaN), when the range is empty or reversed, or when `map` is null. A
 * non-positive step could never reach range_end, so without this check the
 * loop would append forever.
 *
 * vec is appended to, never cleared. */
void map_range_to_vec_vbo(float range_start, float range_end, float dx,
                          std::vector<float>& vec, float (*map)(float)) {
    // Written with negated comparisons so that NaN inputs are rejected too.
    if (!(dx > 0.0f) || !(range_start < range_end) || map == nullptr) return;

    for (float i = range_start; i < range_end; i += dx) {
        vec.push_back(i);
        vec.push_back((*map)(i));
    }
}

// Plots cosine and tangent samples in a 2D chart; the tangent plot gets
// random per-vertex colors, alphas and marker radii. Runs until the window
// is closed.
int main(void) {
    std::vector<float> cosData;
    std::vector<float> tanData;

    // each vector receives interleaved (x, f(x)) pairs, i.e. two floats per point
    map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, cosData, &cosf);
    map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, tanData, &tanf);

    std::random_device r;

    std::default_random_engine e1(r());
    std::mt19937_64 gen(r());

    // value ranges for the radius factor, the color channels and the alphas
    std::uniform_real_distribution<float> nDist(0.0f, 1.0f);
    std::uniform_real_distribution<float> cDist(0.2f, 0.6f);
    std::uniform_real_distribution<float> fDist(0.4f, 0.6f);

    // std::bind stores copies of the distribution and of the engine
    auto clr = std::bind(cDist, gen);
    auto rnd = std::bind(nDist, e1);
    auto alp = std::bind(fDist, gen);

    // NOTE(review): these are sized from tanData.size(), which counts two
    // floats per point, while the plot below has tanData.size() / 2 points.
    // The buffers look larger than needed; confirm against the
    // plt2.*Size() byte counts used for the copies.
    std::vector<float> colors(3 * tanData.size());
    std::vector<float> alphas(tanData.size());
    std::vector<float> radii(tanData.size());

    std::generate(colors.begin(), colors.end(), clr);
    // radii are drawn from [20, 80)
    std::generate(radii.begin(), radii.end(),
                  [&] { return 20.0f + 60.0f * rnd(); });
    std::generate(alphas.begin(), alphas.end(), alp);

    /*
     * The first Forge call should create a window so that the
     * OpenGL context required by every other forge::* object
     * exists before those objects are created.
     */
    forge::Window wnd(DIMX, DIMY, "Bubble chart with Transparency Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);

    /* Create the plot objects, which create the vertex buffer
     * objects needed to hold the different plot types.
     */
    forge::Plot plt1 =
        chart.plot((unsigned)(cosData.size() / 2), forge::f32, FG_PLOT_LINE,
                   FG_MARKER_TRIANGLE);  // the third argument selects the plot type
    forge::Plot plt2 =
        chart.plot((unsigned)(tanData.size() / 2), forge::f32, FG_PLOT_LINE,
                   FG_MARKER_CIRCLE);  // last parameter specifies marker shape

    /* Set plot colors */
    plt1.setColor(FG_RED);
    plt2.setColor(FG_GREEN);  // use a forge predefined color
    /* Set plot legends */
    plt1.setLegend("Cosine");
    plt2.setLegend("Tangent");
    /* set plot global marker size */
    plt1.setMarkerSize(20);
    /* Copy the data into the OpenGL buffer objects exposed by
     * the forge::Plot class and then proceed to rendering.
     * Forge ships copy-helper headers to move data from compute
     * memory to display memory.
     */

    // [0] cosine vertices, [1] tangent vertices, [2] tangent colors,
    // [3] tangent alphas, [4] tangent marker radii
    GfxHandle* handles[5];

    // create the interop handles for the plot buffers
    createGLBuffer(&handles[0], plt1.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], plt2.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], plt2.colors(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[3], plt2.alphas(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[4], plt2.radii(), FORGE_VERTEX_BUFFER);

    // copy the data from compute buffer to graphics buffer
    copyToGLBuffer(handles[0], (ComputeResourceHandle)cosData.data(),
                   plt1.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)tanData.data(),
                   plt2.verticesSize());

    /* update color value for tan graph */
    copyToGLBuffer(handles[2], (ComputeResourceHandle)colors.data(),
                   plt2.colorsSize());
    /* update alpha values for tan graph */
    copyToGLBuffer(handles[3], (ComputeResourceHandle)alphas.data(),
                   plt2.alphasSize());
    /* update marker sizes for tan graph markers */
    copyToGLBuffer(handles[4], (ComputeResourceHandle)radii.data(),
                   plt2.radiiSize());

    // redraw the (static) chart until the user closes the window
    do { wnd.draw(chart); } while (!wnd.close());

    // release the interop handles
    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);
    releaseGLBuffer(handles[3]);
    releaseGLBuffer(handles[4]);
    return 0;
}

CUDA¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <cuda_runtime.h>
#include <curand.h>
#include <curand_kernel.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
#include <iostream>

// Window width and height, in pixels.
const unsigned DIMX = 1000;
const unsigned DIMY = 800;

// Sampling step along x.
static const float DX           = 0.1f;
// Start and end of the sampled x range: 0 to 2*pi.
static const float FRANGE_START = 0.f;
static const float FRANGE_END   = 2 * 3.141592f;
// Number of sample points: the range length divided by DX, truncated.
static const size_t DATA_SIZE   = (size_t)((FRANGE_END - FRANGE_START) / DX);

// Device array of cuRAND generator states; allocated and seeded in main(),
// read by the launcher kernel().
curandState_t* state;

// Host-side launcher: fills dev_out with (x, f(x)) pairs and, for each
// non-null pointer among colors/alphas/radii, fills that buffer with random
// values.
void kernel(float* dev_out, int functionCode, float* colors, float* alphas,
            float* radii);

// Integer division of a by b, rounded up (for non-negative a and positive b).
inline int divup(int a, int b) {
    const int bumped = a + b - 1;
    return bumped / b;
}

/* Kernel: initialise one cuRAND generator state per sample.
 *
 * Expects a 1D launch. Thread `tid` seeds states[tid] with `seed`, using
 * `tid` as the cuRAND sequence number and offset 0. `count` is the number
 * of elements in `states`; because the grid is rounded up to whole blocks,
 * threads with tid >= count return without touching memory so the array
 * is never written out of bounds.
 */
__global__ void setupRandomKernel(curandState* states, unsigned long long seed,
                                  size_t count) {
    size_t tid = (size_t)blockDim.x * blockIdx.x + threadIdx.x;
    if (tid < count) curand_init(seed, tid, 0, &states[tid]);
}

// Plots cosine and tangent samples computed on the GPU in a 2D chart; the
// tangent plot gets random per-vertex colors, alphas and marker radii. Runs
// until the window is closed.
int main(void) {
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&state, DATA_SIZE * sizeof(curandState_t)));
    // clang-format off
    setupRandomKernel<<<divup(DATA_SIZE, 32), 32>>>(state, 314567, DATA_SIZE);
    // clang-format on
    // a launch returns no status; surface configuration errors explicitly
    FORGE_CUDA_CHECK(cudaGetLastError());

    float* cos_out;
    float* tan_out;
    float* colors_out;
    float* alphas_out;
    float* radii_out;

    // two floats (x, y) per vertex, three floats per color, one per alpha/radius
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&cos_out, sizeof(float) * DATA_SIZE * 2));
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&tan_out, sizeof(float) * DATA_SIZE * 2));
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&colors_out, sizeof(float) * DATA_SIZE * 3));
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&alphas_out, sizeof(float) * DATA_SIZE));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&radii_out, sizeof(float) * DATA_SIZE));

    /*
     * The first Forge call should create a window so that the
     * OpenGL context required by every other forge::* object
     * exists before those objects are created.
     */
    forge::Window wnd(DIMX, DIMY, "Bubble chart with Transparency Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);

    /* Create the plot objects, which create the vertex buffer
     * objects needed to hold the different plot types.
     */
    forge::Plot plt1 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_TRIANGLE);
    forge::Plot plt2 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_CIRCLE);

    /* Set plot colors */
    plt1.setColor(FG_RED);
    plt2.setColor(FG_GREEN);  // use a forge predefined color
    /* Set plot legends */
    plt1.setLegend("Cosine");
    plt2.setLegend("Tangent");
    /* set plot global marker size */
    plt1.setMarkerSize(20);
    /* Copy the data into the OpenGL buffer objects exposed by
     * the forge::Plot class and then proceed to rendering.
     * Forge ships copy-helper headers to move data from compute
     * memory to display memory.
     */

    // [0] cosine vertices, [1] tangent vertices, [2] tangent colors,
    // [3] tangent alphas, [4] tangent marker radii
    GfxHandle* handles[5];

    // create GL-CUDA interop buffers
    createGLBuffer(&handles[0], plt1.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], plt2.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], plt2.colors(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[3], plt2.alphas(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[4], plt2.radii(), FORGE_VERTEX_BUFFER);

    // function code 0 = cosine (vertices only), 1 = tangent (plus attributes)
    kernel(cos_out, 0, NULL, NULL, NULL);
    kernel(tan_out, 1, colors_out, alphas_out, radii_out);

    // copy the data from compute buffer to graphics buffer
    copyToGLBuffer(handles[0], (ComputeResourceHandle)cos_out,
                   plt1.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)tan_out,
                   plt2.verticesSize());

    /* update color value for tan graph */
    copyToGLBuffer(handles[2], (ComputeResourceHandle)colors_out,
                   plt2.colorsSize());
    /* update alpha values for tan graph */
    copyToGLBuffer(handles[3], (ComputeResourceHandle)alphas_out,
                   plt2.alphasSize());
    /* update marker sizes for tan graph markers */
    copyToGLBuffer(handles[4], (ComputeResourceHandle)radii_out,
                   plt2.radiiSize());

    // redraw the (static) chart until the user closes the window
    do { wnd.draw(chart); } while (!wnd.close());

    // destroy GL-CUDA Interop buffer
    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);
    releaseGLBuffer(handles[3]);
    releaseGLBuffer(handles[4]);
    // destroy CUDA handles
    FORGE_CUDA_CHECK(cudaFree(cos_out));
    FORGE_CUDA_CHECK(cudaFree(tan_out));
    FORGE_CUDA_CHECK(cudaFree(colors_out));
    FORGE_CUDA_CHECK(cudaFree(alphas_out));
    FORGE_CUDA_CHECK(cudaFree(radii_out));
    // the generator states were allocated at the top of main()
    FORGE_CUDA_CHECK(cudaFree(state));

    return 0;
}

/* Kernel: write n interleaved (x, y) samples into `out`.
 *
 * Expects a 1D launch with at least n threads. Thread `id` computes
 * x = frange_start + id * dx and y = f(x), where f is cosine for
 * functionCode 0, tangent for 1 and sine for anything else, and stores them
 * at out[2 * id] and out[2 * id + 1]. Threads with id >= n do nothing; the
 * grid is rounded up to whole blocks, so without this guard they would
 * write past the end of `out`.
 */
__global__ void mapKernel(float* out, int functionCode, float frange_start,
                          float dx, int n) {
    int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id >= n) return;

    float x = frange_start + id * dx;
    float y;

    switch (functionCode) {
        case 0: y = cosf(x); break;
        case 1: y = tanf(x); break;
        default: y = sinf(x); break;
    }

    out[2 * id + 0] = x;
    out[2 * id + 1] = y;
}

/* Kernel: write n random RGB triples into `colors`.
 *
 * Expects a 1D launch with at least n threads. Thread `id` draws three
 * values with curand_uniform from states[id] and stores them at
 * colors[3 * id .. 3 * id + 2]. Threads with id >= n do nothing.
 */
__global__ void colorsKernel(float* colors, curandState* states, int n) {
    int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id >= n) return;

    colors[3 * id + 0] = curand_uniform(&states[id]);
    colors[3 * id + 1] = curand_uniform(&states[id]);
    colors[3 * id + 2] = curand_uniform(&states[id]);
}

/* Kernel: write n random values into `out`.
 *
 * Expects a 1D launch with at least n threads. Thread `id` stores
 * curand_uniform(states[id]) * scale + min at out[id]. Threads with
 * id >= n do nothing.
 */
__global__ void randKernel(float* out, curandState* states, float min,
                           float scale, int n) {
    int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id >= n) return;

    out[id] = curand_uniform(&states[id]) * scale + min;
}

/* Host-side launcher.
 *
 * Fills dev_out with DATA_SIZE (x, f(x)) pairs for the function selected by
 * functionCode, then, for each of colors / alphas / radii that is non-null,
 * fills that buffer with random values drawn from the global `state` array:
 * colors as produced by colorsKernel, alphas with min 0 and scale 1, radii
 * with min 20 and scale 60.
 *
 * Blocks are 32 threads wide and the grid is rounded up to cover DATA_SIZE;
 * the element count is passed to every kernel so that the surplus threads
 * stay inside the buffers. Each launch is followed by a cudaGetLastError()
 * check because a kernel launch returns no status of its own.
 */
void kernel(float* dev_out, int functionCode, float* colors, float* alphas,
            float* radii) {
    static const dim3 threads(32);
    dim3 blocks(divup(DATA_SIZE, 32));
    const int n = (int)DATA_SIZE;

    // clang-format off
    mapKernel<<<blocks, threads>>>(dev_out, functionCode, FRANGE_START, DX, n);
    FORGE_CUDA_CHECK(cudaGetLastError());

    if (colors) {
        colorsKernel<<<blocks, threads>>>(colors, state, n);
        FORGE_CUDA_CHECK(cudaGetLastError());
    }

    if (alphas) {
        randKernel<<<blocks, threads>>>(alphas, state, 0, 1, n);
        FORGE_CUDA_CHECK(cudaGetLastError());
    }

    if (radii) {
        randKernel<<<blocks, threads>>>(radii, state, 20, 60, n);
        FORGE_CUDA_CHECK(cudaGetLastError());
    }
    // clang-format on
}

OpenCL¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>

#include "cl_helpers.h"

#include <algorithm>
#include <cmath>
#include <ctime>
#include <iostream>
#include <sstream>
#include <vector>

using namespace cl;
using namespace std;

// Window width and height, in pixels.
const unsigned DIMX = 1000;
const unsigned DIMY = 800;

// Sampling step along x.
static const float DX           = 0.1f;
// Start and end of the sampled x range: 0 to 2*pi.
static const float FRANGE_START = 0.f;
static const float FRANGE_END   = 2 * 3.141592f;
// Number of sample points: the range length divided by DX, truncated.
static const int DATA_SIZE      = (int)((FRANGE_END - FRANGE_START) / DX);

#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>

// clang-format off
// OpenCL C program source, built at run time by kernel() below. It defines
// three kernels, each guarded by `id < DATA_SIZE`:
//   randKernel   - out[id] = scale * (1 + rand(seed * id)) / 2 + min
//   colorsKernel - three such values per id, one per seed, without scale/min
//   mapKernel    - interleaved (x, f(x)) pairs; f is cos for code 0, tan for
//                  code 1 and sin otherwise
// NOTE(review): rand() uses the double literals 1.0 and 1073741824.0;
// presumably this requires double-precision support on the device - confirm.
static const std::string chartKernels =
R"EOK(
float rand(int x) {
    x = (x << 13) ^ x;
    return (1.0 - ((x * (x * x * 15731 + 789221) + 1376312589) & 0x7fffffff) /
                      1073741824.0);
}

kernel void randKernel(global float* out, unsigned seed, float min, float scale,
                       int DATA_SIZE) {
    int id = get_global_id(0);
    if (id < DATA_SIZE) out[id] = scale * (1 + rand(seed * id)) / 2.0f + min;
}

kernel void colorsKernel(global float* out, unsigned rseed, unsigned gseed,
                         unsigned bseed, int DATA_SIZE) {
    int id = get_global_id(0);
    if (id < DATA_SIZE) {
        out[3 * id + 0] = (1 + rand(rseed * id)) / 2.0f;
        out[3 * id + 1] = (1 + rand(gseed * id)) / 2.0f;
        out[3 * id + 2] = (1 + rand(bseed * id)) / 2.0f;
    }
}

kernel void mapKernel(global float* out, int functionCode, float FRANGE_START,
                      float DX, int DATA_SIZE) {
    int id  = get_global_id(0);
    float x = FRANGE_START + id * DX;
    float y;

    switch (functionCode) {
        case 0: y = cos(x); break;
        case 1: y = tan(x); break;
        default: y = sin(x); break;
    }

    if (id < DATA_SIZE) {
        out[2 * id + 0] = x;
        out[2 * id + 1] = y;
    }
}
)EOK";
// clang-format on

// Integer division of a by b, rounded up (for non-negative a and positive b).
inline int divup(int a, int b) {
    const int roundedUp = (a - 1) + b;
    return roundedUp / b;
}

/* Enqueue the chart kernels on `queue`.
 *
 * Always enqueues mapKernel, which fills devOut with DATA_SIZE (x, f(x))
 * pairs for the function selected by fnCode. outFlags is a bit mask that
 * selects the optional outputs:
 *   0x1 - fill colorsOut via colorsKernel
 *   0x2 - fill alphasOut via randKernel (min 0, scale 1)
 *   0x4 - fill radiiOut via randKernel (min 20, scale 60)
 * Buffers whose bit is not set are not touched.
 *
 * On the first call the program is built for `device` and the three kernel
 * objects are created; a build failure prints the build log and exits the
 * process with status 255.
 */
void kernel(cl::Buffer& devOut, int fnCode, int outFlags, cl::Buffer& colorsOut,
            cl::Buffer& alphasOut, cl::Buffer& radiiOut,
            cl::CommandQueue& queue, cl::Device& device) {
    // true until the program has been built successfully once
    static bool compileFlag = true;

    static cl::Program prog;
    static cl::Kernel randKernel, colorsKernel, mapKernel;

    // NOTE(review): the generator is reseeded from the clock on every call,
    // so two calls within the same second restart the same std::rand()
    // sequence - confirm this is intended.
    std::srand((unsigned)(std::time(0)));

    if (compileFlag) {
        try {
            // construct without building so that build errors can be reported
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), chartKernels,
                               false);

            std::vector<cl::Device> devs;
            devs.push_back(device);
            prog.build(devs);

            randKernel   = cl::Kernel(prog, "randKernel");
            colorsKernel = cl::Kernel(prog, "colorsKernel");
            mapKernel    = cl::Kernel(prog, "mapKernel");
        } catch (cl::Error err) {
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    // 32 work-items per group; global size rounded up to a whole group
    static const NDRange local(32);
    NDRange global(local[0] * divup(DATA_SIZE, (int)(local[0])));

    // argument order matches mapKernel in chartKernels
    mapKernel.setArg(0, devOut);
    mapKernel.setArg(1, fnCode);
    mapKernel.setArg(2, FRANGE_START);
    mapKernel.setArg(3, DX);
    mapKernel.setArg(4, DATA_SIZE);
    queue.enqueueNDRangeKernel(mapKernel, cl::NullRange, global, local);

    if (outFlags & 0x00000001) {
        // one independent seed per color channel
        colorsKernel.setArg(0, colorsOut);
        colorsKernel.setArg(1, std::rand());
        colorsKernel.setArg(2, std::rand());
        colorsKernel.setArg(3, std::rand());
        colorsKernel.setArg(4, DATA_SIZE);
        queue.enqueueNDRangeKernel(colorsKernel, cl::NullRange, global, local);
    }

    if (outFlags & 0x00000002) {
        // alphas: min 0, scale 1
        randKernel.setArg(0, alphasOut);
        randKernel.setArg(1, std::rand());
        randKernel.setArg(2, 0.0f);
        randKernel.setArg(3, 1.0f);
        randKernel.setArg(4, DATA_SIZE);
        queue.enqueueNDRangeKernel(randKernel, cl::NullRange, global, local);
    }

    if (outFlags & 0x00000004) {
        // radii: min 20, scale 60
        randKernel.setArg(0, radiiOut);
        randKernel.setArg(1, std::rand());
        randKernel.setArg(2, 20.0f);
        randKernel.setArg(3, 60.0f);
        randKernel.setArg(4, DATA_SIZE);
        queue.enqueueNDRangeKernel(randKernel, cl::NullRange, global, local);
    }
}

// Plots cosine and tangent samples computed with OpenCL in a 2D chart; the
// tangent plot gets random per-vertex colors, alphas and marker radii. Runs
// until the window is closed. Forge and OpenCL errors are printed to stdout.
int main(void) {
    try {
        /*
         * The first Forge call should create a window so that the
         * OpenGL context required by every other forge::* object
         * exists before those objects are created.
         */
        forge::Window wnd(DIMX, DIMY, "Bubble chart with Transparency Demo");
        wnd.makeCurrent();

        forge::Chart chart(FG_CHART_2D);
        chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);

        /* Create the plot objects, which create the vertex buffer
         * objects needed to hold the different plot types.
         */
        forge::Plot plt1 =
            chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_TRIANGLE);
        forge::Plot plt2 =
            chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_CIRCLE);

        /* Set plot colors */
        plt1.setColor(FG_RED);
        plt2.setColor(FG_GREEN);  // use a forge predefined color
        /* Set plot legends */
        plt1.setLegend("Cosine");
        plt2.setLegend("Tangent");
        /* set plot global marker size */
        plt1.setMarkerSize(20);

        /*
         * Helper function that creates a CL-GL interop context.
         * It checks whether the extension is available and
         * creates the context on the appropriate device.
         * Note: context and queue are defined in cl_helpers.h
         */
        context       = createCLGLContext(wnd);
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue         = CommandQueue(context, device);

        // [0] cosine vertices, [1] tangent vertices, [2] tangent colors,
        // [3] tangent alphas, [4] tangent marker radii
        GfxHandle* handles[5];

        // create GL-OpenCL interop buffers
        createGLBuffer(&handles[0], plt1.vertices(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[1], plt2.vertices(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[2], plt2.colors(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[3], plt2.alphas(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[4], plt2.radii(), FORGE_VERTEX_BUFFER);

        // two floats (x, y) per vertex, three per color, one per alpha/radius
        cl::Buffer cosOut(context, CL_MEM_READ_WRITE,
                          sizeof(float) * DATA_SIZE * 2);
        cl::Buffer tanOut(context, CL_MEM_READ_WRITE,
                          sizeof(float) * DATA_SIZE * 2);
        cl::Buffer colorsOut(context, CL_MEM_READ_WRITE,
                             sizeof(float) * DATA_SIZE * 3);
        cl::Buffer alphasOut(context, CL_MEM_READ_WRITE,
                             sizeof(float) * DATA_SIZE);
        cl::Buffer radiiOut(context, CL_MEM_READ_WRITE,
                            sizeof(float) * DATA_SIZE);
        // placeholder for the optional outputs that are not requested
        cl::Buffer dummy;

        // cosine: vertices only; tangent: vertices plus all three attributes
        kernel(cosOut, 0, 0, dummy, dummy, dummy, queue, device);
        kernel(tanOut, 1, 0x00000007, colorsOut, alphasOut, radiiOut, queue,
               device);

        /* Copy the data into the OpenGL buffer objects exposed by
         * the forge::Plot class and then proceed to rendering.
         * Forge ships copy-helper headers to move data from compute
         * memory to display memory.
         */
        copyToGLBuffer(handles[0], (ComputeResourceHandle)cosOut(),
                       plt1.verticesSize());
        copyToGLBuffer(handles[1], (ComputeResourceHandle)tanOut(),
                       plt2.verticesSize());

        /* update color value for tan graph */
        copyToGLBuffer(handles[2], (ComputeResourceHandle)colorsOut(),
                       plt2.colorsSize());
        /* update alpha values for tan graph */
        copyToGLBuffer(handles[3], (ComputeResourceHandle)alphasOut(),
                       plt2.alphasSize());
        /* update marker sizes for tan graph markers */
        copyToGLBuffer(handles[4], (ComputeResourceHandle)radiiOut(),
                       plt2.radiiSize());

        // redraw the (static) chart until the user closes the window
        do { wnd.draw(chart); } while (!wnd.close());

        // destroy GL-OpenCL Interop buffer
        releaseGLBuffer(handles[0]);
        releaseGLBuffer(handles[1]);
        releaseGLBuffer(handles[2]);
        releaseGLBuffer(handles[3]);
        releaseGLBuffer(handles[4]);

    } catch (forge::Error err) {
        // report the Forge error message and code, then fall through to exit
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    } catch (cl::Error err) {
        // report the OpenCL error message and code, then fall through to exit
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }

    return 0;
}

2D Vector Field¶

CPU¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>

// Window size handed to forge::Window in main().
const unsigned DIMX      = 640;
const unsigned DIMY      = 480;
const float PI           = 3.14159265359f;
// First sampled coordinate and upper chart limit; main() also derives the
// lower chart limit from MINIMUM.
const float MINIMUM      = 1.0f;
const float MAXIMUM      = 20.f;
// Distance between neighbouring samples along each axis.
const float STEP         = 2.0f;
// Samples per axis; evaluates to 10 with the values above.
const float NELEMS       = (MAXIMUM - MINIMUM + 1) / STEP;
// Eight values uploaded by main() as the vertex data of the 4-element
// scatter plot.
const unsigned DPOINTS[] = {5, 5, 5, 15, 15, 5, 15, 15};

using namespace std;

/**
 * Fill `points` and `dirs` with a regular 2D grid of sample positions and the
 * direction attached to each position.
 *
 * Both vectors are emptied first, so repeated calls always yield the same
 * contents. Values are interleaved -- points = {x0, y0, x1, y1, ...} and
 * dirs = {dx0, dy0, dx1, dy1, ...} -- and emitted row by row (y is the outer
 * loop, x the inner one).
 *
 * @param points receives the (x, y) position of every grid sample
 * @param dirs   receives (sin(2*PI*x/10), sin(2*PI*y/10)) for every sample
 */
void generatePoints(std::vector<float> &points, std::vector<float> &dirs) {
    points.clear();
    // Reset dirs too: without this a second call would append to stale data
    // and positions and directions would no longer line up.
    dirs.clear();

    for (int j = 0; j < NELEMS; ++j) {
        float y = MINIMUM + j * STEP;
        for (int i = 0; i < NELEMS; ++i) {
            float x = MINIMUM + i * STEP;
            points.push_back(x);
            points.push_back(y);
            dirs.push_back(sin(2 * PI * x / 10.f));
            dirs.push_back(sin(2 * PI * y / 10.f));
        }
    }
}

int main(void) {
    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(DIMX, DIMY, "Vector Field Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLimits(MINIMUM - 1.0f, MAXIMUM, MINIMUM - 1.0f, MAXIMUM);
    chart.setAxesTitles("x-axis", "y-axis");

    forge::Plot divPoints =
        chart.plot(4, forge::u32, FG_PLOT_SCATTER, FG_MARKER_CIRCLE);
    divPoints.setColor(0.9f, 0.9f, 0.0f, 1.f);
    divPoints.setLegend("Convergence Points");
    divPoints.setMarkerSize(24);

    forge::VectorField field =
        chart.vectorField((unsigned)(NELEMS * NELEMS), forge::f32);
    field.setColor(0.f, 0.6f, 0.3f, 1.f);

    std::vector<float> points;
    std::vector<float> dirs;
    generatePoints(points, dirs);

    GfxHandle *handles[3];

    createGLBuffer(&handles[0], divPoints.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], field.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);

    copyToGLBuffer(handles[0], (ComputeResourceHandle)DPOINTS,
                   divPoints.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)points.data(),
                   field.verticesSize());
    copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs.data(),
                   field.directionsSize());

    do { wnd.draw(chart); } while (!wnd.close());

    // destroy GL-cpu interop buffers
    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);

    return 0;
}

CUDA¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>

// Single-precision pi. The `f` suffix matters: an unsuffixed literal is a
// double and would promote every float expression that uses it (including the
// kernel math below) to double precision.
#define PI 3.14159265359f

// Window size handed to forge::Window in main().
const unsigned DIMX      = 640;
const unsigned DIMY      = 480;
// First sampled coordinate and upper chart limit; main() also derives the
// lower chart limit from MINIMUM.
const float MINIMUM      = 1.0f;
const float MAXIMUM      = 20.f;
// Distance between neighbouring samples along each axis.
const float STEP         = 2.0f;
// Samples per axis; evaluates to 10 with the values above.
const float NELEMS       = (MAXIMUM - MINIMUM + 1) / STEP;
// Eight values copied to the device by main() and used as the vertex data of
// the 4-element scatter plot.
const unsigned DPOINTS[] = {5, 5, 5, 15, 15, 5, 15, 15};

// Launches pointGenKernel to fill the two device buffers; defined at the end
// of this example.
void generatePoints(float* points, float* dirs);

/* Integer division of a by b, rounded up (for non-negative a, positive b). */
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}

/*
 * Entry point of the CUDA vector field demo. Creates the window and chart,
 * allocates device buffers, fills them (a host-to-device copy for the scatter
 * markers, a kernel launch for the field) and copies them into the OpenGL
 * buffers exposed by Forge before entering the draw loop.
 */
int main(void) {
    /*
     * The window has to be the first Forge object: it creates the OpenGL
     * context that every other forge::* object relies on.
     */
    forge::Window wnd(DIMX, DIMY, "Vector Field Demo");
    wnd.makeCurrent();

    const float axisMin = MINIMUM - 1.0f;
    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLimits(axisMin, MAXIMUM, axisMin, MAXIMUM);
    chart.setAxesTitles("x-axis", "y-axis");

    forge::Plot markers =
        chart.plot(4, forge::u32, FG_PLOT_SCATTER, FG_MARKER_CIRCLE);
    markers.setColor(0.9f, 0.9f, 0.0f, 1.f);
    markers.setLegend("Convergence Points");
    markers.setMarkerSize(24);

    const size_t npoints = (size_t)(NELEMS * NELEMS);

    forge::VectorField field =
        chart.vectorField((unsigned)(npoints), forge::f32);
    field.setColor(0.f, 0.6f, 0.3f, 1.f);

    // Device staging buffers: 8 unsigned values for the markers and two
    // floats per sample for both the positions and the directions.
    const size_t markerBytes = 8 * sizeof(unsigned);
    const size_t fieldBytes  = 2 * npoints * sizeof(float);

    unsigned* dMarkers;
    float* dPositions;
    float* dDirections;
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dMarkers, markerBytes));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dPositions, fieldBytes));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dDirections, fieldBytes));

    // handles[0]: marker vertices, [1]: field positions, [2]: field directions
    const int handleCount = 3;
    GfxHandle* handles[handleCount];

    createGLBuffer(&handles[0], markers.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], field.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);

    FORGE_CUDA_CHECK(cudaMemcpy(dMarkers, DPOINTS, markerBytes,
                                cudaMemcpyHostToDevice));
    generatePoints(dPositions, dDirections);

    copyToGLBuffer(handles[0], (ComputeResourceHandle)dMarkers,
                   markers.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)dPositions,
                   field.verticesSize());
    copyToGLBuffer(handles[2], (ComputeResourceHandle)dDirections,
                   field.directionsSize());

    // Draw at least once, then keep drawing until the window is closed.
    do {
        wnd.draw(chart);
    } while (!wnd.close());

    // destroy GL-CUDA interop buffers, in creation order
    for (int h = 0; h < handleCount; ++h) {
        releaseGLBuffer(handles[h]);
    }
    // destroy CUDA handles
    FORGE_CUDA_CHECK(cudaFree(dMarkers));
    FORGE_CUDA_CHECK(cudaFree(dPositions));
    FORGE_CUDA_CHECK(cudaFree(dDirections));

    return 0;
}

/*
 * Writes one grid sample per thread: its (x, y) position into `points` and
 * the direction (sin(2*pi*x/10), sin(2*pi*y/10)) into `dirs`, both interleaved
 * as pairs at element 2 * (i + j * nelems).
 *
 * Expects a 2D launch whose x/y extents each cover at least `nelems` threads;
 * surplus threads are masked out by the bounds check. Both buffers must hold
 * 2 * nelems * nelems floats.
 *
 * @param points  output positions
 * @param dirs    output directions
 * @param nelems  number of samples per axis
 * @param minimum coordinate of the first sample on each axis
 * @param step    distance between neighbouring samples
 */
__global__ void pointGenKernel(float* points, float* dirs, int nelems,
                               float minimum, float step) {
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    int j = blockDim.y * blockIdx.y + threadIdx.y;

    if (i < nelems && j < nelems) {
        int id = i + j * nelems;

        float x = minimum + i * step;
        float y = minimum + j * step;

        points[2 * id + 0] = x;
        points[2 * id + 1] = y;

        // Cast PI so the whole expression stays in single precision even if
        // the macro expands to a double literal; otherwise the argument is
        // evaluated in double and only narrowed when passed to sinf().
        const float twoPi = 2.0f * static_cast<float>(PI);

        dirs[2 * id + 0] = sinf(twoPi * x / 10.f);
        dirs[2 * id + 1] = sinf(twoPi * y / 10.f);
    }
}

/*
 * Fills the device buffers `points` and `dirs` by launching pointGenKernel on
 * a 2D grid of 8x8 blocks that covers NELEMS x NELEMS samples.
 *
 * @param points device buffer receiving the sample positions
 * @param dirs   device buffer receiving the sample directions
 */
void generatePoints(float* points, float* dirs) {
    static dim3 threads(8, 8);
    dim3 blocks(divup((int)(NELEMS), threads.x),
                divup((int)(NELEMS), threads.y));

    // clang-format off
    pointGenKernel<<<blocks, threads>>>(points, dirs, (int)(NELEMS), MINIMUM,
                                        STEP);
    // clang-format on

    // A kernel launch returns no status itself; query it explicitly so an
    // invalid launch configuration does not go unnoticed.
    FORGE_CUDA_CHECK(cudaGetLastError());
}

OpenCL¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>

#include "cl_helpers.h"

#include <algorithm>
#include <cmath>
#include <ctime>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>

using namespace cl;
using namespace std;

// Window size handed to forge::Window in main().
const unsigned DIMX      = 640;
const unsigned DIMY      = 480;
// First sampled coordinate and upper chart limit; main() also derives the
// lower chart limit from MINIMUM.
const float MINIMUM      = 1.0f;
const float MAXIMUM      = 20.f;
// Distance between neighbouring samples along each axis.
const float STEP         = 2.0f;
// Samples per axis; evaluates to 10 with the values above.
const float NELEMS       = (MAXIMUM - MINIMUM + 1) / STEP;
// Eight values written to a cl::Buffer by main() and used as the vertex data
// of the 4-element scatter plot.
const unsigned DPOINTS[] = {5, 5, 5, 15, 15, 5, 15, 15};

#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>

// clang-format off
// OpenCL C source of the kernel that fills the vector field buffers; it is
// compiled at run time by generatePoints(). Every floating point literal
// carries the `f` suffix so the kernel is evaluated in single precision and
// does not rely on double precision handling of the device compiler.
static const std::string fieldKernel =
R"EOK(
constant float PI = 3.14159265359f;

kernel void pointGenKernel(global float* points, global float* dirs, int NELEMS,
                           float MINIMUM, float STEP) {
    int i = get_global_id(0);
    int j = get_global_id(1);

    if (i < NELEMS && j < NELEMS) {
        int id = i + j * NELEMS;

        float x = MINIMUM + i * STEP;
        float y = MINIMUM + j * STEP;

        points[2 * id + 0] = x;
        points[2 * id + 1] = y;

        dirs[2 * id + 0] = sin(2.0f * PI * x / 10.0f);
        dirs[2 * id + 1] = sin(2.0f * PI * y / 10.0f);
    }
}
)EOK";
// clang-format on

/* Integer division of a by b, rounded up (for non-negative a, positive b). */
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}

/*
 * Fills the OpenCL buffers `points` and `dirs` by running pointGenKernel over
 * an NELEMS x NELEMS grid.
 *
 * The program is built on the first call only and cached in function-local
 * statics. If building fails, the error and the build log are printed and the
 * process exits with status 255.
 *
 * @param points buffer receiving the sample positions
 * @param dirs   buffer receiving the sample directions
 * @param queue  command queue the kernel is enqueued on; its context is used
 *               to build the program
 * @param device device the program is built for
 */
void generatePoints(cl::Buffer& points, cl::Buffer& dirs,
                    cl::CommandQueue& queue, cl::Device& device) {
    static bool compileFlag = true;

    static cl::Program prog;
    static cl::Kernel pointGenKernel;

    if (compileFlag) {
        try {
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), fieldKernel,
                               false);

            std::vector<cl::Device> devs;
            devs.push_back(device);
            prog.build(devs);

            pointGenKernel = cl::Kernel(prog, "pointGenKernel");
        } catch (const cl::Error& err) {
            // Caught by const reference: no copy of the exception object and
            // no slicing should a derived error type ever be thrown.
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    // Round the global size up to a whole number of 8x8 work-groups; the
    // kernel masks out the surplus work-items itself.
    static const NDRange local(8, 8);
    NDRange global(local[0] * divup((int)(NELEMS), (int)(local[0])),
                   local[1] * divup((int)(NELEMS), (int)(local[1])));

    pointGenKernel.setArg(0, points);
    pointGenKernel.setArg(1, dirs);
    pointGenKernel.setArg(2, (int)NELEMS);
    pointGenKernel.setArg(3, MINIMUM);
    pointGenKernel.setArg(4, STEP);
    queue.enqueueNDRangeKernel(pointGenKernel, cl::NullRange, global, local);
}

int main(void) {
    try {
        /*
         * First Forge call should be a window creation call
         * so that necessary OpenGL context is created for any
         * other forge::* object to be created successfully
         */
        forge::Window wnd(DIMX, DIMY, "Vector Field Demo");
        wnd.makeCurrent();

        forge::Chart chart(FG_CHART_2D);
        chart.setAxesLimits(MINIMUM - 1.0f, MAXIMUM, MINIMUM - 1.0f, MAXIMUM);
        chart.setAxesTitles("x-axis", "y-axis");

        forge::Plot divPoints =
            chart.plot(4, forge::u32, FG_PLOT_SCATTER, FG_MARKER_CIRCLE);
        divPoints.setColor(0.9f, 0.9f, 0.0f, 1.f);
        divPoints.setLegend("Convergence Points");
        divPoints.setMarkerSize(24);

        size_t npoints = (size_t)(NELEMS * NELEMS);

        forge::VectorField field =
            chart.vectorField((unsigned)(npoints), forge::f32);
        field.setColor(0.f, 0.6f, 0.3f, 1.f);

        /*
         * Helper function to create a CLGL interop context.
         * This function checks for if the extension is available
         * and creates the context on the appropriate device.
         * Note: context and queue are defined in cl_helpers.h
         */
        context       = createCLGLContext(wnd);
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue         = CommandQueue(context, device);

        GfxHandle* handles[3];

        createGLBuffer(&handles[0], divPoints.vertices(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[1], field.vertices(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);

        cl::Buffer dpoints(context, CL_MEM_READ_WRITE, sizeof(unsigned) * 8);
        cl::Buffer points(context, CL_MEM_READ_WRITE,
                          sizeof(float) * 2 * npoints);
        cl::Buffer dirs(context, CL_MEM_READ_WRITE,
                        sizeof(float) * 2 * npoints);

        queue.enqueueWriteBuffer(dpoints, CL_TRUE, 0, sizeof(unsigned) * 8,
                                 DPOINTS);
        generatePoints(points, dirs, queue, device);

        copyToGLBuffer(handles[0], (ComputeResourceHandle)dpoints(),
                       divPoints.verticesSize());

        copyToGLBuffer(handles[1], (ComputeResourceHandle)points(),
                       field.verticesSize());
        copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs(),
                       field.directionsSize());

        do { wnd.draw(chart); } while (!wnd.close());

        // destroy GL-CUDA interop buffers
        releaseGLBuffer(handles[0]);
        releaseGLBuffer(handles[1]);
        releaseGLBuffer(handles[2]);

    } catch (forge::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    } catch (cl::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }

    return 0;
}

Histogram¶

CPU¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

// Size of the generated noise image (also the extents of the PerlinNoise
// tables).
const unsigned IMGW  = 256;
const unsigned IMGH  = 256;
// Window size handed to forge::Window in main().
const unsigned DIMX  = 1000;
const unsigned DIMY  = 800;
// Number of histogram bins.
const unsigned NBINS = 256;

using namespace std;

// Host-side image with four bytes per pixel. The pixel storage is allocated
// by createBitmap() and released by destroyBitmap(); the struct itself does
// not manage it.
struct Bitmap {
    // width * height pixels, 4 bytes each, stored row by row
    unsigned char* ptr;
    unsigned width;
    unsigned height;
};

// Noise table built once in the constructor and sampled through noise().
// Note that an instance embeds two IMGW x IMGH float arrays by value.
class PerlinNoise {
   private:
    // random values in [0, 1] produced with std::rand()
    float base[IMGW][IMGH];
    // sum of interpolated octaves of `base`, divided by the total amplitude
    float perlin[IMGW][IMGH];

   public:
    // Seeds std::rand with the current time and fills both tables.
    PerlinNoise();
    // Looks up the table entry for (u, v); both are scaled by the table
    // extents, so callers pass values in [0, 1).
    float noise(float u, float v);
};

// Allocates a w x h bitmap with four bytes per pixel.
Bitmap createBitmap(unsigned w, unsigned h);

// Releases the pixel storage of a bitmap obtained from createBitmap().
void destroyBitmap(Bitmap& bmp);

// Fills bmp with freshly generated grey-scale noise.
void kernel(Bitmap& bmp);

// Counts the first channel of every pixel into hist_array (nbins entries)
// and writes three random colour components per bin into hist_cols.
void populateBins(Bitmap& bmp, int* hist_array, const unsigned nbins,
                  float* hist_cols);

int main(int argc, char* argv[]) {
    Bitmap bmp = createBitmap(IMGW, IMGH);
    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(DIMX, DIMY, "Histogram Demo");
    wnd.makeCurrent();

    forge::Image img(IMGW, IMGH, FG_RGBA, forge::u8);

    forge::Chart chart(FG_CHART_2D);

    /* set x axis limits to maximum and minimum values of data
     * and y axis limits to range [0, number of pixels ideally]
     * but practically total number of pixels as y range will skew
     * the histogram graph vertically. Therefore setting it to
     * 25% of total number of pixels */
    chart.setAxesLimits(0, 1, 0, IMGW * IMGH / (float)(NBINS / 4.0));

    /*
     * Create histogram object specifying number of bins
     */
    forge::Histogram hist = chart.histogram(NBINS, forge::s32);
    /*
     * Set histogram colors
     */
    hist.setColor(FG_YELLOW);

    GfxHandle* handles[3];

    createGLBuffer(&handles[0], img.pixels(), FORGE_IMAGE_BUFFER);
    createGLBuffer(&handles[1], hist.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], hist.colors(), FORGE_VERTEX_BUFFER);

    wnd.setColorMap((fg_color_map)(argc == 2 ? atoi(argv[1]) : 1));

    do {
        /*
         * generate image, and prepare data to pass into
         * Histogram's underlying vertex buffer object
         */
        kernel(bmp);

        copyToGLBuffer(handles[0], (ComputeResourceHandle)bmp.ptr, img.size());

        // forge::copy(img, (const void*)bmp.ptr);

        /* copy your data into the vertex buffer object exposed by
         * forge::Histogram class and then proceed to rendering.
         * To help the users with copying the data from compute
         * memory to display memory, Forge provides copy headers
         * along with the library to help with this task
         */
        std::vector<int> histArray(NBINS, 0);
        std::vector<float> colArray(3 * NBINS, 0.0f);
        populateBins(bmp, histArray.data(), NBINS, colArray.data());

        copyToGLBuffer(handles[1], (ComputeResourceHandle)histArray.data(),
                       hist.verticesSize());
        copyToGLBuffer(handles[2], (ComputeResourceHandle)colArray.data(),
                       hist.colorsSize());

        /*
         * Split the window into grid regions
         */
        // wnd.draw(2, 2, 0, img,  "Dynamic Perlin Noise" );
        // wnd.draw(2, 2, 1, img,  "Dynamic Perlin Noise" );
        // wnd.draw(2, 1, 1, chart, "Histogram of Noisy Image");
        wnd.draw(2, 3, 0, img, "Dynamic Perlin Noise");
        wnd.draw(2, 3, 1, img, "Dynamic Perlin Noise");
        wnd.draw(2, 3, 2, img, "Dynamic Perlin Noise");
        wnd.draw(2, 2, 2, chart, "Histogram of Noisy Image");
        wnd.draw(2, 2, 3, chart, "Histogram of Noisy Image");

        wnd.swapBuffers();
    } while (!wnd.close());

    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);

    return 0;
}

/* Linear blend of x0 and x1: alpha == 0 yields x0, alpha == 1 yields x1. */
float interp(float x0, float x1, float alpha) {
    const float keep = 1 - alpha;
    return x0 * keep + alpha * x1;
}

/*
 * Builds the noise table. `base` is filled with std::rand() values scaled to
 * [0, 1]; `perlin` accumulates seven octaves (periods 64, 32, ..., 1) of
 * bilinearly interpolated `base` samples, each weighted half as much as the
 * previous one, and is finally divided by the sum of the weights.
 */
PerlinNoise::PerlinNoise() {
    // Reseeded from the wall clock on every construction.
    std::srand((unsigned)(std::time(0)));

    for (unsigned row = 0; row < IMGW; row++) {
        for (unsigned col = 0; col < IMGH; col++) {
            base[row][col]   = std::rand() / (float)(RAND_MAX);
            perlin[row][col] = 0;
        }
    }

    const float persistence = 0.5f;
    float weight            = 1.0f;
    float weightSum         = 0.0f;

    // period runs through 1 << 6 down to 1 << 0
    for (int period = 64; period >= 1; period >>= 1) {
        const float freq = 1.0f / period;

        for (unsigned row = 0; row < IMGW; row++) {
            // lattice rows enclosing `row`, wrapping at the table edge
            const int r0       = (row / period) * period;
            const int r1       = (r0 + period) % IMGW;
            const float rBlend = (row - r0) * freq;

            for (unsigned col = 0; col < IMGH; col++) {
                // lattice columns enclosing `col`, wrapping at the table edge
                const int c0       = (col / period) * period;
                const int c1       = (c0 + period) % IMGH;
                const float cBlend = (col - c0) * freq;

                const float top = interp(base[r0][c0], base[r1][c0], rBlend);
                const float bot = interp(base[r0][c1], base[r1][c1], rBlend);

                perlin[row][col] += (weight * interp(top, bot, cBlend));
            }
        }
        weightSum += weight;
        weight *= persistence;
    }

    for (unsigned row = 0; row < IMGW; row++) {
        for (unsigned col = 0; col < IMGH; col++) {
            perlin[row][col] /= weightSum;
        }
    }
}

/*
 * Returns the noise table entry for the normalised coordinates (u, v).
 *
 * u selects the first table index (scaled by IMGW), v the second (scaled by
 * IMGH). Inputs are clamped to [0, 1] and the resulting indices to the last
 * valid entry, so u == 1 or v == 1 no longer read one element past the table
 * and out-of-range inputs can no longer trigger an undefined float-to-unsigned
 * conversion. Results for inputs in [0, 1) are unchanged.
 */
float PerlinNoise::noise(float u, float v) {
    // `!(x >= 0)` is also true for NaN, which must not reach the cast either.
    if (!(u >= 0.0f)) u = 0.0f;
    if (!(v >= 0.0f)) v = 0.0f;
    if (u > 1.0f) u = 1.0f;
    if (v > 1.0f) v = 1.0f;

    unsigned iu = (unsigned)(IMGW * u);
    unsigned iv = (unsigned)(IMGH * v);
    if (iu >= IMGW) iu = IMGW - 1;
    if (iv >= IMGH) iv = IMGH - 1;

    return perlin[iu][iv];
}

/*
 * Creates a w x h bitmap whose pixel storage (4 bytes per pixel, contents
 * uninitialised) is allocated with new[]. Release it with destroyBitmap().
 */
Bitmap createBitmap(unsigned w, unsigned h) {
    Bitmap bmp;
    bmp.ptr    = new unsigned char[4 * w * h];
    bmp.width  = w;
    bmp.height = h;
    return bmp;
}

/*
 * Releases the pixel storage of a bitmap created by createBitmap() and
 * resets the pointer, so the caller's struct no longer holds a dangling
 * address and a repeated call is harmless (delete[] of a null pointer is a
 * no-op).
 */
void destroyBitmap(Bitmap& bmp) {
    delete[] bmp.ptr;
    bmp.ptr = nullptr;
}

/*
 * Fills bmp with grey-scale noise: a new PerlinNoise table is built on every
 * call, sampled once per pixel, scaled to 0..255 and written to the first
 * three channels; the fourth channel is set to 255.
 */
void kernel(Bitmap& bmp) {
    PerlinNoise noiseTable;

    for (unsigned row = 0; row < bmp.height; ++row) {
        const float v = row / (float)(bmp.height);

        for (unsigned col = 0; col < bmp.width; ++col) {
            const float u = col / (float)(bmp.width);

            const unsigned char grey =
                (unsigned char)(255 * noiseTable.noise(u, v));

            // four bytes per pixel, rows stored back to back
            const int offset   = col + row * bmp.width;
            unsigned char* px  = bmp.ptr + offset * 4;
            px[0]              = grey;
            px[1]              = grey;
            px[2]              = grey;
            px[3]              = 255;
        }
    }
}

/*
 * Builds the histogram of the first channel of bmp and assigns a random
 * colour to every bin.
 *
 * @param bmp        image to analyse (4 bytes per pixel)
 * @param hist_array receives the counts; must hold nbins entries and is
 *                   incremented, not reset, so pass it zero-initialised
 * @param nbins      number of bins; nothing is written when it is 0
 * @param hist_cols  receives 3 * nbins colour components from std::rand()
 */
void populateBins(Bitmap& bmp, int* hist_array, const unsigned nbins,
                  float* hist_cols) {
    if (nbins == 0) return;

    for (unsigned y = 0; y < bmp.height; ++y) {
        for (unsigned x = 0; x < bmp.width; ++x) {
            int offset             = x + y * bmp.width;
            unsigned char noiseVal = bmp.ptr[offset * 4];
            unsigned idx = (unsigned)((float)noiseVal / 255.f * nbins);
            // An intensity of 255 maps to exactly nbins, one past the last
            // bin; fold it into the last bin instead of writing out of
            // bounds.
            if (idx >= nbins) idx = nbins - 1;
            hist_array[idx]++;
        }
    }

    for (unsigned b = 0; b < nbins; ++b) {
        hist_cols[3 * b + 0] = std::rand() / (float)RAND_MAX;
        hist_cols[3 * b + 1] = std::rand() / (float)RAND_MAX;
        hist_cols[3 * b + 2] = std::rand() / (float)RAND_MAX;
    }
}

CUDA¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <cuComplex.h>
#include <cuda_runtime.h>
#include <curand.h>
#include <curand_kernel.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>

// Size of the generated noise image and of the device noise tables.
const unsigned IMGW  = 256;
const unsigned IMGH  = 256;
// Window size handed to forge::Window in main().
const unsigned DIMX  = 1000;
const unsigned DIMY  = 800;
// Number of histogram bins; also the number of random generator states.
const unsigned NBINS = 256;

// Device array of NBINS cuRAND states, allocated and initialised in main()
// and shared by the noise and colour kernels.
curandState_t* state;

// Image with four bytes per pixel whose storage lives in device memory. The
// storage is allocated by createBitmap() and released by destroyBitmap(); the
// struct itself does not manage it.
struct Bitmap {
    // device pointer: width * height pixels, 4 bytes each, stored row by row
    unsigned char* ptr;
    unsigned width;
    unsigned height;
};

// Owner of the two device buffers used to generate the noise image.
// NOTE(review): the class holds raw device pointers and declares no copy
// constructor or assignment operator, so copying an instance would lead to
// the buffers being freed twice.
class PerlinNoise {
   public:
    // device buffer of IMGW * IMGH random values
    float* base;
    // device buffer of IMGW * IMGH accumulated, normalised noise values
    float* perlin;

    // Allocates both device buffers.
    PerlinNoise();
    // Frees both device buffers.
    ~PerlinNoise();
    // Regenerates `base` and recomputes `perlin` on the device.
    void generateNoise();
};

// Allocates a w x h bitmap with four bytes per pixel in device memory.
Bitmap createBitmap(unsigned w, unsigned h);

// Releases the device storage of a bitmap obtained from createBitmap().
void destroyBitmap(Bitmap& bmp);

// Regenerates the noise held by pn and renders it into bmp.
void kernel(Bitmap& bmp, PerlinNoise& pn);

// Computes the histogram of bmp into the device array hist_array (nbins
// entries) and fills the device array hist_cols with three random colour
// components per bin.
void populateBins(Bitmap& bmp, int* hist_array, const unsigned nbins,
                  float* hist_cols);

/*
 * Initialises one cuRAND state per launched thread, using the global thread
 * index both as the slot in `states` and as the sequence number.
 *
 * There is no bounds check: the launch must not create more threads than
 * `states` has entries.
 */
__global__ void setupRandomKernel(curandState* states,
                                  unsigned long long seed) {
    const unsigned slot = threadIdx.x + blockIdx.x * blockDim.x;
    curandState* mine   = states + slot;
    curand_init(seed, slot, 0, mine);
}

/*
 * Entry point of the CUDA histogram demo. Sets up the random generator
 * states, window, image and histogram chart, then redraws until the window is
 * closed; the noise image and its histogram are regenerated every eighth
 * frame. All device allocations made here are released before returning.
 */
int main(void) {
    Bitmap bmp = createBitmap(IMGW, IMGH);

    FORGE_CUDA_CHECK(cudaMalloc((void**)&state, NBINS * sizeof(curandState_t)));
    // clang-format off
    setupRandomKernel<<<1, NBINS>>>(state, 314567);
    // clang-format on
    // A kernel launch returns no status itself; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());

    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(DIMX, DIMY, "Histogram Demo");
    wnd.makeCurrent();

    forge::Image img(IMGW, IMGH, FG_RGBA, forge::u8);

    forge::Chart chart(FG_CHART_2D);

    chart.setAxesLabelFormat("%3.1f", "%.2e");

    /* set x axis limits to maximum and minimum values of data
     * and y axis limits to range [0, number of pixels ideally]
     * but practically total number of pixels as y range will skew
     * the histogram graph vertically. Therefore setting it to
     * 25% of total number of pixels */
    chart.setAxesLimits(0, 1, 0, IMGW * IMGH / (float)(NBINS / 4.0));

    /*
     * Create histogram object specifying number of bins
     */
    forge::Histogram hist = chart.histogram(NBINS, forge::s32);
    /*
     * Set histogram colors
     */
    hist.setColor(FG_YELLOW);

    PerlinNoise noiseGenerator;
    int* histOut;
    float* histColors;

    FORGE_CUDA_CHECK(cudaMalloc((void**)&histOut, NBINS * sizeof(int)));
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&histColors, 3 * NBINS * sizeof(float)));

    // handles[0]: image pixels, [1]: histogram vertices, [2]: histogram colors
    GfxHandle* handles[3];

    createGLBuffer(&handles[0], img.pixels(), FORGE_IMAGE_BUFFER);
    createGLBuffer(&handles[1], hist.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], hist.colors(), FORGE_VERTEX_BUFFER);

    unsigned frame = 0;
    do {
        // Regenerate the data on the first frame and every eighth frame
        // after that; the frames in between only redraw.
        if (frame % 8 == 0) {
            kernel(bmp, noiseGenerator);
            copyToGLBuffer(handles[0], (ComputeResourceHandle)bmp.ptr,
                           img.size());

            populateBins(bmp, histOut, NBINS, histColors);

            copyToGLBuffer(handles[1], (ComputeResourceHandle)histOut,
                           hist.verticesSize());
            copyToGLBuffer(handles[2], (ComputeResourceHandle)histColors,
                           hist.colorsSize());

            frame = 0;
        }

        /*
         * Split the window into grid regions
         */
        wnd.draw(1, 2, 0, img, "Dynamic Perlin Noise");
        wnd.draw(1, 2, 1, chart, "Histogram of Noisy Image");

        wnd.swapBuffers();
        frame++;
    } while (!wnd.close());

    FORGE_CUDA_CHECK(cudaFree(histOut));
    FORGE_CUDA_CHECK(cudaFree(histColors));
    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);

    // Previously leaked: the random generator states and the bitmap storage.
    FORGE_CUDA_CHECK(cudaFree(state));
    destroyBitmap(bmp);

    return 0;
}

/*
 * Creates a w x h bitmap whose pixel storage (4 bytes per pixel) is allocated
 * in device memory with cudaMalloc. Release it with destroyBitmap().
 */
Bitmap createBitmap(unsigned w, unsigned h) {
    const size_t bytes = sizeof(unsigned char) * 4 * w * h;

    Bitmap bmp;
    bmp.width  = w;
    bmp.height = h;
    FORGE_CUDA_CHECK(cudaMalloc((void**)&bmp.ptr, bytes));
    return bmp;
}

/*
 * Releases the device storage of a bitmap created by createBitmap() and
 * resets the pointer, so the caller's struct no longer holds a dangling
 * address and a repeated call is harmless (cudaFree of a null pointer
 * performs no operation).
 */
void destroyBitmap(Bitmap& bmp) {
    FORGE_CUDA_CHECK(cudaFree(bmp.ptr));
    bmp.ptr = NULL;
}

/* Allocates the two device buffers, IMGW * IMGH floats each. */
PerlinNoise::PerlinNoise() {
    const size_t tableBytes = IMGW * IMGH * sizeof(float);

    FORGE_CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&base), tableBytes));
    FORGE_CUDA_CHECK(
        cudaMalloc(reinterpret_cast<void**>(&perlin), tableBytes));
}

/* Frees the device buffers allocated by the constructor, `base` first. */
PerlinNoise::~PerlinNoise() {
    float* owned[2] = {base, perlin};
    for (int k = 0; k < 2; ++k) {
        FORGE_CUDA_CHECK(cudaFree(owned[k]));
    }
}

/* Integer division of a by b, rounded up (for non-negative a, positive b). */
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}

/* Linear blend of x0 and x1: alpha == 0 yields x0, alpha == 1 yields x1. */
__device__ float interp(float x0, float x1, float alpha) {
    const float keep = 1 - alpha;
    return x0 * keep + alpha * x1;
}

/*
 * Per pixel: stores a fresh uniform random number in `base` and resets the
 * accumulator `perlin` to zero. Expects a 2D launch covering IMGW x IMGH
 * threads; surplus threads return immediately.
 *
 * NOTE(review): the generator state is picked with `index % NBINS`, so every
 * thread whose index is congruent modulo NBINS advances the same state
 * without synchronisation -- confirm that is acceptable for this demo.
 */
__global__ void perlinInitKernel(float* base, float* perlin,
                                 curandState* state) {
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    if (col >= IMGW || row >= IMGH) { return; }

    const int index = row * IMGW + col;
    curandState* rng = &state[index % NBINS];

    base[index]   = curand_uniform(rng);
    perlin[index] = 0.0f;
}

/*
 * Adds one octave to `perlin`: each pixel bilinearly interpolates the four
 * `base` samples on the surrounding lattice of spacing `period` (wrapping at
 * the image edge) and accumulates the result scaled by `amp`.
 *
 * Expects a 2D launch covering IMGW x IMGH threads; surplus threads return
 * immediately.
 */
__global__ void perlinComputeKernel(float* perlin, float* base, float amp,
                                    int period) {
    const unsigned col = threadIdx.x + blockIdx.x * blockDim.x;
    const unsigned row = threadIdx.y + blockIdx.y * blockDim.y;

    if (col >= IMGW || row >= IMGH) { return; }

    const int index  = row * IMGW + col;
    const float freq = 1.0f / period;

    // lattice columns enclosing `col` and the horizontal blend factor
    const int c0       = (col / period) * period;
    const int c1       = (c0 + period) % IMGW;
    const float cBlend = (col - c0) * freq;

    // lattice rows enclosing `row` and the vertical blend factor
    const int r0       = (row / period) * period;
    const int r1       = (r0 + period) % IMGH;
    const float rBlend = (row - r0) * freq;

    const float top =
        interp(base[c0 + IMGW * r0], base[c1 + IMGW * r0], cBlend);
    const float bot =
        interp(base[c0 + IMGW * r1], base[c1 + IMGW * r1], cBlend);

    perlin[index] += (amp * interp(top, bot, rBlend));
}

/*
 * Divides every entry of `perlin` by `tamp`. Expects a 2D launch covering
 * IMGW x IMGH threads; surplus threads return immediately.
 */
__global__ void perlinNormalize(float* perlin, float tamp) {
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    if (col >= IMGW || row >= IMGH) { return; }

    const int index      = row * IMGW + col;
    const float summed   = perlin[index];
    perlin[index]        = summed / tamp;
}

/*
 * Regenerates the noise on the device: refills `base` with random values,
 * accumulates seven octaves (periods 64 down to 1, each weighted half as much
 * as the previous one) into `perlin` and divides by the sum of the weights.
 *
 * All kernels are launched on 32x8 blocks covering IMGW x IMGH threads, and
 * every launch is followed by an error query.
 */
void PerlinNoise::generateNoise() {
    static dim3 threads(32, 8);
    dim3 blocks(divup(IMGW, threads.x), divup(IMGH, threads.y));

    float persistence = 0.5f;
    float amp         = 1.0f;
    float tamp        = 0.0f;

    // clang-format off
    perlinInitKernel<<<blocks, threads>>>(base, perlin, state);
    // clang-format on
    // Kernel launches return no status themselves; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());

    for (int octave = 6; octave >= 0; --octave) {
        int period = 1 << octave;

        // clang-format off
        perlinComputeKernel<<<blocks, threads>>>(perlin, base, amp, period);
        // clang-format on
        FORGE_CUDA_CHECK(cudaGetLastError());

        tamp += amp;
        amp *= persistence;
    }

    // clang-format off
    perlinNormalize<<<blocks, threads>>>(perlin, tamp);
    // clang-format on
    FORGE_CUDA_CHECK(cudaGetLastError());
}

/*
 * Renders the noise table into a 4-byte-per-pixel image: each pixel looks up
 * the table entry at its position rescaled to IMGW x IMGH, scales it by 255
 * and writes it to the first three channels; the fourth channel is set to
 * 255.
 *
 * Expects a 2D launch covering width x height threads; surplus threads
 * return immediately.
 */
__global__ void fillImageKernel(unsigned char* ptr, unsigned width,
                                unsigned height, float* perlin) {
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    if (col >= width || row >= height) { return; }

    // position of this pixel inside the noise table
    const unsigned u = (unsigned)(IMGW * col / (float)(width));
    const unsigned v = (unsigned)(IMGH * row / (float)(height));
    const int idx    = u + v * IMGW;

    const unsigned char grey = 255 * perlin[idx];

    const int offset  = col + row * width;
    unsigned char* px = ptr + offset * 4;
    px[0]             = grey;
    px[1]             = grey;
    px[2]             = grey;
    px[3]             = 255;
}

/*
 * Regenerates the noise held by `pn` and renders it into the device bitmap
 * `bmp`, launching fillImageKernel on 32x8 blocks that cover the bitmap.
 *
 * @param bmp destination image in device memory
 * @param pn  noise generator whose `perlin` buffer is rendered
 */
void kernel(Bitmap& bmp, PerlinNoise& pn) {
    static dim3 threads(32, 8);

    pn.generateNoise();

    dim3 blocks(divup(bmp.width, threads.x), divup(bmp.height, threads.y));

    // clang-format off
    fillImageKernel<<<blocks, threads>>>(bmp.ptr, bmp.width, bmp.height,
                                         pn.perlin);
    // clang-format on

    // A kernel launch returns no status itself; query it explicitly so an
    // invalid launch configuration does not go unnoticed.
    FORGE_CUDA_CHECK(cudaGetLastError());
}

/*
 * Adds the first channel of every pixel of an IMGW x IMGH, 4-byte-per-pixel
 * image to the histogram `histOut` using atomic increments.
 *
 * Expects a 2D launch covering IMGW x IMGH threads; surplus threads do
 * nothing. `histOut` must hold `nbins` counters (nbins >= 1) and is not
 * reset here.
 *
 * @param perlinNoise image pixels
 * @param histOut     bin counters
 * @param nbins       number of bins
 */
__global__ void histogramKernel(const unsigned char* perlinNoise, int* histOut,
                                const unsigned nbins) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x < IMGW && y < IMGH) {
        int offset             = y * IMGW + x;
        unsigned char noiseVal = perlinNoise[offset * 4 + 0];

        int bin = __float2int_rd(nbins * (__int2float_rd(noiseVal) / 255.f));
        // An intensity of 255 maps to exactly nbins, one past the last
        // counter; fold it into the last bin instead of writing out of
        // bounds.
        bin = min(bin, (int)nbins - 1);

        atomicAdd(histOut + bin, 1);
    }
}

/*
 * Writes three consecutive uniform random numbers per thread into
 * histColors[3 * bin .. 3 * bin + 2], drawn from states[bin].
 *
 * There is no bounds check: the launch must not create more threads than
 * `states` has entries and `histColors` has triples.
 */
__global__ void histColorsKernel(float* histColors, curandState* states) {
    const int bin = threadIdx.x + blockIdx.x * blockDim.x;

    curandState* rng = states + bin;
    float* rgb       = histColors + 3 * bin;

    for (int channel = 0; channel < 3; ++channel) {
        rgb[channel] = curand_uniform(rng);
    }
}

/*
 * Recomputes the histogram of `bmp` and picks new random bin colours, all on
 * the device.
 *
 * @param bmp        image in device memory
 * @param histOut    device array of nbins counters; zeroed here before
 *                   counting
 * @param nbins      number of bins; also used as the block size of the colour
 *                   kernel, which reads one entry of the global `state` array
 *                   per bin
 * @param histColors device array receiving 3 * nbins colour components
 */
void populateBins(Bitmap& bmp, int* histOut, const unsigned nbins,
                  float* histColors) {
    static const dim3 threads(8, 8);
    dim3 blocks(divup(bmp.width, threads.x), divup(bmp.height, threads.y));

    // Byte-wise memset is fine here because the fill value is zero.
    FORGE_CUDA_CHECK(cudaMemset(histOut, 0, nbins * sizeof(int)));

    // clang-format off
    histogramKernel<<<blocks, threads>>>(bmp.ptr, histOut, nbins);
    // clang-format on
    // Kernel launches return no status themselves; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());

    // clang-format off
    histColorsKernel<<<1, nbins>>>(histColors, state);
    // clang-format on
    FORGE_CUDA_CHECK(cudaGetLastError());
}

OpenCL¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#include <algorithm>
#include <cmath>
#include <ctime>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>
#include "cl_helpers.h"

using namespace cl;
using namespace std;

// Image extents; passed to the OpenCL kernels as their IMGW / IMGH arguments.
const unsigned IMGW     = 256;
const unsigned IMGH     = 256;
// NOTE(review): presumably the window size, as in the sibling examples; the
// use is not visible in this part of the file.
const unsigned DIMX     = 1000;
const unsigned DIMY     = 800;
// IMGW * IMGH * 4 -- presumably the image size in bytes at four bytes per
// pixel; confirm against its use in main().
const unsigned IMG_SIZE = IMGW * IMGH * 4;
// Number of histogram bins.
const unsigned NBINS    = 256;
// NOTE(review): kernel() below defines its own local persistence of 0.5f;
// check whether this constant is still used.
const float PERSISTENCE = 0.5f;

#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>

// clang-format off
// OpenCL C source of all kernels used by this example; it is compiled at run
// time by kernel(). Contains the noise generation kernels (init, compute,
// normalizeNoise), the image fill, a memset helper, the histogram and the
// random bin colour kernel.
// NOTE(review): rand() still relies on double precision literals; left
// unchanged so the generated values stay the same.
static const std::string perlinKernels =
R"EOK(
float rand(int x) {
    x = (x << 13) ^ x;
    return (1.0 - ((x * (x * x * 15731 + 789221) + 1376312589) & 0x7fffffff) /
                      1073741824.0);
}

float interp(float x0, float x1, float t) { return x0 + (x1 - x0) * t; }

kernel void init(global float* base, global float* perlin, int IMGW, int IMGH,
                 int randSeed) {
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < IMGW && y < IMGH) {
        int i     = x + y * IMGW;
        base[i]   = (1 + rand(randSeed * i)) / 2.0f;
        perlin[i] = 0.0f;
    }
}

kernel void compute(global float* perlin, global float* base, unsigned IMGW,
                    unsigned IMGH, float amp, int period) {
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < IMGW && y < IMGH) {
        int index = y * IMGW + x;

        float freq = 1.0f / period;

        int si0      = (x / period) * period;
        int si1      = (si0 + period) % IMGW;
        float hblend = (x - si0) * freq;

        int sj0      = (y / period) * period;
        int sj1      = (sj0 + period) % IMGH;
        float vblend = (y - sj0) * freq;

        float top =
            interp(base[si0 + IMGW * sj0], base[si1 + IMGW * sj0], hblend);
        float bot =
            interp(base[si0 + IMGW * sj1], base[si1 + IMGW * sj1], hblend);

        perlin[index] += (amp * interp(top, bot, vblend));
    }
}

kernel void normalizeNoise(global float* perlin, unsigned IMGW, unsigned IMGH,
                           float tamp) {
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < IMGW && y < IMGH) {
        int index     = y * IMGW + x;
        perlin[index] = perlin[index] / tamp;
    }
}

kernel void fillImage(global unsigned char* ptr, unsigned width,
                      unsigned height, global float* perlin, unsigned IMGW,
                      unsigned IMGH) {
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < width && y < height) {
        int offset = x + y * width;

        unsigned u = (unsigned)(IMGW * x / (float)(width));
        unsigned v = (unsigned)(IMGH * y / (float)(height));
        int idx    = u + v * IMGW;

        unsigned char val   = 255 * perlin[idx];
        ptr[offset * 4 + 0] = val;
        ptr[offset * 4 + 1] = val;
        ptr[offset * 4 + 2] = val;
        ptr[offset * 4 + 3] = 255;
    }
}

kernel void memSet(global int* out, unsigned len) {
    if (get_global_id(0) < len) out[get_global_id(0)] = 0;
}

kernel void histogram(const global unsigned char* perlinNoise,
                      global int* histOut, const unsigned w, const unsigned h,
                      const unsigned nbins) {
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < w && y < h) {
        int offset             = y * w + x;
        unsigned char noiseVal = perlinNoise[offset * 4 + 0];
        /* An intensity of 255 maps to exactly nbins, one past the last
         * counter; fold it into the last bin. */
        offset = min((int)(nbins * (noiseVal / 255.f)), (int)nbins - 1);
        atomic_add(histOut + offset, 1);
    }
}

kernel void setColors(global float* out, unsigned rseed, unsigned gseed,
                      unsigned bseed) {
    int i          = get_global_id(0);
    out[3 * i + 0] = (1 + rand(rseed * i)) / 2.0f;
    out[3 * i + 1] = (1 + rand(gseed * i)) / 2.0f;
    out[3 * i + 2] = (1 + rand(bseed * i)) / 2.0f;
}
)EOK";
// clang-format on

// Integer division of a by b, rounded up (intended for positive operands).
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}

/**
 * Regenerates the noise image, its histogram and the per-bin colors on the
 * OpenCL device.
 *
 * On the first call the kernel source is built and the kernel objects are
 * created; they are cached in function-local statics for later calls.
 *
 * @param image   RGBA output image, IMGW x IMGH pixels
 * @param base    scratch buffer filled by the "init" kernel
 * @param perlin  accumulated noise, one float per pixel
 * @param histOut NBINS integer counters
 * @param colors  3 * NBINS floats, one RGB triple per bin
 * @param queue   queue every kernel is enqueued on
 * @param device  device the program is built for
 *
 * If the build or kernel creation fails, the build log is printed and the
 * process exits with code 255.
 */
void kernel(cl::Buffer& image, cl::Buffer& base, cl::Buffer& perlin,
            cl::Buffer& histOut, cl::Buffer& colors, cl::CommandQueue& queue,
            cl::Device& device) {
    static bool compileFlag = true;
    static cl::Program prog;
    static cl::Kernel initKernel, computeKernel, normKernel, fillKernel;
    static cl::Kernel memSetKernel, genHistogram, genHistColors;

    if (compileFlag) {
        // Seed the host RNG exactly once. Re-seeding from the wall clock on
        // every call would restart the same rand() sequence for all calls
        // made within the same second.
        std::srand((unsigned)(std::time(0)));

        try {
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), perlinKernels,
                               false);

            std::vector<cl::Device> devs;
            devs.push_back(device);
            prog.build(devs);

            initKernel    = cl::Kernel(prog, "init");
            computeKernel = cl::Kernel(prog, "compute");
            normKernel    = cl::Kernel(prog, "normalizeNoise");
            fillKernel    = cl::Kernel(prog, "fillImage");
            memSetKernel  = cl::Kernel(prog, "memSet");
            genHistogram  = cl::Kernel(prog, "histogram");
            genHistColors = cl::Kernel(prog, "setColors");
        } catch (const cl::Error& err) {
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    // 16x16 work-groups; the global size is rounded up to cover the image,
    // so the kernels bounds-check their coordinates.
    static const NDRange local(16, 16);
    NDRange global(local[0] * divup(IMGW, (int)(local[0])),
                   local[1] * divup(IMGH, (int)(local[1])));

    float persistence = 0.5f;
    float amp         = 1.0f;
    float tamp        = 0.0f;

    initKernel.setArg(0, base);
    initKernel.setArg(1, perlin);
    initKernel.setArg(2, IMGW);
    initKernel.setArg(3, IMGH);
    initKernel.setArg(4, std::rand());
    queue.enqueueNDRangeKernel(initKernel, cl::NullRange, global, local);

    // Accumulate octaves with periods 64, 32, ..., 1. The amplitude is
    // scaled by persistence after each octave and tamp records the sum of
    // the amplitudes that were applied.
    for (int octave = 6; octave >= 0; --octave) {
        int period = 1 << octave;
        computeKernel.setArg(0, perlin);
        computeKernel.setArg(1, base);
        computeKernel.setArg(2, IMGW);
        computeKernel.setArg(3, IMGH);
        computeKernel.setArg(4, amp);
        computeKernel.setArg(5, period);
        queue.enqueueNDRangeKernel(computeKernel, cl::NullRange, global, local);
        tamp += amp;
        amp *= persistence;
    }

    // Divide every sample by the total amplitude.
    normKernel.setArg(0, perlin);
    normKernel.setArg(1, IMGW);
    normKernel.setArg(2, IMGH);
    normKernel.setArg(3, tamp);
    queue.enqueueNDRangeKernel(normKernel, cl::NullRange, global, local);

    // The image has the same extent as the noise grid, so IMGW/IMGH are
    // passed both as the image size and as the noise size.
    fillKernel.setArg(0, image);
    fillKernel.setArg(1, IMGW);
    fillKernel.setArg(2, IMGH);
    fillKernel.setArg(3, perlin);
    fillKernel.setArg(4, IMGW);
    fillKernel.setArg(5, IMGH);
    queue.enqueueNDRangeKernel(fillKernel, cl::NullRange, global, local);

    // One work-item per histogram bin.
    static const NDRange global_hist(NBINS);

    // Clear the counters before they are accumulated into.
    memSetKernel.setArg(0, histOut);
    memSetKernel.setArg(1, NBINS);
    queue.enqueueNDRangeKernel(memSetKernel, cl::NullRange, global_hist);

    genHistogram.setArg(0, image);
    genHistogram.setArg(1, histOut);
    genHistogram.setArg(2, IMGW);
    genHistogram.setArg(3, IMGH);
    genHistogram.setArg(4, NBINS);
    queue.enqueueNDRangeKernel(genHistogram, cl::NullRange, global, local);

    // Fresh random seed per color channel on every call.
    genHistColors.setArg(0, colors);
    genHistColors.setArg(1, std::rand());
    genHistColors.setArg(2, std::rand());
    genHistColors.setArg(3, std::rand());
    queue.enqueueNDRangeKernel(genHistColors, cl::NullRange, global_hist);
}

int main(void) {
    try {
        /*
         * First Forge call should be a window creation call
         * so that necessary OpenGL context is created for any
         * other forge::* object to be created successfully
         */
        forge::Window wnd(DIMX, DIMY, "Histogram Demo");
        wnd.makeCurrent();

        forge::Image img(IMGW, IMGH, FG_RGBA, forge::u8);

        forge::Chart chart(FG_CHART_2D);

        chart.setAxesLabelFormat("%3.1f", "%.2e");

        /* set x axis limits to maximum and minimum values of data
         * and y axis limits to range [0, number of pixels ideally]
         * but practically total number of pixels as y range will skew
         * the histogram graph vertically. Therefore setting it to
         * 25% of total number of pixels */
        chart.setAxesLimits(0, 1, 0, IMGW * IMGH / (float)(NBINS / 4.0));

        /*
         * Create histogram object specifying number of bins
         */
        forge::Histogram hist = chart.histogram(NBINS, forge::s32);
        /*
         * Set histogram colors
         */
        hist.setColor(FG_YELLOW);

        /*
         * Helper function to create a CLGL interop context.
         * This function checks for if the extension is available
         * and creates the context on the appropriate device.
         * Note: context and queue are defined in cl_helpers.h
         */
        context       = createCLGLContext(wnd);
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue         = CommandQueue(context, device);

        cl::Buffer image(context, CL_MEM_READ_WRITE, IMG_SIZE);
        cl::Buffer baseNoise(context, CL_MEM_READ_WRITE, IMG_SIZE);
        cl::Buffer perlinNoise(context, CL_MEM_READ_WRITE, IMG_SIZE);
        cl::Buffer histOut(context, CL_MEM_READ_WRITE, NBINS * sizeof(int));
        cl::Buffer colors(context, CL_MEM_READ_WRITE,
                          3 * NBINS * sizeof(float));

        GfxHandle* handles[3];

        createGLBuffer(&handles[0], img.pixels(), FORGE_IMAGE_BUFFER);
        createGLBuffer(&handles[1], hist.vertices(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[2], hist.colors(), FORGE_VERTEX_BUFFER);

        unsigned frame = 0;
        do {
            if (frame % 8 == 0) {
                kernel(image, baseNoise, perlinNoise, histOut, colors, queue,
                       device);

                copyToGLBuffer(handles[0], (ComputeResourceHandle)image(),
                               img.size());
                copyToGLBuffer(handles[1], (ComputeResourceHandle)histOut(),
                               hist.verticesSize());
                copyToGLBuffer(handles[2], (ComputeResourceHandle)colors(),
                               hist.colorsSize());

                frame = 0;
            }

            /*
             * Split the window into grid regions
             */
            wnd.draw(1, 2, 0, img, "Dynamic Perlin Noise");
            wnd.draw(1, 2, 1, chart, "Histogram of Noisy Image");

            wnd.swapBuffers();
            frame++;
        } while (!wnd.close());

        releaseGLBuffer(handles[0]);
        releaseGLBuffer(handles[1]);
        releaseGLBuffer(handles[2]);

    } catch (forge::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    } catch (cl::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }

    return 0;
}

3D Line Plot¶

CPU¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>

// Window dimensions handed to forge::Window (presumably width and height
// in pixels -- confirm against the Forge API).
const unsigned DIMX = 1000;
const unsigned DIMY = 800;

// Range of z over which the curve is sampled.
static const float ZMIN = 0.1f;
static const float ZMAX = 10.f;

// Spacing between consecutive z samples, and the resulting sample count.
const float DX     = 0.005f;
const size_t ZSIZE = (size_t)((ZMAX - ZMIN) / DX + 1);

using namespace std;

/* Rebuilds vec as ZSIZE consecutive (x, y, z) triples describing the curve
 * for parameter t, sampling z at ZMIN, ZMIN + dx, ZMIN + 2 * dx, ...
 * Any previous contents of vec are discarded. */
void generateCurve(float t, float dx, std::vector<float>& vec) {
    vec.clear();

    const int count = (int)ZSIZE;
    for (int n = 0; n < count; ++n) {
        const float z = ZMIN + n * dx;

        // x and y are divided by z; the third coordinate is z plus a small
        // offset that depends only on t.
        const float xyz[3] = {
            (float)(cos(z * t + t) / z),
            (float)(sin(z * t + t) / z),
            (float)(z + 0.1 * sin(t)),
        };
        vec.insert(vec.end(), xyz, xyz + 3);
    }
}

int main(void) {
    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(DIMX, DIMY, "Three dimensional line plot demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_3D);

    chart.setAxesLabelFormat("%3.1f", "%3.1f", "%.2e");

    chart.setAxesLimits(-1.1f, 1.1f, -1.1f, 1.1f, 0.f, 10.f);

    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    forge::Plot plot3 = chart.plot(ZSIZE, forge::f32);

    // generate a surface
    std::vector<float> function;
    static float t = 0;
    generateCurve(t, DX, function);

    GfxHandle* handle;
    createGLBuffer(&handle, plot3.vertices(), FORGE_VERTEX_BUFFER);

    /* copy your data into the pixel buffer object exposed by
     * forge::Plot class and then proceed to rendering.
     * To help the users with copying the data from compute
     * memory to display memory, Forge provides copy headers
     * along with the library to help with this task
     */
    copyToGLBuffer(handle, (ComputeResourceHandle)function.data(),
                   plot3.verticesSize());

    do {
        t += 0.01f;
        generateCurve(t, DX, function);
        copyToGLBuffer(handle, (ComputeResourceHandle)function.data(),
                       plot3.verticesSize());
        wnd.draw(chart);
    } while (!wnd.close());

    releaseGLBuffer(handle);

    return 0;
}

CUDA¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <cuComplex.h>
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
#include <iostream>

// Window dimensions handed to forge::Window (presumably width and height
// in pixels -- confirm against the Forge API).
const unsigned DIMX = 1000;
const unsigned DIMY = 800;

// Range of z over which the curve is sampled.
static const float ZMIN = 0.1f;
static const float ZMAX = 10.f;

// Spacing between consecutive z samples, and the resulting sample count.
const float DX     = 0.005f;
const size_t ZSIZE = (size_t)((ZMAX - ZMIN) / DX + 1);

// Host-side launcher: fills dev_out (device memory, 3 floats per sample)
// with the curve for parameter t and sample spacing dx.
void kernel(float t, float dx, float* dev_out);

int main(void) {
    /* The window has to be the first Forge object created: it sets up the
     * OpenGL context that every other forge::* object needs. */
    forge::Window wnd(DIMX, DIMY, "Three dimensional line plot demo");
    wnd.makeCurrent();

    /* 3D chart: label formats, axis limits and axis titles */
    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLabelFormat("%3.1f", "%3.1f", "%.2e");
    chart.setAxesLimits(-1.1f, 1.1f, -1.1f, 1.1f, 0.f, 10.f);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    forge::Plot plot3 = chart.plot(ZSIZE, forge::f32);

    /* Device buffer with room for ZSIZE (x, y, z) float triples, filled
     * with the curve for t = 0 */
    float t = 0;
    float* dev_out;
    const size_t curveBytes = ZSIZE * 3 * sizeof(float);
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dev_out, curveBytes));
    kernel(t, DX, dev_out);

    GfxHandle* vbo;
    createGLBuffer(&vbo, plot3.vertices(), FORGE_VERTEX_BUFFER);

    /* Forge ships copy helpers for moving data from compute memory into
     * the buffers it renders from; here the curve goes into the plot's
     * vertex buffer. */
    copyToGLBuffer(vbo, (ComputeResourceHandle)dev_out, plot3.verticesSize());

    /* Advance t, regenerate, upload and draw until the window closes.
     * At least one frame is always drawn. */
    for (;;) {
        t += 0.01f;
        kernel(t, DX, dev_out);
        copyToGLBuffer(vbo, (ComputeResourceHandle)dev_out,
                       plot3.verticesSize());
        wnd.draw(chart);
        if (wnd.close()) break;
    }

    FORGE_CUDA_CHECK(cudaFree(dev_out));
    releaseGLBuffer(vbo);
    return 0;
}

/*
 * Writes one (x, y, z) curve sample per thread into out.
 *
 * Launch layout: 1D grid of 1D blocks; the grid may overshoot ZSIZE because
 * surplus threads exit immediately.
 *
 *   t     : curve parameter
 *   dx    : spacing between consecutive z samples
 *   out   : device buffer with room for 3 * ZSIZE floats
 *   ZMIN  : z value of sample 0 (parameter; shadows the file-level constant)
 *   ZSIZE : number of samples (parameter; shadows the file-level constant)
 */
__global__ void generateCurve(float t, float dx, float* out, const float ZMIN,
                              const size_t ZSIZE) {
    int offset = blockIdx.x * blockDim.x + threadIdx.x;
    if (offset >= ZSIZE) return;

    float z = ZMIN + offset * dx;

    // Single-precision functions and literals throughout: a bare 0.1 would
    // promote the z expression to double arithmetic.
    out[3 * offset]     = cosf(z * t + t) / z;
    out[3 * offset + 1] = sinf(z * t + t) / z;
    out[3 * offset + 2] = z + 0.1f * sinf(t);
}

// Integer division of a by b, rounded up (intended for positive operands).
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}

/*
 * Launches generateCurve with enough 1024-thread blocks to cover ZSIZE
 * samples and reports a failed launch through FORGE_CUDA_CHECK.
 *
 *   t       : curve parameter
 *   dx      : spacing between consecutive z samples
 *   dev_out : device buffer with room for 3 * ZSIZE floats
 */
void kernel(float t, float dx, float* dev_out) {
    static const dim3 threads(1024);
    // Derive the block count from threads.x so the two cannot drift apart.
    dim3 blocks(divup((int)ZSIZE, (int)threads.x));

    // clang-format off
    generateCurve<<<blocks, threads>>>(t, dx, dev_out, ZMIN, ZSIZE);
    // clang-format on

    // A kernel launch returns no status itself; an invalid launch
    // configuration only shows up through cudaGetLastError().
    FORGE_CUDA_CHECK(cudaGetLastError());
}

OpenCL¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>

#include "cl_helpers.h"

#include <algorithm>
#include <cmath>
#include <complex>
#include <iostream>
#include <iterator>
#include <mutex>
#include <vector>

// Window dimensions handed to forge::Window (presumably width and height
// in pixels -- confirm against the Forge API).
const unsigned DIMX = 1000;
const unsigned DIMY = 800;

// Range of z over which the curve is sampled.
static const float ZMIN = 0.1f;
static const float ZMAX = 10.f;

// Spacing between consecutive z samples, and the resulting sample count.
const float DX              = 0.005f;
static const unsigned ZSIZE = (unsigned)((ZMAX - ZMIN) / DX + 1);

using namespace std;

#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>

// clang-format off
static const std::string sincos_surf_kernel =
R"EOK(
// Writes one (x, y, z) curve sample per work-item into out.
//   out  : room for 3 * SIZE floats
//   t    : curve parameter
//   dx   : spacing between consecutive z samples
//   zmin : z value of sample 0
//   SIZE : number of samples; work-items at or past SIZE do nothing
kernel
void generateCurve(global float* out, const float t,
                   const float dx, const float zmin,
                   const unsigned SIZE) {
    int offset = get_global_id(0);
    if (offset >= SIZE) return;

    float z = zmin + offset * dx;

    // 0.1f rather than 0.1: an unsuffixed literal is double precision, which
    // depends on fp64 support and promotes the whole expression.
    out[offset*3 + 0] = cos(z*t+t)/z;
    out[offset*3 + 1] = sin(z*t+t)/z;
    out[offset*3 + 2] = z + 0.1f*sin(t);
}
)EOK";
// clang-format on

// Integer division of a by b, rounded up (intended for positive operands).
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}

/* Fills devOut with the curve for parameter t by running the
 * "generateCurve" OpenCL kernel with one work-item per sample.
 * The program is built and the kernel object created on the first call
 * only; both are kept in function-local statics. */
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue, float t) {
    static std::once_flag buildOnce;
    static cl::Program program;
    static cl::Kernel curveKernel;

    std::call_once(buildOnce, [queue]() {
        cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
        // Third argument true: build the program as part of construction.
        program     = cl::Program(ctx, sincos_surf_kernel, true);
        curveKernel = cl::Kernel(program, "generateCurve");
    });

    // Argument order follows the kernel signature: out, t, dx, zmin, SIZE.
    curveKernel.setArg(0, devOut);
    curveKernel.setArg(1, t);
    curveKernel.setArg(2, DX);
    curveKernel.setArg(3, ZMIN);
    curveKernel.setArg(4, ZSIZE);

    queue.enqueueNDRangeKernel(curveKernel, cl::NullRange, NDRange(ZSIZE));
}

int main(void) {
    try {
        /*
         * First Forge call should be a window creation call
         * so that necessary OpenGL context is created for any
         * other forge::* object to be created successfully
         */
        forge::Window wnd(DIMX, DIMY, "Three dimensional line plot demo");
        wnd.makeCurrent();

        forge::Chart chart(FG_CHART_3D);

        chart.setAxesLabelFormat("%3.1f", "%3.1f", "%.2e");

        chart.setAxesLimits(-1.1f, 1.1f, -1.1f, 1.1f, 0.f, 10.f);

        chart.setAxesTitles("x-axis", "y-axis", "z-axis");

        forge::Plot plot3 = chart.plot(ZSIZE, forge::f32);

        /*
         * Helper function to create a CLGL interop context.
         * This function checks for if the extension is available
         * and creates the context on the appropriate device.
         * Note: context and queue are defined in cl_helpers.h
         */
        context       = createCLGLContext(wnd);
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue         = CommandQueue(context, device);

        cl::Buffer devOut(context, CL_MEM_READ_WRITE,
                          sizeof(float) * ZSIZE * 3);
        static float t = 0;
        kernel(devOut, queue, t);

        GfxHandle* handle;
        createGLBuffer(&handle, plot3.vertices(), FORGE_VERTEX_BUFFER);
        /* copy your data into the pixel buffer object exposed by
         * forge::Surface class and then proceed to rendering.
         * To help the users with copying the data from compute
         * memory to display memory, Forge provides copy headers
         * along with the library to help with this task
         */
        copyToGLBuffer(handle, (ComputeResourceHandle)devOut(),
                       plot3.verticesSize());

        do {
            t += 0.01f;
            kernel(devOut, queue, t);
            copyToGLBuffer(handle, (ComputeResourceHandle)devOut(),
                           plot3.verticesSize());
            wnd.draw(chart);
        } while (!wnd.close());

        releaseGLBuffer(handle);

    } catch (forge::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    } catch (cl::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }
    return 0;
}

Multiple line plots¶

CPU¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>

// Window dimensions handed to forge::Window (presumably width and height
// in pixels -- confirm against the Forge API).
const unsigned DIMX = 1000;
const unsigned DIMY = 800;

// Range of x over which the functions are sampled: 0 to roughly 2 * pi.
const float FRANGE_START = 0.f;
const float FRANGE_END   = 2.f * 3.1415926f;

using namespace std;
/**
 * Appends (x, map(x)) pairs to vec for x = range_start, range_start + dx,
 * ... while x < range_end. Existing contents of vec are kept.
 *
 * @param range_start first sample position
 * @param range_end   exclusive upper bound of the sample positions
 * @param dx          step between samples; must be positive
 * @param vec         receives two floats per sample: x, then map(x)
 * @param map         function evaluated at every sample position
 *
 * Nothing is appended when dx is not positive (zero, negative or NaN) or
 * when the range is empty or reversed.
 */
void map_range_to_vec_vbo(float range_start, float range_end, float dx,
                          std::vector<float>& vec, float (*map)(float)) {
    // A zero or negative step never reaches range_end and would make the
    // loop below run forever; the negated comparison also rejects NaN.
    if (!(dx > 0)) return;
    if (range_start > range_end) return;

    for (float i = range_start; i < range_end; i += dx) {
        vec.push_back(i);
        vec.push_back((*map)(i));
    }
}

int main(void) {
    std::vector<float> sinData;
    std::vector<float> cosData;
    std::vector<float> tanData;
    std::vector<float> logData;
    map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, sinData, &sinf);
    map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, cosData, &cosf);
    map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, tanData, &tanf);
    map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, logData, &log10f);

    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(DIMX, DIMY, "Plotting Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);

    /* Create several plot objects which creates the necessary
     * vertex buffer objects to hold the different plot types
     */
    forge::Plot plt0 = chart.plot((unsigned)(sinData.size() / 2),
                                  forge::f32);  // create a default plot
    forge::Plot plt1 =
        chart.plot((unsigned)(cosData.size() / 2), forge::f32, FG_PLOT_LINE,
                   FG_MARKER_NONE);  // or specify a specific plot type
    forge::Plot plt2 = chart.plot(
        (unsigned)(tanData.size() / 2), forge::f32, FG_PLOT_LINE,
        FG_MARKER_TRIANGLE);  // last parameter specifies marker shape
    forge::Plot plt3 = chart.plot((unsigned)(logData.size() / 2), forge::f32,
                                  FG_PLOT_SCATTER, FG_MARKER_CROSS);

    /*
     * Set plot colors
     */
    plt0.setColor(FG_RED);
    plt1.setColor(FG_BLUE);
    plt2.setColor(FG_YELLOW);                 // use a forge predefined color
    plt3.setColor((forge::Color)0x257973FF);  // or any hex-valued color
    /*
     * Set plot legends
     */
    plt0.setLegend("Sine");
    plt1.setLegend("Cosine");
    plt2.setLegend("Tangent");
    plt3.setLegend("Log base 10");

    GfxHandle* handles[4];
    createGLBuffer(&handles[0], plt0.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], plt1.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], plt2.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[3], plt3.vertices(), FORGE_VERTEX_BUFFER);

    /* copy your data into the pixel buffer object exposed by
     * forge::Plot class and then proceed to rendering.
     * To help the users with copying the data from compute
     * memory to display memory, Forge provides copy headers
     * along with the library to help with this task
     */
    copyToGLBuffer(handles[0], (ComputeResourceHandle)sinData.data(),
                   plt0.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)cosData.data(),
                   plt1.verticesSize());
    copyToGLBuffer(handles[2], (ComputeResourceHandle)tanData.data(),
                   plt2.verticesSize());
    copyToGLBuffer(handles[3], (ComputeResourceHandle)logData.data(),
                   plt3.verticesSize());

    do { wnd.draw(chart); } while (!wnd.close());

    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);
    releaseGLBuffer(handles[3]);

    return 0;
}

CUDA¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <cuComplex.h>
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
#include <iostream>

// Window dimensions handed to forge::Window (presumably width and height
// in pixels -- confirm against the Forge API).
const unsigned DIMX = 1000;
const unsigned DIMY = 800;

// Sample spacing, sampled x range (0 to roughly 2 * pi) and the number of
// samples that fit into that range at spacing dx.
static const float dx           = 0.1f;
static const float FRANGE_START = 0.f;
static const float FRANGE_END   = 2 * 3.141592f;
static const size_t DATA_SIZE   = (size_t)((FRANGE_END - FRANGE_START) / dx);

// Host-side launcher: fills dev_out (device memory, 2 floats per sample)
// with (x, f(x)) pairs; functionCode selects f (0 sin, 1 cos, 2 tan,
// 3 log10).
void kernel(float* dev_out, int functionCode);

int main(void) {
    /* The window has to be the first Forge object created: it sets up the
     * OpenGL context that every other forge::* object needs. */
    forge::Window wnd(DIMX, DIMY, "Plotting Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);

    /* One plot per function, DATA_SIZE points each */
    // default plot
    forge::Plot plt0 = chart.plot(DATA_SIZE, forge::f32);
    // explicit plot type, no markers
    forge::Plot plt1 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_NONE);
    // last parameter selects the marker shape
    forge::Plot plt2 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_TRIANGLE);
    forge::Plot plt3 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_SCATTER, FG_MARKER_CROSS);

    /* Color and legend per plot */
    plt0.setColor(FG_RED);
    plt0.setLegend("Sine");
    plt1.setColor(FG_BLUE);
    plt1.setLegend("Cosine");
    plt2.setColor(FG_YELLOW);  // a forge predefined color
    plt2.setLegend("Tangent");
    plt3.setColor((forge::Color)0x257973FF);  // or any hex-valued color
    plt3.setLegend("Log base 10");

    forge::Plot* plots[4] = {&plt0, &plt1, &plt2, &plt3};

    /* One device buffer per function, indexed by function code
     * (0 sin, 1 cos, 2 tan, 3 log10); two floats per sample. */
    float* devData[4];
    const size_t plotBytes = sizeof(float) * DATA_SIZE * 2;
    for (int fn = 0; fn < 4; ++fn) {
        FORGE_CUDA_CHECK(cudaMalloc((void**)&devData[fn], plotBytes));
    }

    for (int fn = 0; fn < 4; ++fn) { kernel(devData[fn], fn); }

    GfxHandle* handles[4];
    for (int fn = 0; fn < 4; ++fn) {
        createGLBuffer(&handles[fn], plots[fn]->vertices(),
                       FORGE_VERTEX_BUFFER);
    }

    /* Forge ships copy helpers for moving data from compute memory into
     * the buffers it renders from; each device buffer goes into the vertex
     * buffer of its plot. */
    for (int fn = 0; fn < 4; ++fn) {
        copyToGLBuffer(handles[fn], (ComputeResourceHandle)devData[fn],
                       plots[fn]->verticesSize());
    }

    /* The data is static, so just redraw until the window closes */
    for (;;) {
        wnd.draw(chart);
        if (wnd.close()) break;
    }

    for (int fn = 0; fn < 4; ++fn) { FORGE_CUDA_CHECK(cudaFree(devData[fn])); }
    for (int fn = 0; fn < 4; ++fn) { releaseGLBuffer(handles[fn]); }

    return 0;
}

/*
 * Writes one (x, f(x)) pair per thread into out, where
 * x = _frange_start + i * _dx and fnCode selects f:
 * 0 sinf, 1 cosf, 2 tanf, 3 log10f. For any other fnCode only x is written.
 *
 * Launch layout: 1D grid of 1D blocks; threads at or past _data_size exit
 * without writing. out needs room for 2 * _data_size floats.
 */
__global__ void simple_sinf(float* out, const size_t _data_size, int fnCode,
                            const float _dx, const float _frange_start) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= _data_size) return;

    const float x = _frange_start + tid * _dx;
    float* sample = out + 2 * tid;
    sample[0]     = x;

    if (fnCode == 0) {
        sample[1] = sinf(x);
    } else if (fnCode == 1) {
        sample[1] = cosf(x);
    } else if (fnCode == 2) {
        sample[1] = tanf(x);
    } else if (fnCode == 3) {
        sample[1] = log10f(x);
    }
}

// Integer division of a by b, rounded up (intended for positive operands).
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}

/*
 * Launches simple_sinf with enough 1024-thread blocks to cover DATA_SIZE
 * samples and reports a failed launch through FORGE_CUDA_CHECK.
 *
 *   dev_out      : device buffer with room for 2 * DATA_SIZE floats
 *   functionCode : forwarded to the kernel (0 sin, 1 cos, 2 tan, 3 log10)
 */
void kernel(float* dev_out, int functionCode) {
    static const dim3 threads(1024);
    // Derive the block count from threads.x so the two cannot drift apart.
    dim3 blocks(divup((int)DATA_SIZE, (int)threads.x));

    // clang-format off
    simple_sinf<<<blocks, threads>>>(dev_out, DATA_SIZE, functionCode, dx,
                                     FRANGE_START);
    // clang-format on

    // A kernel launch returns no status itself; an invalid launch
    // configuration only shows up through cudaGetLastError().
    FORGE_CUDA_CHECK(cudaGetLastError());
}

OpenCL¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>

#include "cl_helpers.h"

#include <algorithm>
#include <iostream>
#include <iterator>
#include <mutex>
#include <sstream>

using namespace cl;
using namespace std;

// Window dimensions handed to forge::Window (presumably width and height
// in pixels -- confirm against the Forge API).
const unsigned DIMX = 1000;
const unsigned DIMY = 800;

// Sample spacing, sampled x range (0 to roughly 2 * pi) and the number of
// samples that fit into that range at spacing dx.
const float dx           = 0.1f;
const float FRANGE_START = 0.f;
const float FRANGE_END   = 2 * 3.141592f;
const unsigned DATA_SIZE = (unsigned)((FRANGE_END - FRANGE_START) / dx);

#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>

// clang-format off
static const std::string sinf_ocl_kernel =
R"EOK(
// Writes one (x, f(x)) pair per work-item into out, with x = id * dx.
// fnCode selects f: 0 sin, 1 cos, 2 tan, 3 log10; for any other value only
// x is written. Work-items at or past DATA_SIZE do nothing.
kernel void sinf(global float* out, const float dx, const unsigned DATA_SIZE,
                 int fnCode) {
    unsigned id = get_global_id(0);
    if (id >= DATA_SIZE) return;

    float xval  = id * dx;
    out[2 * id] = xval;

    if (fnCode == 0) {
        out[2 * id + 1] = sin(xval);
    } else if (fnCode == 1) {
        out[2 * id + 1] = cos(xval);
    } else if (fnCode == 2) {
        out[2 * id + 1] = tan(xval);
    } else if (fnCode == 3) {
        out[2 * id + 1] = log10(xval);
    }
}
)EOK";
// clang-format on

/**
 * Fills devOut with DATA_SIZE (x, f(x)) pairs by running the "sinf" OpenCL
 * kernel; fnCode selects f (0 sin, 1 cos, 2 tan, 3 log10).
 *
 * The program is built and the kernel object created on the first call
 * only; both are kept in function-local statics.
 *
 * @param devOut buffer with room for 2 * DATA_SIZE floats
 * @param queue  queue the kernel is enqueued on; its context is used to
 *               build the program
 * @param fnCode function selector forwarded to the kernel
 */
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue, int fnCode)
{
    static std::once_flag compileFlag;
    static cl::Program prog;
    static cl::Kernel kern;

    std::call_once(compileFlag, [queue]() {
        prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), sinf_ocl_kernel,
                           true);
        kern = cl::Kernel(prog, "sinf");
    });

    // One work-item per sample: each work-item writes both floats of its
    // (x, y) pair, so DATA_SIZE work-items cover the whole buffer. Launching
    // DATA_SIZE * 2 only adds work-items that fail the kernel's bounds check.
    static const NDRange global(DATA_SIZE);

    kern.setArg(0, devOut);
    kern.setArg(1, dx);
    kern.setArg(2, DATA_SIZE);
    kern.setArg(3, fnCode);
    queue.enqueueNDRangeKernel(kern, cl::NullRange, global);
}

int main(void) {
    try {
        /*
         * First Forge call should be a window creation call
         * so that necessary OpenGL context is created for any
         * other forge::* object to be created successfully
         */
        forge::Window wnd(DIMX, DIMY, "Plotting Demo");
        wnd.makeCurrent();

        forge::Chart chart(FG_CHART_2D);
        chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);

        /* Create several plot objects which creates the necessary
         * vertex buffer objects to hold the different plot types
         */
        forge::Plot plt0 =
            chart.plot(DATA_SIZE, forge::f32);  // create a default plot
        forge::Plot plt1 =
            chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE,
                       FG_MARKER_NONE);  // or specify a specific plot type
        forge::Plot plt2 = chart.plot(
            DATA_SIZE, forge::f32, FG_PLOT_LINE,
            FG_MARKER_TRIANGLE);  // last parameter specifies marker shape
        forge::Plot plt3 =
            chart.plot(DATA_SIZE, forge::f32, FG_PLOT_SCATTER, FG_MARKER_CROSS);

        /*
         * Set plot colors
         */
        plt0.setColor(FG_RED);
        plt1.setColor(FG_BLUE);
        plt2.setColor(FG_YELLOW);  // use a forge predefined color
        plt3.setColor((forge::Color)0x257973FF);  // or any hex-valued color
        /*
         * Set plot legends
         */
        plt0.setLegend("Sine");
        plt1.setLegend("Cosine");
        plt2.setLegend("Tangent");
        plt3.setLegend("Log base 10");

        /*
         * Helper function to create a CLGL interop context.
         * This function checks for if the extension is available
         * and creates the context on the appropriate device.
         * Note: context and queue are defined in cl_helpers.h
         */
        context       = createCLGLContext(wnd);
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue         = CommandQueue(context, device);

        cl::Buffer sinOut(context, CL_MEM_READ_WRITE,
                          sizeof(float) * DATA_SIZE * 2);
        cl::Buffer cosOut(context, CL_MEM_READ_WRITE,
                          sizeof(float) * DATA_SIZE * 2);
        cl::Buffer tanOut(context, CL_MEM_READ_WRITE,
                          sizeof(float) * DATA_SIZE * 2);
        cl::Buffer logOut(context, CL_MEM_READ_WRITE,
                          sizeof(float) * DATA_SIZE * 2);
        kernel(sinOut, queue, 0);
        kernel(cosOut, queue, 1);
        kernel(tanOut, queue, 2);
        kernel(logOut, queue, 3);

        GfxHandle* handles[4];
        createGLBuffer(&handles[0], plt0.vertices(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[1], plt1.vertices(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[2], plt2.vertices(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[3], plt3.vertices(), FORGE_VERTEX_BUFFER);
        /* copy your data into the vertex buffer object exposed by
         * forge::Plot class and then proceed to rendering.
         * To help the users with copying the data from compute
         * memory to display memory, Forge provides copy headers
         * along with the library to help with this task
         */
        copyToGLBuffer(handles[0], (ComputeResourceHandle)sinOut(),
                       plt0.verticesSize());
        copyToGLBuffer(handles[1], (ComputeResourceHandle)cosOut(),
                       plt1.verticesSize());
        copyToGLBuffer(handles[2], (ComputeResourceHandle)tanOut(),
                       plt2.verticesSize());
        copyToGLBuffer(handles[3], (ComputeResourceHandle)logOut(),
                       plt3.verticesSize());

        do { wnd.draw(chart); } while (!wnd.close());

        releaseGLBuffer(handles[0]);
        releaseGLBuffer(handles[1]);
        releaseGLBuffer(handles[2]);
        releaseGLBuffer(handles[3]);

    } catch (forge::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    } catch (cl::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }
    return 0;
}

3D Vector Stream¶

CPU¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>

/* Window size in pixels, passed to the forge::Window constructor. */
const unsigned DIMX = 640;
const unsigned DIMY = 480;
/* NOTE(review): PI is not referenced anywhere in this example. */
const float PI      = 3.14159265359f;
/* Coordinate of the first grid sample on every axis. */
const float MINIMUM = 1.0f;
/* Upper axis limit of the chart; also bounds the sample count below. */
const float MAXIMUM = 20.f;
/* Spacing between neighbouring grid samples along each axis. */
const float STEP    = 2.0f;
/* Samples per axis: (20 - 1 + 1) / 2 = 10, i.e. 10^3 grid points overall. */
const int NELEMS    = (int)((MAXIMUM - MINIMUM + 1) / STEP);

using namespace std;

/*
 * Fill `colors` with one RGB triplet per grid point (NELEMS^3 triplets).
 * Points with an even linear index get the orange tone, odd ones the blue
 * tone; the fourth (alpha) entry of each palette row is not emitted.
 * Whatever `colors` held before the call is discarded.
 */
void generateColors(std::vector<float>& colors) {
    static const float AF_BLUE[]   = {0.0588f, 0.1137f, 0.2745f, 1.0f};
    static const float AF_ORANGE[] = {0.8588f, 0.6137f, 0.0745f, 1.0f};

    const int total = NELEMS * NELEMS * NELEMS;

    colors.clear();
    int idx = 0;
    while (idx < total) {
        /* Select the palette row by parity, then append its RGB part. */
        const float* rgb = (idx % 2 == 0) ? AF_ORANGE : AF_BLUE;
        colors.insert(colors.end(), rgb, rgb + 3);
        ++idx;
    }
}

/*
 * Build a regular NELEMS x NELEMS x NELEMS grid of sample positions and a
 * direction vector for each of them.
 *
 * points : receives x, y, z per sample (x varies fastest, z slowest).
 * dirs   : receives the matching direction, i.e. the sample position
 *          minus (10, 10, 10).
 *
 * Both vectors are emptied first so that they always stay the same length
 * and index-aligned, even when the caller passes in non-empty containers.
 */
void generatePoints(std::vector<float>& points, std::vector<float>& dirs) {
    const std::vector<float>::size_type total =
        static_cast<std::vector<float>::size_type>(3) * NELEMS * NELEMS *
        NELEMS;

    points.clear();
    dirs.clear();
    /* The final size is known up front; avoid repeated reallocation. */
    points.reserve(total);
    dirs.reserve(total);

    for (int k = 0; k < NELEMS; ++k) {
        float z = MINIMUM + k * STEP;
        for (int j = 0; j < NELEMS; ++j) {
            float y = MINIMUM + j * STEP;
            for (int i = 0; i < NELEMS; ++i) {
                float x = MINIMUM + i * STEP;
                points.push_back(x);
                points.push_back(y);
                points.push_back(z);
                dirs.push_back(x - 10.0f);
                dirs.push_back(y - 10.0f);
                dirs.push_back(z - 10.0f);
            }
        }
    }
}

/*
 * Entry point of the CPU 3D vector field example: creates the window and
 * a 3D chart, generates the field data on the host, copies it into the
 * chart's OpenGL buffers and renders until the window is closed.
 */
int main(void) {
    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(DIMX, DIMY, "3D Vector Field Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLimits(MINIMUM - 1.0f, MAXIMUM, MINIMUM - 1.0f, MAXIMUM,
                        MINIMUM - 1.0f, MAXIMUM);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    /* One vector per grid sample. */
    int numElems             = NELEMS * NELEMS * NELEMS;
    forge::VectorField field = chart.vectorField(numElems, forge::f32);
    field.setColor(0.f, 1.f, 0.f, 1.f);

    /* Host-side data: three floats per sample in each container. */
    std::vector<float> points;
    std::vector<float> colors;
    std::vector<float> dirs;
    generatePoints(points, dirs);
    generateColors(colors);

    /* Wrap the field's vertex, color and direction buffers for copying. */
    GfxHandle* handles[3];
    createGLBuffer(&handles[0], field.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], field.colors(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);

    /* Transfer the host data; sizes are the ones reported by the field. */
    copyToGLBuffer(handles[0], (ComputeResourceHandle)points.data(),
                   field.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)colors.data(),
                   field.colorsSize());
    copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs.data(),
                   field.directionsSize());

    /* Draw at least once, then keep drawing until close is requested. */
    do { wnd.draw(chart); } while (!wnd.close());

    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);

    return 0;
}

CUDA¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>

/* Window size in pixels, passed to the forge::Window constructor. */
const unsigned DIMX = 640;
const unsigned DIMY = 480;
/* Coordinate of the first grid sample on every axis. */
const float MINIMUM = 1.0f;
/* Upper axis limit of the chart; also bounds the sample count below. */
const float MAXIMUM = 20.f;
/* Spacing between neighbouring grid samples along each axis. */
const float STEP    = 2.0f;
/* Samples per axis: (20 - 1 + 1) / 2 = 10, i.e. 10^3 grid points overall. */
const int NELEMS    = (int)((MAXIMUM - MINIMUM + 1) / STEP);

/* Host wrapper that launches genColorsKernel on `colors`; defined below. */
void generateColors(float* colors);

/* Host wrapper that launches pointGenKernel on both buffers; defined below. */
void generatePoints(float* points, float* dirs);

/* Ceiling of a / b for non-negative a and positive b. */
inline int divup(int a, int b) {
    const int numerator = a + b - 1;
    return numerator / b;
}

/*
 * Entry point of the CUDA 3D vector field example: creates the window and
 * a 3D chart, fills three device buffers with kernels, copies them into
 * the chart's OpenGL buffers and renders until the window is closed.
 */
int main(void) {
    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(DIMX, DIMY, "3D Vector Field Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLimits(MINIMUM - 1.0f, MAXIMUM, MINIMUM - 1.0f, MAXIMUM,
                        MINIMUM - 1.0f, MAXIMUM);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    /* One vector per grid sample. */
    int numElems             = NELEMS * NELEMS * NELEMS;
    forge::VectorField field = chart.vectorField(numElems, forge::f32);
    field.setColor(0.f, 1.f, 0.f, 1.f);

    /* Device pointers, each sized for three floats per sample. */
    float* points;
    float* colors;
    float* dirs;

    FORGE_CUDA_CHECK(cudaMalloc((void**)&points, 3 * numElems * sizeof(float)));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&colors, 3 * numElems * sizeof(float)));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dirs, 3 * numElems * sizeof(float)));

    /* Launch the kernels that populate the device buffers. */
    generatePoints(points, dirs);
    generateColors(colors);

    /* Wrap the field's vertex, color and direction buffers for copying. */
    GfxHandle* handles[3];
    createGLBuffer(&handles[0], field.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], field.colors(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);

    /* Transfer the device data; sizes are the ones reported by the field. */
    copyToGLBuffer(handles[0], (ComputeResourceHandle)points,
                   field.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)colors,
                   field.colorsSize());
    copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs,
                   field.directionsSize());

    /* Draw at least once, then keep drawing until close is requested. */
    do { wnd.draw(chart); } while (!wnd.close());

    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);

    FORGE_CUDA_CHECK(cudaFree(points));
    FORGE_CUDA_CHECK(cudaFree(colors));
    FORGE_CUDA_CHECK(cudaFree(dirs));

    return 0;
}

/*
 * Write one RGB triplet per element into `colors` (3 * nelems floats).
 * Launch layout: 1D grid of 1D blocks, one thread per element; threads
 * whose index is >= nelems write nothing.
 * Even indices receive the orange tone, odd indices the blue tone.
 */
__global__ void genColorsKernel(float* colors, int nelems) {
    const float AF_BLUE[4]   = {0.0588f, 0.1137f, 0.2745f, 1.0f};
    const float AF_ORANGE[4] = {0.8588f, 0.6137f, 0.0745f, 1.0f};

    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= nelems) { return; }

    const bool isEven = (idx % 2 == 0);
    float* rgb        = colors + 3 * idx;

    rgb[0] = isEven ? AF_ORANGE[0] : AF_BLUE[0];
    rgb[1] = isEven ? AF_ORANGE[1] : AF_BLUE[1];
    rgb[2] = isEven ? AF_ORANGE[2] : AF_BLUE[2];
}

void generateColors(float* colors) {
    const int numElems = NELEMS * NELEMS * NELEMS;
    static const dim3 threads(512);
    dim3 blocks(divup(numElems, threads.x));

    // clang-format off
    genColorsKernel<<<blocks, threads>>>(colors, numElems);
    // clang-format on
}

/*
 * Generate grid positions and directions for an nelems^3 lattice.
 *
 * Launch layout: 2D blocks on a 2D grid. gridDim.x packs the z-slice and
 * the x-block together: slice = blockIdx.x / nBBS0, and the remainder is
 * the block index along x. blockIdx.y / threadIdx.y cover the y axis.
 *
 * points, dirs : device buffers of 3 * nelems^3 floats each.
 * nBBS0        : number of blocks that span one row along x.
 * minimum,step : position of sample n along an axis is minimum + n * step.
 *
 * Threads that fall outside the lattice write nothing.
 */
__global__ void pointGenKernel(float* points, float* dirs, int nBBS0,
                               int nelems, float minimum, float step) {
    const int slice = blockIdx.x / nBBS0;
    const int col   = blockDim.x * (blockIdx.x - slice * nBBS0) + threadIdx.x;
    const int row   = blockDim.y * blockIdx.y + threadIdx.y;

    if (!(col < nelems && row < nelems && slice < nelems)) { return; }

    const float px = minimum + col * step;
    const float py = minimum + row * step;
    const float pz = minimum + slice * step;

    /* Linear sample index with x fastest and z slowest. */
    const int base = 3 * (col + row * nelems + slice * nelems * nelems);

    points[base + 0] = px;
    points[base + 1] = py;
    points[base + 2] = pz;

    dirs[base + 0] = px - 10.f;
    dirs[base + 1] = py - 10.f;
    dirs[base + 2] = pz - 10.f;
}

void generatePoints(float* points, float* dirs) {
    static dim3 threads(8, 8);

    int blk_x = divup(NELEMS, threads.x);
    int blk_y = divup(NELEMS, threads.y);

    dim3 blocks(blk_x * NELEMS, blk_y);

    // clang-format off
    pointGenKernel<<<blocks, threads>>>(points, dirs, blk_x, NELEMS, MINIMUM,
                                        STEP);
    // clang-format on
}

OpenCL¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>

#include "cl_helpers.h"

#include <algorithm>
#include <cmath>
#include <ctime>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>

using namespace cl;
using namespace std;

/* Window size in pixels, passed to the forge::Window constructor. */
const unsigned DIMX = 640;
const unsigned DIMY = 480;
/* Coordinate of the first grid sample on every axis. */
const float MINIMUM = 1.0f;
/* Upper axis limit of the chart; also bounds the sample count below. */
const float MAXIMUM = 20.f;
/* Spacing between neighbouring grid samples along each axis. */
const float STEP    = 2.0f;
/* Samples per axis: (20 - 1 + 1) / 2 = 10, i.e. 10^3 grid points overall. */
const int NELEMS    = (int)((MAXIMUM - MINIMUM + 1) / STEP);

#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>

// clang-format off
/*
 * OpenCL C source, compiled at run time by generatePoints().
 *
 * genColorsKernel: one work-item per element over NELEMS^3 elements;
 *   writes an RGB triplet per element, orange for even indices and blue
 *   for odd ones.
 * pointGenKernel: 2D range; the group id in dimension 0 packs the z-slice
 *   (group_id / nBBS0) together with the x work-group index, dimension 1
 *   covers y. Writes the position and (position - 10) direction of every
 *   lattice sample, three floats each.
 *
 * The text between the raw-string delimiters is program input for the
 * OpenCL compiler, so it must not be reformatted.
 */
static const std::string streamKernel =
R"EOK(
constant float AF_BLUE[4]         = {0.0588f, 0.1137f, 0.2745f, 1.0f};
constant float AF_ORANGE[4]           = {0.8588f, 0.6137f, 0.0745f, 1.0f};

kernel void genColorsKernel(global float* colors, int NELEMS) {
    const size_t nelems = NELEMS * NELEMS * NELEMS;

    int i = get_global_id(0);

    if (i < nelems) {
        if (i % 2 == 0) {
            colors[3 * i + 0] = AF_ORANGE[0];
            colors[3 * i + 1] = AF_ORANGE[1];
            colors[3 * i + 2] = AF_ORANGE[2];
        } else {
            colors[3 * i + 0] = AF_BLUE[0];
            colors[3 * i + 1] = AF_BLUE[1];
            colors[3 * i + 2] = AF_BLUE[2];
        }
    }
}

kernel void pointGenKernel(global float* points, global float* dirs, int nBBS0,
                           int NELEMS, float MINIMUM, float STEP) {
    int k = get_group_id(0) / nBBS0;
    int i = get_local_size(0) * (get_group_id(0) - k * nBBS0) + get_local_id(0);
    int j = get_global_id(1);

    if (i < NELEMS && j < NELEMS && k < NELEMS) {
        float x = MINIMUM + i * STEP;
        float y = MINIMUM + j * STEP;
        float z = MINIMUM + k * STEP;

        int id = i + j * NELEMS + k * NELEMS * NELEMS;

        points[3 * id + 0] = x;
        points[3 * id + 1] = y;
        points[3 * id + 2] = z;

        dirs[3 * id + 0] = x - 10.f;
        dirs[3 * id + 1] = y - 10.f;
        dirs[3 * id + 2] = z - 10.f;
    }
}
)EOK";
// clang-format on

/* Number of size-b chunks needed to cover a items (a >= 0, b > 0). */
inline int divup(int a, int b)
{
    const int rounded = a + b - 1;
    return rounded / b;
}

/*
 * Fill the three device buffers of the vector field.
 *
 * On the first call the OpenCL program in `streamKernel` is built for
 * `device` and both kernels are created; the program and kernels are kept
 * in function-local statics for later calls. A build failure prints the
 * error and the build log, then terminates the process with status 255.
 *
 * Afterwards pointGenKernel is enqueued on an 8x8 local range (writes
 * `points` and `dirs`), followed by genColorsKernel on a 64x1 local range
 * (writes `colors`). Both are enqueued on `queue`; this function does not
 * wait for them to finish.
 */
void generatePoints(Buffer& points, Buffer& dirs, Buffer& colors,
                    CommandQueue& queue, Device& device) {
    static bool needsBuild = true;

    static cl::Program program;
    static cl::Kernel pointsKern;
    static cl::Kernel colorsKern;

    if (needsBuild) {
        try {
            program = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(),
                                  streamKernel, false);

            std::vector<cl::Device> targets(1, device);
            program.build(targets);

            pointsKern = cl::Kernel(program, "pointGenKernel");
            colorsKern = cl::Kernel(program, "genColorsKernel");
        } catch (cl::Error err) {
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        needsBuild = false;
    }

    /* Position/direction kernel: dimension 0 carries x-groups times slices. */
    static const NDRange local(8, 8);
    const int groupsX = divup(NELEMS, (int)(local[0]));
    const int groupsY = divup(NELEMS, (int)(local[1]));

    NDRange global(NELEMS * local[0] * groupsX, local[1] * groupsY);

    pointsKern.setArg(0, points);
    pointsKern.setArg(1, dirs);
    pointsKern.setArg(2, groupsX);
    pointsKern.setArg(3, NELEMS);
    pointsKern.setArg(4, MINIMUM);
    pointsKern.setArg(5, STEP);
    queue.enqueueNDRangeKernel(pointsKern, cl::NullRange, global, local);

    /* Color kernel: one work-item per element, rounded up to 64. */
    const int total = NELEMS * NELEMS * NELEMS;
    static const NDRange colorLocal(64, 1);
    NDRange colorGlobal(colorLocal[0] * divup(total, (int)(colorLocal[0])),
                        (int)(colorLocal[1]));

    colorsKern.setArg(0, colors);
    colorsKern.setArg(1, NELEMS);
    queue.enqueueNDRangeKernel(colorsKern, cl::NullRange, colorGlobal,
                               colorLocal);
}

/*
 * Entry point of the OpenCL 3D vector field example: creates the window,
 * a 3D chart and a CL/GL interop context, fills three device buffers with
 * kernels, copies them into the chart's OpenGL buffers and renders until
 * the window is closed. forge::Error and cl::Error are caught and printed.
 */
int main(void) {
    try {
        /*
         * First Forge call should be a window creation call
         * so that necessary OpenGL context is created for any
         * other forge::* object to be created successfully
         */
        forge::Window wnd(DIMX, DIMY, "3D Vector Field Demo");
        wnd.makeCurrent();

        forge::Chart chart(FG_CHART_3D);
        chart.setAxesLimits(MINIMUM - 1.0f, MAXIMUM, MINIMUM - 1.0f, MAXIMUM,
                            MINIMUM - 1.0f, MAXIMUM);
        chart.setAxesTitles("x-axis", "y-axis", "z-axis");

        /* One vector per grid sample. */
        int numElems             = NELEMS * NELEMS * NELEMS;
        forge::VectorField field = chart.vectorField(numElems, forge::f32);
        field.setColor(0.f, 1.f, 0.f, 1.f);

        /*
         * Helper function to create a CLGL interop context.
         * This function checks for if the extension is available
         * and creates the context on the appropriate device.
         * Note: context and queue are defined in cl_helpers.h
         */
        context       = createCLGLContext(wnd);
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue         = CommandQueue(context, device);

        /* Device buffers, each sized for three floats per sample. */
        cl::Buffer points(context, CL_MEM_READ_WRITE,
                          sizeof(float) * 3 * numElems);
        cl::Buffer colors(context, CL_MEM_READ_WRITE,
                          sizeof(float) * 3 * numElems);
        cl::Buffer dirs(context, CL_MEM_READ_WRITE,
                        sizeof(float) * 3 * numElems);

        /* Wrap the field's vertex, color and direction buffers for copying. */
        GfxHandle* handles[3];
        createGLBuffer(&handles[0], field.vertices(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[1], field.colors(), FORGE_VERTEX_BUFFER);
        createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);

        /* Enqueue the kernels that populate all three device buffers. */
        generatePoints(points, dirs, colors, queue, device);

        /* Transfer the device data; sizes are the ones the field reports. */
        copyToGLBuffer(handles[0], (ComputeResourceHandle)points(),
                       field.verticesSize());
        copyToGLBuffer(handles[1], (ComputeResourceHandle)colors(),
                       field.colorsSize());
        copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs(),
                       field.directionsSize());

        /* Draw at least once, then keep drawing until close is requested. */
        do { wnd.draw(chart); } while (!wnd.close());

        releaseGLBuffer(handles[0]);
        releaseGLBuffer(handles[1]);
        releaseGLBuffer(handles[2]);

    } catch (forge::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    } catch (cl::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }

    return 0;
}

Surfaces¶

CPU¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>

using namespace std;

/* Sampled domain: x in [XMIN, XMAX) and y in [YMIN, YMAX). */
static const float XMIN = -8.0f;
static const float XMAX = 8.0f;
static const float YMIN = -8.0f;
static const float YMAX = 8.0f;

/* Sample spacing, used for both axes. */
const float DX     = 0.5;
/* Samples per axis: (8 - (-8)) / 0.5 = 32. */
const size_t XSIZE = (size_t)((XMAX - XMIN) / DX);
const size_t YSIZE = (size_t)((YMAX - YMIN) / DX);

/*
 * Sample the surface z = sin(r) / r, with r = sqrt(x^2 + y^2), over
 * [XMIN, XMAX) x [YMIN, YMAX) using a step of `dx` along both axes.
 *
 * `vec` is emptied and then receives x, y, z for every sample, with y
 * varying fastest. A tiny constant is added to r so that the division
 * stays defined at the origin.
 */
void genSurface(float dx, std::vector<float>& vec) {
    vec.clear();

    float px = XMIN;
    while (px < XMAX) {
        float py = YMIN;
        while (py < YMAX) {
            const float radius = sqrt(px * px + py * py) + 2.2204e-16f;
            const float height = sin(radius) / radius;

            vec.push_back(px);
            vec.push_back(py);
            vec.push_back(height);

            py += dx;
        }
        px += dx;
    }
}

/*
 * Entry point of the CPU surface example: creates the window and a 3D
 * chart, samples the surface on the host, copies it into the surface's
 * OpenGL vertex buffer and renders until the window is closed.
 */
int main(void) {
    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(1024, 768, "3d Surface Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLimits(XMIN - 2.0f, XMAX + 2.0f, YMIN - 2.0f, YMAX + 2.0f,
                        -0.5f, 1.f);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    forge::Surface surf = chart.surface(XSIZE, YSIZE, forge::f32);
    surf.setColor(FG_YELLOW);

    // generate a surface
    std::vector<float> function;

    genSurface(DX, function);

    GfxHandle* handle;
    createGLBuffer(&handle, surf.vertices(), FORGE_VERTEX_BUFFER);

    /* copy your data into the vertex buffer object exposed by
     * forge::Surface class and then proceed to rendering.
     * To help the users with copying the data from compute
     * memory to display memory, Forge provides copy headers
     * along with the library to help with this task
     */
    copyToGLBuffer(handle, (ComputeResourceHandle)function.data(),
                   surf.verticesSize());

    /* Draw at least once, then keep drawing until close is requested. */
    do { wnd.draw(chart); } while (!wnd.close());

    releaseGLBuffer(handle);

    return 0;
}

CUDA¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <cuComplex.h>
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
#include <iostream>

/* Sampled domain: x starts at XMIN, y starts at YMIN. */
const float XMIN = -8.0f;
const float XMAX = 8.f;
const float YMIN = -8.0f;
const float YMAX = 8.f;

/* Sample spacing, used for both axes. */
const float DX     = 0.5;
/* Samples per axis: (8 - (-8)) / 0.5 = 32. */
const size_t XSIZE = (size_t)((XMAX - XMIN) / DX);
const size_t YSIZE = (size_t)((YMAX - YMIN) / DX);

/* Host wrapper that launches the surface kernel on dev_out; defined below. */
void kernel(float dx, float* dev_out);

/*
 * Entry point of the CUDA surface example: creates the window and a 3D
 * chart, computes the surface samples on the device, copies them into the
 * surface's OpenGL vertex buffer and renders until the window is closed.
 */
int main(void) {
    /* Device buffer holding x, y, z for each of the XSIZE * YSIZE samples. */
    float* dev_out;

    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(1024, 768, "3d Surface Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLimits(-10.f, 10.f, -10.f, 10.f, -0.5f, 1.f);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    forge::Surface surf = chart.surface(XSIZE, YSIZE, forge::f32);
    surf.setColor(FG_YELLOW);

    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&dev_out, XSIZE * YSIZE * 3 * sizeof(float)));
    kernel(DX, dev_out);

    GfxHandle* handle;
    createGLBuffer(&handle, surf.vertices(), FORGE_VERTEX_BUFFER);
    /* copy your data into the vertex buffer object exposed by
     * forge::Surface class and then proceed to rendering.
     * To help the users with copying the data from compute
     * memory to display memory, Forge provides copy headers
     * along with the library to help with this task
     */
    copyToGLBuffer(handle, (ComputeResourceHandle)dev_out, surf.verticesSize());

    /* Draw at least once, then keep drawing until close is requested. */
    do { wnd.draw(chart); } while (!wnd.close());

    FORGE_CUDA_CHECK(cudaFree(dev_out));
    releaseGLBuffer(handle);
    return 0;
}

/*
 * Sample the surface z = sin(r) / r, with r = sqrt(x^2 + y^2).
 *
 * Launch layout: 2D grid of 2D blocks, one thread per sample; thread
 * (i, j) handles x = XMIN + i * dx and y = YMIN + j * dx. Threads outside
 * the XSIZE x YSIZE range write nothing.
 *
 * out : device buffer of 3 * XSIZE * YSIZE floats; sample (i, j) is stored
 *       as x, y, z at element offset 3 * (j + i * YSIZE).
 *
 * All arithmetic is kept in single precision: the epsilon literal carries
 * an `f` suffix and sqrtf/sinf are used, so nothing is promoted to double.
 * The epsilon keeps the division defined at the origin.
 */
__global__ void sincos_surf(float dx, float* out, const float XMIN,
                            const float YMIN, const size_t XSIZE,
                            const size_t YSIZE) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    if (i < XSIZE && j < YSIZE) {
        float x = XMIN + i * dx;
        float y = YMIN + j * dx;

        int offset          = j + i * YSIZE;
        out[3 * offset]     = x;
        out[3 * offset + 1] = y;
        float z             = sqrtf(x * x + y * y) + 2.2204e-16f;
        out[3 * offset + 2] = sinf(z) / z;
    }
}

/* Round-up integer division, valid for non-negative a and positive b. */
inline int divup(int a, int b) {
    const int biased = a + b - 1;
    return biased / b;
}

void kernel(float dx, float* dev_out) {
    static const dim3 threads(8, 8);
    dim3 blocks(divup(XSIZE, threads.x), divup(YSIZE, threads.y));

    // clang-format off
    sincos_surf<<<blocks, threads>>>(dx, dev_out, XMIN, YMIN, XSIZE, YSIZE);
    // clang-format on
}

OpenCL¶

/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>

#include "cl_helpers.h"

#include <algorithm>
#include <cmath>
#include <complex>
#include <iostream>
#include <iterator>
#include <mutex>
#include <vector>

/* Sampled domain: x starts at XMIN, y starts at YMIN. */
static const float XMIN = -8.0f;
static const float XMAX = 8.f;
static const float YMIN = -8.0f;
static const float YMAX = 8.f;

/* Sample spacing, used for both axes. */
const float DX       = 0.5;
/* Samples per axis: (8 - (-8)) / 0.5 = 32. */
const unsigned XSIZE = (unsigned)((XMAX - XMIN) / DX);
const unsigned YSIZE = (unsigned)((YMAX - YMIN) / DX);

using namespace std;

#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>

// clang-format off
/*
 * OpenCL C source, compiled at run time by kernel().
 *
 * surf: 2D range, one work-item per sample; work-item (i, j) handles
 *   x = xmin + i * dx and y = ymin + j * dx and stores x, y, sin(r) / r
 *   (r = sqrt(x^2 + y^2)) at element offset 3 * (j + i * h). Work-items
 *   outside the w x h range write nothing.
 *
 * The epsilon literal carries an `f` suffix so that the source uses only
 * single precision and does not depend on the device's optional
 * double-precision support.
 */
static const std::string sin_surf_kernel =
R"EOK(
kernel void
surf(global float* out, const float dx, const float xmin, const float ymin,
     const unsigned w, const unsigned h) {
    int i = get_global_id(0);
    int j = get_global_id(1);

    float x = xmin + i * dx;
    float y = ymin + j * dx;

    if (i < w && j < h) {
        int offset          = j + i * h;
        out[3 * offset]     = x;
        out[3 * offset + 1] = y;
        float z             = sqrt(x * x + y * y) + 2.2204e-16f;
        out[3 * offset + 2] = sin(z) / z;
    }
}
)EOK";
// clang-format on

/* Smallest count of size-b groups that covers a items (a >= 0, b > 0). */
inline int divup(int a, int b)
{
    const int total = a + b - 1;
    return total / b;
}

/*
 * Compute the surface samples into `devOut`.
 *
 * On the first call the OpenCL program in `sin_surf_kernel` is built for
 * `device` and the "surf" kernel is created; both are kept in
 * function-local statics for later calls. A build failure prints the
 * error and the build log, then terminates the process with status 255.
 *
 * The kernel is then enqueued on `queue` with an 8x8 local range and a
 * global range rounded up to cover XSIZE x YSIZE; this function does not
 * wait for it to finish.
 */
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue, cl::Device& device) {
    static bool needsBuild = true;
    static cl::Program program;
    static cl::Kernel surfKern;

    if (needsBuild) {
        try {
            program = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(),
                                  sin_surf_kernel, false);

            std::vector<cl::Device> targets(1, device);
            program.build(targets);

            surfKern = cl::Kernel(program, "surf");
        } catch (cl::Error err) {
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        needsBuild = false;
    }

    /* Round each global extent up to a whole number of work-groups. */
    NDRange local(8, 8);
    const int groupsX = divup(XSIZE, (int)(local[0]));
    const int groupsY = divup(YSIZE, (int)(local[1]));
    NDRange global(local[0] * groupsX, local[1] * groupsY);

    /* Argument order follows the `surf` signature in the kernel source. */
    surfKern.setArg(0, devOut);
    surfKern.setArg(1, DX);
    surfKern.setArg(2, XMIN);
    surfKern.setArg(3, YMIN);
    surfKern.setArg(4, XSIZE);
    surfKern.setArg(5, YSIZE);
    queue.enqueueNDRangeKernel(surfKern, cl::NullRange, global, local);
}

/*
 * Entry point of the OpenCL surface example: creates the window, a 3D
 * chart and a CL/GL interop context, computes the surface samples on the
 * device, copies them into the surface's OpenGL vertex buffer and renders
 * until the window is closed. forge::Error and cl::Error are caught and
 * printed.
 */
int main(void) {
    try {
        /*
         * First Forge call should be a window creation call
         * so that necessary OpenGL context is created for any
         * other forge::* object to be created successfully
         */
        forge::Window wnd(1024, 768, "3d Surface Demo");
        wnd.makeCurrent();

        forge::Chart chart(FG_CHART_3D);
        chart.setAxesLimits(-10.f, 10.f, -10.f, 10.f, -0.5f, 1.f);
        chart.setAxesTitles("x-axis", "y-axis", "z-axis");

        forge::Surface surf = chart.surface(XSIZE, YSIZE, forge::f32);
        surf.setColor(FG_YELLOW);

        /*
         * Helper function to create a CLGL interop context.
         * This function checks for if the extension is available
         * and creates the context on the appropriate device.
         * Note: context and queue are defined in cl_helpers.h
         */
        context       = createCLGLContext(wnd);
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue         = CommandQueue(context, device);

        /* Device buffer holding x, y, z for each of the samples. */
        cl::Buffer devOut(context, CL_MEM_READ_WRITE,
                          sizeof(float) * XSIZE * YSIZE * 3);

        kernel(devOut, queue, device);

        GfxHandle* handle;
        createGLBuffer(&handle, surf.vertices(), FORGE_VERTEX_BUFFER);
        /* copy your data into the vertex buffer object exposed by
         * forge::Surface class and then proceed to rendering.
         * To help the users with copying the data from compute
         * memory to display memory, Forge provides copy headers
         * along with the library to help with this task
         */
        copyToGLBuffer(handle, (ComputeResourceHandle)devOut(),
                       surf.verticesSize());

        /* Draw at least once, then keep drawing until close is requested. */
        do { wnd.draw(chart); } while (!wnd.close());

        releaseGLBuffer(handle);
    } catch (forge::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    } catch (cl::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }

    return 0;
}