Examples¶
Image Rendering¶
CPU¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
const unsigned DIMX = 512;
const unsigned DIMY = 512;
// Simple host-side image container used by this example.
struct Bitmap {
// pixel storage; createBitmap() allocates 4 bytes per pixel for it
unsigned char* ptr;
// image width in pixels
unsigned width;
// image height in pixels
unsigned height;
};
Bitmap createBitmap(unsigned w, unsigned h);
void destroyBitmap(Bitmap& bmp);
void kernel(Bitmap& bmp);
int julia(int x, int y, int width, int height);
// Entry point of the CPU fractal demo: fills a host bitmap with kernel(),
// copies it into a forge::Image and redraws it until the window is closed.
int main(void) {
Bitmap bmp = createBitmap(DIMX, DIMY);
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Fractal Demo");
wnd.makeCurrent();
/* create a font object and load the necessary font,
* then pass it on to the window object so that
* it can be used for rendering text
*
* NOTE: THIS IS AN OPTIONAL STEP, BY DEFAULT THE WINDOW WILL
* HAVE A FONT ALREADY SET UP */
forge::Font fnt;
#if defined(OS_WIN)
fnt.loadSystemFont("Calibri");
#else
fnt.loadSystemFont("Vera");
#endif
wnd.setFont(&fnt);
/* Create an image object which creates the necessary
* textures and pixel buffer objects to hold the image
* */
forge::Image img(DIMX, DIMY, FG_RGBA, forge::u8);
/* copy your data into the pixel buffer object exposed by
* forge::Image class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
// fill the host bitmap with the fractal before it is copied to the image
kernel(bmp);
GfxHandle* handle = 0;
// create GL-CPU interop buffer
createGLBuffer(&handle, img.pixels(), FORGE_IMAGE_BUFFER);
// copy the data from compute buffer to graphics buffer
copyToGLBuffer(handle, (ComputeResourceHandle)bmp.ptr, img.size());
// keep redrawing the image until the window reports it should close
do { wnd.draw(img); } while (!wnd.close());
// destroy GL-CPU Interop buffer
releaseGLBuffer(handle);
destroyBitmap(bmp);
return 0;
}
/**
 * Allocates an uninitialized bitmap with 4 bytes of storage per pixel.
 *
 * @param w image width in pixels
 * @param h image height in pixels
 * @return a Bitmap owning a new[]-allocated buffer of 4 * w * h bytes;
 *         release it with destroyBitmap().
 */
Bitmap createBitmap(unsigned w, unsigned h) {
    Bitmap retVal;
    retVal.width  = w;
    retVal.height = h;
    // Widen before multiplying: evaluated in `unsigned`, 4 * w * h wraps
    // around for large images and would silently under-allocate the buffer.
    retVal.ptr = new unsigned char[4ull * w * h];
    return retVal;
}
/**
 * Releases the pixel buffer owned by `bmp`.
 *
 * The pointer is reset afterwards so that an accidental second call is a
 * harmless delete[] of a null pointer instead of a double free.
 */
void destroyBitmap(Bitmap& bmp) {
    delete[] bmp.ptr;
    bmp.ptr = 0;
}
// Fills every pixel of `bmp`: the first channel gets 255 * julia(...) for that
// pixel, the next two channels are zeroed and the fourth is set to 255.
void kernel(Bitmap& bmp) {
    const unsigned rows = bmp.height;
    const unsigned cols = bmp.width;
    for (unsigned row = 0; row < rows; ++row) {
        for (unsigned col = 0; col < cols; ++col) {
            const int pixelIndex = col + row * cols;
            const int inSet      = julia(col, row, cols, rows);
            unsigned char* px    = bmp.ptr + pixelIndex * 4;
            px[0] = 255 * inSet;
            px[1] = 0;
            px[2] = 0;
            px[3] = 255;
        }
    }
}
// Julia-set membership test for pixel (x, y) of a width x height image.
// The pixel is mapped to a complex start value (scaled by 1.5 around the
// image centre) and iterated as z = z*z + c with c = -0.8 + 0.156i.
// Returns 0 as soon as |z| exceeds 1000 within 200 iterations, 1 otherwise.
int julia(int x, int y, int width, int height) {
    const float scale = 1.5;
    const float halfW = width / 2.0f;
    const float halfH = height / 2.0f;
    const float re    = scale * (halfW - x) / halfW;
    const float im    = scale * (halfH - y) / halfH;

    const std::complex<float> c(-0.8f, 0.156f);
    std::complex<float> z(re, im);

    int iter = 0;
    while (iter < 200) {
        z = z * z + c;
        if (std::abs(z) > 1000) return 0;
        ++iter;
    }
    return 1;
}
|
CUDA¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <cuComplex.h>
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
const unsigned DIMX = 512;
const unsigned DIMY = 512;
const size_t TOT_SIZE = DIMX * DIMY * 4;
void kernel(unsigned char* dev_out);
// Entry point of the CUDA fractal demo: renders the fractal into a device
// buffer with kernel(), copies it into a forge::Image and redraws it until
// the window is closed.
int main(void) {
unsigned char* dev_out;
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Fractal Demo");
wnd.makeCurrent();
/* Create an image object which creates the necessary
* textures and pixel buffer objects to hold the image
* */
forge::Image img(DIMX, DIMY, FG_RGBA, forge::u8);
GfxHandle* handle = 0;
// create GL-CUDA interop buffer
createGLBuffer(&handle, img.pixels(), FORGE_IMAGE_BUFFER);
/* copy your data into the pixel buffer object exposed by
* forge::Image class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
// TOT_SIZE is DIMX * DIMY * 4 bytes, i.e. four bytes per pixel
FORGE_CUDA_CHECK(cudaMalloc((void**)&dev_out, TOT_SIZE));
kernel(dev_out);
// copy the data from compute buffer to graphics buffer
copyToGLBuffer(handle, (ComputeResourceHandle)dev_out, img.size());
// keep redrawing the image until the window reports it should close
do { wnd.draw(img); } while (!wnd.close());
// destroy GL-CUDA Interop buffer
releaseGLBuffer(handle);
FORGE_CUDA_CHECK(cudaFree(dev_out));
return 0;
}
// Device-side Julia-set test for pixel (x, y) of the DIMX x DIMY image.
// Iterates z = z*z + c (c = -0.8 + 0.156i) up to 200 times starting from the
// pixel's scaled coordinate; returns 0 once |z| > 1000, otherwise 1.
__device__ int julia(int x, int y) {
    const float scale = 1.5;
    const float halfW = DIMX / 2.0f;
    const float halfH = DIMY / 2.0f;
    const float jx    = scale * (halfW - x) / halfW;
    const float jy    = scale * (halfH - y) / halfH;

    const cuFloatComplex c = make_cuFloatComplex(-0.8f, 0.156f);
    cuFloatComplex z       = make_cuFloatComplex(jx, jy);

    int iter = 0;
    while (iter < 200) {
        z = cuCaddf(cuCmulf(z, z), c);
        if (cuCabsf(z) > 1000.0f) return 0;
        ++iter;
    }
    return 1;
}
// Kernel: one thread per pixel on a 2-D launch. Threads that fall outside the
// DIMX x DIMY image exit immediately; the others write four bytes for their
// pixel: channel 2 holds 255 * julia(x, y), channels 0 and 1 are zero and
// channel 3 is 255.
__global__ void julia(unsigned char* out) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    if (col >= DIMX || row >= DIMY) return;

    const int offset = col + row * DIMX;
    // now calculate the value at that position
    const int inSet    = julia(col, row);
    unsigned char* px  = out + offset * 4;
    px[2] = 255 * inSet;
    px[0] = 0;
    px[1] = 0;
    px[3] = 255;
}
// Ceiling of a / b for non-negative a and positive b.
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
/**
 * Host-side launcher for the julia() kernel.
 *
 * Launches 8x8 thread blocks, with enough blocks in each dimension to cover
 * DIMX x DIMY pixels.
 *
 * @param dev_out device buffer the kernel writes 4 bytes per pixel into.
 */
void kernel(unsigned char* dev_out) {
    static const dim3 threads(8, 8);
    dim3 blocks(divup(DIMX, threads.x), divup(DIMY, threads.y));
    // clang-format off
    julia<<<blocks, threads>>>(dev_out);
    // clang-format on
    // A kernel launch returns no status of its own; query it explicitly so an
    // invalid launch configuration is reported instead of silently ignored.
    FORGE_CUDA_CHECK(cudaGetLastError());
}
|
OpenCL¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#include "cl_helpers.h"
#include <algorithm>
#include <iostream>
#include <mutex>
#include <sstream>
using namespace cl;
using namespace std;
const unsigned DIMX = 512;
const unsigned DIMY = 512;
const unsigned IMG_SIZE = DIMX * DIMY * 4;
#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>
// clang-format off
// OpenCL C source of the fractal program, compiled at run time by kernel().
// It defines small float2 helpers (magnitude, mul, add), pixel() which
// iterates a = a*a + c up to 200 times and returns 0 once the magnitude
// exceeds 1000 (1 otherwise), and the `julia` kernel which writes four bytes
// per pixel with 255 * pixel(...) stored in channel 1.
static const std::string fractal_ocl_kernel =
R"EOK(
float magnitude(float2 a) {
return sqrt(a.s0*a.s0+a.s1*a.s1);
}
float2 mul(float2 a, float2 b) {
return (float2)(a.s0*b.s0-a.s1*b.s1, a.s1*b.s0+a.s0*b.s1);
}
float2 add(float2 a, float2 b) {
return (float2)(a.s0+b.s0, a.s1+b.s1);
}
int pixel(int x, int y, int width, int height) {
const float scale = 1.5;
float jx = scale * (float)(width/2.0f - x)/(width/2.0f);
float jy = scale * (float)(height/2.0f - y)/(height/2.0f);
float2 c = (float2)(-0.8f, 0.156f);
float2 a = (float2)(jx, jy);
for (int i=0; i<200; i++) {
a = add(mul(a, a), c);
if (magnitude(a) > 1000.0f)
return 0;
}
return 1;
}
kernel
void julia(global unsigned char* out, const unsigned w, const unsigned h) {
int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
int y = get_group_id(1) * get_local_size(1) + get_local_id(1);
if (x<w && y<h) {
int offset = x + y * w;
int juliaValue = pixel(x, y, w, h);
out[offset*4 + 1] = 255 * juliaValue;
out[offset*4 + 0] = 0;
out[offset*4 + 2] = 0;
out[offset*4 + 3] = 255;
}
}
)EOK";
// clang-format on
// Ceiling of a / b for non-negative a and positive b.
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
// Builds the fractal program once (on the first call, guarded by
// std::call_once) and enqueues the `julia` kernel on `queue` with 8x8
// work-groups covering DIMX x DIMY work-items, writing into `devOut`.
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue) {
    static std::once_flag compileFlag;
    static cl::Program prog;
    static cl::Kernel kern;

    // the lambda runs synchronously inside call_once, so a reference is safe
    std::call_once(compileFlag, [&queue]() {
        prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(),
                           fractal_ocl_kernel, true);
        kern = cl::Kernel(prog, "julia");
    });

    static const NDRange local(8, 8);
    const int groupsX = divup(DIMX, (int)(local[0]));
    const int groupsY = divup(DIMY, (int)(local[1]));
    NDRange global(local[0] * groupsX, local[1] * groupsY);

    cl::KernelFunctor<Buffer, unsigned, unsigned> juliaOp(kern);
    juliaOp(EnqueueArgs(queue, global, local), devOut, DIMX, DIMY);
}
int main(void) {
try {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Fractal Demo");
wnd.makeCurrent();
/* Create an image object which creates the necessary
* textures and pixel buffer objects to hold the image
* */
forge::Image img(DIMX, DIMY, FG_RGBA, forge::u8);
/*
* Helper function to create a CLGL interop context.
* This function checks for if the extension is available
* and creates the context on the appropriate device.
* Note: context and queue are defined in cl_helpers.h
*/
context = createCLGLContext(wnd);
Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
queue = CommandQueue(context, device);
/* copy your data into the pixel buffer object exposed by
* forge::Image class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
cl::Buffer devOut(context, CL_MEM_READ_WRITE, IMG_SIZE);
kernel(devOut, queue);
GfxHandle* handle = 0;
// create GL-CPU interop buffer
createGLBuffer(&handle, img.pixels(), FORGE_IMAGE_BUFFER);
// copy the data from compute buffer to graphics buffer
copyToGLBuffer(handle, (ComputeResourceHandle)devOut(), img.size());
do { wnd.draw(img); } while (!wnd.close());
// destroy GL-CPU Interop buffer
releaseGLBuffer(handle);
} catch (forge::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
} catch (cl::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
}
return 0;
}
|
Bubblechart with Transparency¶
CPU¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <algorithm>
#include <cmath>
#include <complex>
#include <functional>
#include <iostream>
#include <iterator>
#include <random>
#include <vector>
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
const float FRANGE_START = 0.f;
const float FRANGE_END = 2.f * 3.1415926f;
using namespace std;
/**
 * Samples `map` over [range_start, range_end) in steps of `dx` and appends
 * the samples to `vec` as interleaved (x, map(x)) pairs.
 *
 * Nothing is appended when the range is empty or when `dx` is not strictly
 * positive. The latter previously looped forever (growing `vec` without
 * bound) for dx <= 0 with range_start < range_end.
 *
 * @param range_start first sample position
 * @param range_end   exclusive upper bound of the sample positions
 * @param dx          step between samples; must be > 0 to produce output
 * @param vec         output vector; existing contents are kept
 * @param map         function evaluated at every sample position
 */
void map_range_to_vec_vbo(float range_start, float range_end, float dx,
                          std::vector<float>& vec, float (*map)(float)) {
    // Written with negated comparisons so NaN arguments are rejected as well.
    if (!(dx > 0.0f) || !(range_start < range_end)) return;
    for (float i = range_start; i < range_end; i += dx) {
        vec.push_back(i);
        vec.push_back((*map)(i));
    }
}
// Entry point of the CPU bubble-chart demo: samples cosine and tangent on the
// host, generates random per-point colors, alphas and radii, uploads them to
// two forge::Plot objects and redraws the chart until the window is closed.
int main(void) {
// interleaved (x, f(x)) samples, two floats per point
std::vector<float> cosData;
std::vector<float> tanData;
map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, cosData, &cosf);
map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, tanData, &tanf);
std::random_device r;
std::default_random_engine e1(r());
std::mt19937_64 gen(r());
std::uniform_real_distribution<float> nDist(0.0f, 1.0f);
std::uniform_real_distribution<float> cDist(0.2f, 0.6f);
std::uniform_real_distribution<float> fDist(0.4f, 0.6f);
// std::bind stores copies of its arguments, so clr and alp each own a
// separate copy of `gen`
auto clr = std::bind(cDist, gen);
auto rnd = std::bind(nDist, e1);
auto alp = std::bind(fDist, gen);
// NOTE(review): tanData holds two floats per point, so these vectors are
// sized from the float count rather than the point count (size() / 2) that
// the plots below are created with -- confirm whether that is intended.
std::vector<float> colors(3 * tanData.size());
std::vector<float> alphas(tanData.size());
std::vector<float> radii(tanData.size());
std::generate(colors.begin(), colors.end(), clr);
// radii are drawn from 20 + 60 * [0, 1)
std::generate(radii.begin(), radii.end(),
[&] { return 20.0f + 60.0f * rnd(); });
std::generate(alphas.begin(), alphas.end(), alp);
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Bubble chart with Transparency Demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_2D);
chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);
/* Create several plot objects which creates the necessary
* vertex buffer objects to hold the different plot types
*/
forge::Plot plt1 =
chart.plot((unsigned)(cosData.size() / 2), forge::f32, FG_PLOT_LINE,
FG_MARKER_TRIANGLE); // or specify a specific plot type
forge::Plot plt2 =
chart.plot((unsigned)(tanData.size() / 2), forge::f32, FG_PLOT_LINE,
FG_MARKER_CIRCLE); // last parameter specifies marker shape
/* Set plot colors */
plt1.setColor(FG_RED);
plt2.setColor(FG_GREEN); // use a forge predefined color
/* Set plot legends */
plt1.setLegend("Cosine");
plt2.setLegend("Tangent");
/* set plot global marker size */
plt1.setMarkerSize(20);
/* copy your data into the opengl buffer object exposed by
* forge::Plot class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
// handles[0]: plt1 vertices; handles[1..4]: plt2 vertices, colors, alphas, radii
GfxHandle* handles[5];
// create GL-CPU interop buffers
createGLBuffer(&handles[0], plt1.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[1], plt2.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[2], plt2.colors(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[3], plt2.alphas(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[4], plt2.radii(), FORGE_VERTEX_BUFFER);
// copy the data from compute buffer to graphics buffer
copyToGLBuffer(handles[0], (ComputeResourceHandle)cosData.data(),
plt1.verticesSize());
copyToGLBuffer(handles[1], (ComputeResourceHandle)tanData.data(),
plt2.verticesSize());
/* update color value for tan graph */
copyToGLBuffer(handles[2], (ComputeResourceHandle)colors.data(),
plt2.colorsSize());
/* update alpha values for tan graph */
copyToGLBuffer(handles[3], (ComputeResourceHandle)alphas.data(),
plt2.alphasSize());
/* update marker sizes for tan graph markers */
copyToGLBuffer(handles[4], (ComputeResourceHandle)radii.data(),
plt2.radiiSize());
// keep redrawing the chart until the window reports it should close
do { wnd.draw(chart); } while (!wnd.close());
// destroy GL-CPU Interop buffers
releaseGLBuffer(handles[0]);
releaseGLBuffer(handles[1]);
releaseGLBuffer(handles[2]);
releaseGLBuffer(handles[3]);
releaseGLBuffer(handles[4]);
return 0;
}
|
CUDA¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <cuda_runtime.h>
#include <curand.h>
#include <curand_kernel.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
#include <iostream>
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
static const float DX = 0.1f;
static const float FRANGE_START = 0.f;
static const float FRANGE_END = 2 * 3.141592f;
static const size_t DATA_SIZE = (size_t)((FRANGE_END - FRANGE_START) / DX);
curandState_t* state;
void kernel(float* dev_out, int functionCode, float* colors, float* alphas,
float* radii);
// Ceiling of a / b for non-negative a and positive b.
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
/**
 * Initialises one cuRAND state per element of `states`.
 *
 * Expects a 1-D launch. The grid is rounded up to a whole number of blocks,
 * so threads whose index is not below `count` must not touch `states`;
 * without this guard they wrote past the end of the allocation.
 *
 * @param states device array holding `count` curandState entries
 * @param seed   seed shared by all states; the thread index is the sequence
 * @param count  number of entries in `states`
 */
__global__ void setupRandomKernel(curandState* states, unsigned long long seed,
                                  size_t count) {
    unsigned tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (tid < count) curand_init(seed, tid, 0, &states[tid]);
}

// Entry point of the CUDA bubble-chart demo: allocates the device buffers,
// fills them with kernel(), uploads them to two forge::Plot objects and
// redraws the chart until the window is closed.
int main(void) {
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&state, DATA_SIZE * sizeof(curandState_t)));
    // clang-format off
    setupRandomKernel<<<divup(DATA_SIZE, 32), 32>>>(state, 314567, DATA_SIZE);
    // clang-format on
    // a launch returns no status of its own; query it explicitly
    FORGE_CUDA_CHECK(cudaGetLastError());

    float* cos_out;
    float* tan_out;
    float* colors_out;
    float* alphas_out;
    float* radii_out;

    // vertices hold two floats per point, colors three, alphas and radii one
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&cos_out, sizeof(float) * DATA_SIZE * 2));
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&tan_out, sizeof(float) * DATA_SIZE * 2));
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&colors_out, sizeof(float) * DATA_SIZE * 3));
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&alphas_out, sizeof(float) * DATA_SIZE));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&radii_out, sizeof(float) * DATA_SIZE));

    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(DIMX, DIMY, "Bubble chart with Transparency Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);

    /* Create several plot objects which creates the necessary
     * vertex buffer objects to hold the different plot types
     */
    forge::Plot plt1 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_TRIANGLE);
    forge::Plot plt2 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_CIRCLE);

    /* Set plot colors */
    plt1.setColor(FG_RED);
    plt2.setColor(FG_GREEN);  // use a forge predefined color
    /* Set plot legends */
    plt1.setLegend("Cosine");
    plt2.setLegend("Tangent");
    /* set plot global marker size */
    plt1.setMarkerSize(20);

    /* copy your data into the opengl buffer object exposed by
     * forge::Plot class and then proceed to rendering.
     * To help the users with copying the data from compute
     * memory to display memory, Forge provides copy headers
     * along with the library to help with this task
     */
    GfxHandle* handles[5];

    // create GL-CUDA interop buffers
    createGLBuffer(&handles[0], plt1.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], plt2.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], plt2.colors(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[3], plt2.alphas(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[4], plt2.radii(), FORGE_VERTEX_BUFFER);

    // function code 0 = cosine (vertices only), 1 = tangent (all attributes)
    kernel(cos_out, 0, NULL, NULL, NULL);
    kernel(tan_out, 1, colors_out, alphas_out, radii_out);

    // copy the data from compute buffer to graphics buffer
    copyToGLBuffer(handles[0], (ComputeResourceHandle)cos_out,
                   plt1.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)tan_out,
                   plt2.verticesSize());
    /* update color value for tan graph */
    copyToGLBuffer(handles[2], (ComputeResourceHandle)colors_out,
                   plt2.colorsSize());
    /* update alpha values for tan graph */
    copyToGLBuffer(handles[3], (ComputeResourceHandle)alphas_out,
                   plt2.alphasSize());
    /* update marker sizes for tan graph markers */
    copyToGLBuffer(handles[4], (ComputeResourceHandle)radii_out,
                   plt2.radiiSize());

    // keep redrawing the chart until the window reports it should close
    do { wnd.draw(chart); } while (!wnd.close());

    // destroy GL-CUDA Interop buffers
    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);
    releaseGLBuffer(handles[3]);
    releaseGLBuffer(handles[4]);

    // destroy CUDA handles
    FORGE_CUDA_CHECK(cudaFree(cos_out));
    FORGE_CUDA_CHECK(cudaFree(tan_out));
    FORGE_CUDA_CHECK(cudaFree(colors_out));
    FORGE_CUDA_CHECK(cudaFree(alphas_out));
    FORGE_CUDA_CHECK(cudaFree(radii_out));
    // the random states were allocated above and were previously leaked
    FORGE_CUDA_CHECK(cudaFree(state));
    return 0;
}
/**
 * Writes interleaved (x, f(x)) pairs, one pair per thread (1-D launch).
 *
 * @param out          device array with room for 2 * n floats
 * @param functionCode 0 selects cosine, 1 tangent, anything else sine
 * @param frange_start x value of element 0
 * @param dx           spacing between consecutive x values
 * @param n            number of points; threads with id >= n do nothing
 */
__global__ void mapKernel(float* out, int functionCode, float frange_start,
                          float dx, int n) {
    int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id >= n) return;  // the grid is rounded up to whole blocks

    float x = frange_start + id * dx;
    float y;
    switch (functionCode) {
        case 0: y = cosf(x); break;
        case 1: y = tanf(x); break;
        default: y = sinf(x); break;
    }
    out[2 * id + 0] = x;
    out[2 * id + 1] = y;
}

/**
 * Fills three consecutive floats per element with curand_uniform draws.
 *
 * @param colors device array with room for 3 * n floats
 * @param states device array of n initialised cuRAND states
 * @param n      number of elements; threads with id >= n do nothing
 */
__global__ void colorsKernel(float* colors, curandState* states, int n) {
    int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id >= n) return;

    colors[3 * id + 0] = curand_uniform(&states[id]);
    colors[3 * id + 1] = curand_uniform(&states[id]);
    colors[3 * id + 2] = curand_uniform(&states[id]);
}

/**
 * Writes one value per element: curand_uniform(...) * scale + min.
 *
 * @param out    device array with room for n floats
 * @param states device array of n initialised cuRAND states
 * @param min    offset added to every value
 * @param scale  factor applied to the uniform draw
 * @param n      number of elements; threads with id >= n do nothing
 */
__global__ void randKernel(float* out, curandState* states, float min,
                           float scale, int n) {
    int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id >= n) return;

    out[id] = curand_uniform(&states[id]) * scale + min;
}

/**
 * Host-side launcher: fills `dev_out` with the selected function and, for
 * every non-null optional buffer, fills it with random values.
 *
 * The grid is DATA_SIZE rounded up to whole 32-thread blocks, so the point
 * count is passed to every kernel to keep the surplus threads from writing
 * past the end of the buffers.
 *
 * @param dev_out      device array receiving 2 * DATA_SIZE floats
 * @param functionCode forwarded to mapKernel (0 cosine, 1 tangent, else sine)
 * @param colors       optional; 3 * DATA_SIZE floats when non-null
 * @param alphas       optional; DATA_SIZE floats scaled by 1 with offset 0
 * @param radii        optional; DATA_SIZE floats scaled by 60 with offset 20
 */
void kernel(float* dev_out, int functionCode, float* colors, float* alphas,
            float* radii) {
    static const dim3 threads(32);
    dim3 blocks(divup(DATA_SIZE, 32));
    const int count = static_cast<int>(DATA_SIZE);

    // clang-format off
    mapKernel<<<blocks, threads>>>(dev_out, functionCode, FRANGE_START, DX, count);
    FORGE_CUDA_CHECK(cudaGetLastError());

    if (colors) {
        colorsKernel<<<blocks, threads>>>(colors, state, count);
        FORGE_CUDA_CHECK(cudaGetLastError());
    }
    if (alphas) {
        randKernel<<<blocks, threads>>>(alphas, state, 0.0f, 1.0f, count);
        FORGE_CUDA_CHECK(cudaGetLastError());
    }
    if (radii) {
        randKernel<<<blocks, threads>>>(radii, state, 20.0f, 60.0f, count);
        FORGE_CUDA_CHECK(cudaGetLastError());
    }
    // clang-format on
}
|
OpenCL¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#include "cl_helpers.h"
#include <algorithm>
#include <cmath>
#include <ctime>
#include <iostream>
#include <sstream>
#include <vector>
using namespace cl;
using namespace std;
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
static const float DX = 0.1f;
static const float FRANGE_START = 0.f;
static const float FRANGE_END = 2 * 3.141592f;
static const int DATA_SIZE = (int)((FRANGE_END - FRANGE_START) / DX);
#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>
// clang-format off
// OpenCL C source of the chart program, built at run time by kernel().
// It defines an integer-hash rand() helper plus three kernels: randKernel
// (one scaled random value per element), colorsKernel (three random values
// per element) and mapKernel (interleaved x / f(x) pairs).
// rand() uses single-precision literals so that the program does not perform
// double-precision arithmetic on the device.
static const std::string chartKernels =
R"EOK(
float rand(int x) {
x = (x << 13) ^ x;
return (1.0f - ((x * (x * x * 15731 + 789221) + 1376312589) & 0x7fffffff) /
1073741824.0f);
}
kernel void randKernel(global float* out, unsigned seed, float min, float scale,
int DATA_SIZE) {
int id = get_global_id(0);
if (id < DATA_SIZE) out[id] = scale * (1 + rand(seed * id)) / 2.0f + min;
}
kernel void colorsKernel(global float* out, unsigned rseed, unsigned gseed,
unsigned bseed, int DATA_SIZE) {
int id = get_global_id(0);
if (id < DATA_SIZE) {
out[3 * id + 0] = (1 + rand(rseed * id)) / 2.0f;
out[3 * id + 1] = (1 + rand(gseed * id)) / 2.0f;
out[3 * id + 2] = (1 + rand(bseed * id)) / 2.0f;
}
}
kernel void mapKernel(global float* out, int functionCode, float FRANGE_START,
float DX, int DATA_SIZE) {
int id = get_global_id(0);
float x = FRANGE_START + id * DX;
float y;
switch (functionCode) {
case 0: y = cos(x); break;
case 1: y = tan(x); break;
default: y = sin(x); break;
}
if (id < DATA_SIZE) {
out[2 * id + 0] = x;
out[2 * id + 1] = y;
}
}
)EOK";
// clang-format on
// Ceiling of a / b for non-negative a and positive b.
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
/**
 * Builds the chart program on first use and enqueues the kernels that fill
 * the plot buffers.
 *
 * mapKernel is always enqueued for `devOut`. The bits of `outFlags` select
 * the optional outputs: 0x1 colors, 0x2 alphas (offset 0, scale 1) and
 * 0x4 radii (offset 20, scale 60). Buffers whose bit is clear are not used.
 *
 * The C random generator that supplies the kernel seeds is seeded once,
 * together with the one-time build, instead of being re-seeded with the
 * current time on every call.
 *
 * On a build failure the build log is printed and the process exits with
 * status 255.
 */
void kernel(cl::Buffer& devOut, int fnCode, int outFlags, cl::Buffer& colorsOut,
            cl::Buffer& alphasOut, cl::Buffer& radiiOut,
            cl::CommandQueue& queue, cl::Device& device) {
    static bool compileFlag = true;
    static cl::Program prog;
    static cl::Kernel randKernel, colorsKernel, mapKernel;

    if (compileFlag) {
        std::srand((unsigned)(std::time(0)));
        try {
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), chartKernels,
                               false);
            std::vector<cl::Device> devs;
            devs.push_back(device);
            prog.build(devs);

            randKernel   = cl::Kernel(prog, "randKernel");
            colorsKernel = cl::Kernel(prog, "colorsKernel");
            mapKernel    = cl::Kernel(prog, "mapKernel");
        } catch (const cl::Error& err) {
            // catch by const reference: avoids copying/slicing the exception
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    // global size is DATA_SIZE rounded up to a multiple of the local size;
    // the kernels themselves skip work-items with id >= DATA_SIZE
    static const NDRange local(32);
    NDRange global(local[0] * divup(DATA_SIZE, (int)(local[0])));

    mapKernel.setArg(0, devOut);
    mapKernel.setArg(1, fnCode);
    mapKernel.setArg(2, FRANGE_START);
    mapKernel.setArg(3, DX);
    mapKernel.setArg(4, DATA_SIZE);
    queue.enqueueNDRangeKernel(mapKernel, cl::NullRange, global, local);

    if (outFlags & 0x00000001) {
        colorsKernel.setArg(0, colorsOut);
        colorsKernel.setArg(1, std::rand());
        colorsKernel.setArg(2, std::rand());
        colorsKernel.setArg(3, std::rand());
        colorsKernel.setArg(4, DATA_SIZE);
        queue.enqueueNDRangeKernel(colorsKernel, cl::NullRange, global, local);
    }

    if (outFlags & 0x00000002) {
        randKernel.setArg(0, alphasOut);
        randKernel.setArg(1, std::rand());
        randKernel.setArg(2, 0.0f);
        randKernel.setArg(3, 1.0f);
        randKernel.setArg(4, DATA_SIZE);
        queue.enqueueNDRangeKernel(randKernel, cl::NullRange, global, local);
    }

    if (outFlags & 0x00000004) {
        randKernel.setArg(0, radiiOut);
        randKernel.setArg(1, std::rand());
        randKernel.setArg(2, 20.0f);
        randKernel.setArg(3, 60.0f);
        randKernel.setArg(4, DATA_SIZE);
        queue.enqueueNDRangeKernel(randKernel, cl::NullRange, global, local);
    }
}
int main(void) {
try {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Bubble chart with Transparency Demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_2D);
chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);
/* Create several plot objects which creates the necessary
* vertex buffer objects to hold the different plot types
*/
forge::Plot plt1 =
chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_TRIANGLE);
forge::Plot plt2 =
chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_CIRCLE);
/* Set plot colors */
plt1.setColor(FG_RED);
plt2.setColor(FG_GREEN); // use a forge predefined color
/* Set plot legends */
plt1.setLegend("Cosine");
plt2.setLegend("Tangent");
/* set plot global marker size */
plt1.setMarkerSize(20);
/*
* Helper function to create a CLGL interop context.
* This function checks for if the extension is available
* and creates the context on the appropriate device.
* Note: context and queue are defined in cl_helpers.h
*/
context = createCLGLContext(wnd);
Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
queue = CommandQueue(context, device);
GfxHandle* handles[5];
// create GL-OpenCL interop buffers
createGLBuffer(&handles[0], plt1.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[1], plt2.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[2], plt2.colors(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[3], plt2.alphas(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[4], plt2.radii(), FORGE_VERTEX_BUFFER);
cl::Buffer cosOut(context, CL_MEM_READ_WRITE,
sizeof(float) * DATA_SIZE * 2);
cl::Buffer tanOut(context, CL_MEM_READ_WRITE,
sizeof(float) * DATA_SIZE * 2);
cl::Buffer colorsOut(context, CL_MEM_READ_WRITE,
sizeof(float) * DATA_SIZE * 3);
cl::Buffer alphasOut(context, CL_MEM_READ_WRITE,
sizeof(float) * DATA_SIZE);
cl::Buffer radiiOut(context, CL_MEM_READ_WRITE,
sizeof(float) * DATA_SIZE);
cl::Buffer dummy;
kernel(cosOut, 0, 0, dummy, dummy, dummy, queue, device);
kernel(tanOut, 1, 0x00000007, colorsOut, alphasOut, radiiOut, queue,
device);
/* copy your data into the opengl buffer object exposed by
* forge::Plot class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
copyToGLBuffer(handles[0], (ComputeResourceHandle)cosOut(),
plt1.verticesSize());
copyToGLBuffer(handles[1], (ComputeResourceHandle)tanOut(),
plt2.verticesSize());
/* update color value for tan graph */
copyToGLBuffer(handles[2], (ComputeResourceHandle)colorsOut(),
plt2.colorsSize());
/* update alpha values for tan graph */
copyToGLBuffer(handles[3], (ComputeResourceHandle)alphasOut(),
plt2.alphasSize());
/* update marker sizes for tan graph markers */
copyToGLBuffer(handles[4], (ComputeResourceHandle)radiiOut(),
plt2.radiiSize());
do { wnd.draw(chart); } while (!wnd.close());
// destroy GL-OpenCL Interop buffer
releaseGLBuffer(handles[0]);
releaseGLBuffer(handles[1]);
releaseGLBuffer(handles[2]);
releaseGLBuffer(handles[3]);
releaseGLBuffer(handles[4]);
} catch (forge::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
} catch (cl::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
}
return 0;
}
|
2D Vector Field¶
CPU¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>
// window size in pixels
const unsigned DIMX = 640;
const unsigned DIMY = 480;
const float PI = 3.14159265359f;
// sampling grid: coordinates start at MINIMUM and advance by STEP
const float MINIMUM = 1.0f;
const float MAXIMUM = 20.f;
const float STEP = 2.0f;
// number of grid samples per axis
const float NELEMS = (MAXIMUM - MINIMUM + 1) / STEP;
// four (x, y) pairs drawn as the scatter plot markers
const unsigned DPOINTS[] = {5, 5, 5, 15, 15, 5, 15, 15};
using namespace std;

/**
 * Generates the vector-field sample grid.
 *
 * Both output vectors are emptied first and then filled row by row with
 * interleaved pairs: `points` receives the (x, y) position of every grid
 * sample and `dirs` the matching direction
 * (sin(2*PI*x/10), sin(2*PI*y/10)).
 *
 * Previously only `points` was cleared, so a non-empty `dirs` ended up with
 * stale leading values and no longer lined up with `points`.
 *
 * @param points output: 2 floats per grid sample
 * @param dirs   output: 2 floats per grid sample
 */
void generatePoints(std::vector<float> &points, std::vector<float> &dirs) {
    points.clear();
    dirs.clear();
    for (int j = 0; j < NELEMS; ++j) {
        float y = MINIMUM + j * STEP;
        for (int i = 0; i < NELEMS; ++i) {
            float x = MINIMUM + i * STEP;
            points.push_back(x);
            points.push_back(y);
            dirs.push_back(std::sin(2 * PI * x / 10.f));
            dirs.push_back(std::sin(2 * PI * y / 10.f));
        }
    }
}
int main(void) {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Vector Field Demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_2D);
chart.setAxesLimits(MINIMUM - 1.0f, MAXIMUM, MINIMUM - 1.0f, MAXIMUM);
chart.setAxesTitles("x-axis", "y-axis");
forge::Plot divPoints =
chart.plot(4, forge::u32, FG_PLOT_SCATTER, FG_MARKER_CIRCLE);
divPoints.setColor(0.9f, 0.9f, 0.0f, 1.f);
divPoints.setLegend("Convergence Points");
divPoints.setMarkerSize(24);
forge::VectorField field =
chart.vectorField((unsigned)(NELEMS * NELEMS), forge::f32);
field.setColor(0.f, 0.6f, 0.3f, 1.f);
std::vector<float> points;
std::vector<float> dirs;
generatePoints(points, dirs);
GfxHandle *handles[3];
createGLBuffer(&handles[0], divPoints.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[1], field.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);
copyToGLBuffer(handles[0], (ComputeResourceHandle)DPOINTS,
divPoints.verticesSize());
copyToGLBuffer(handles[1], (ComputeResourceHandle)points.data(),
field.verticesSize());
copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs.data(),
field.directionsSize());
do { wnd.draw(chart); } while (!wnd.close());
// destroy GL-cpu interop buffers
releaseGLBuffer(handles[0]);
releaseGLBuffer(handles[1]);
releaseGLBuffer(handles[2]);
return 0;
}
|
CUDA¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
// NOTE: no 'f' suffix, so every expression that uses PI is evaluated in
// double precision.
#define PI 3.14159265359
// Window size in pixels, passed to forge::Window in main.
const unsigned DIMX = 640;
const unsigned DIMY = 480;
// Sampling grid: coordinates start at MINIMUM and advance by STEP. MAXIMUM is
// also used as the upper axis limit of the chart.
const float MINIMUM = 1.0f;
const float MAXIMUM = 20.f;
const float STEP = 2.0f;
// Number of samples per axis. Kept as a float (10 for the values above) and
// cast to an integer type wherever a count is required.
const float NELEMS = (MAXIMUM - MINIMUM + 1) / STEP;
// Four (x, y) pairs copied to the device and used as scatter-plot vertices.
const unsigned DPOINTS[] = {5, 5, 5, 15, 15, 5, 15, 15};
// Launches pointGenKernel to fill the two device arrays (defined after main).
void generatePoints(float* points, float* dirs);
// Integer division of a by b, rounded up (for positive operands).
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
int main(void) {
    // The window has to be created before any other forge object so that
    // the OpenGL context those objects need already exists.
    forge::Window wnd(DIMX, DIMY, "Vector Field Demo");
    wnd.makeCurrent();

    const float axisMin = MINIMUM - 1.0f;
    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLimits(axisMin, MAXIMUM, axisMin, MAXIMUM);
    chart.setAxesTitles("x-axis", "y-axis");

    // Scatter plot with four markers; its vertices come from DPOINTS.
    forge::Plot divPoints =
        chart.plot(4, forge::u32, FG_PLOT_SCATTER, FG_MARKER_CIRCLE);
    divPoints.setColor(0.9f, 0.9f, 0.0f, 1.f);
    divPoints.setLegend("Convergence Points");
    divPoints.setMarkerSize(24);

    // One vector per grid node.
    size_t npoints = (size_t)(NELEMS * NELEMS);
    forge::VectorField field =
        chart.vectorField((unsigned)(npoints), forge::f32);
    field.setColor(0.f, 0.6f, 0.3f, 1.f);

    // Device-side staging arrays: marker positions, node positions and
    // node directions (two floats per node for the latter two).
    const size_t markerBytes = 8 * sizeof(unsigned);
    const size_t fieldBytes  = 2 * npoints * sizeof(float);
    unsigned* dpoints;
    float* points;
    float* dirs;
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dpoints, markerBytes));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&points, fieldBytes));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dirs, fieldBytes));

    enum { DIV_VERTICES, FIELD_VERTICES, FIELD_DIRECTIONS, BUFFER_COUNT };
    GfxHandle* handles[BUFFER_COUNT];
    createGLBuffer(&handles[DIV_VERTICES], divPoints.vertices(),
                   FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[FIELD_VERTICES], field.vertices(),
                   FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[FIELD_DIRECTIONS], field.directions(),
                   FORGE_VERTEX_BUFFER);

    FORGE_CUDA_CHECK(
        cudaMemcpy(dpoints, DPOINTS, markerBytes, cudaMemcpyHostToDevice));
    generatePoints(points, dirs);

    copyToGLBuffer(handles[DIV_VERTICES], (ComputeResourceHandle)dpoints,
                   divPoints.verticesSize());
    copyToGLBuffer(handles[FIELD_VERTICES], (ComputeResourceHandle)points,
                   field.verticesSize());
    copyToGLBuffer(handles[FIELD_DIRECTIONS], (ComputeResourceHandle)dirs,
                   field.directionsSize());

    // Render until the window reports that it should close.
    do { wnd.draw(chart); } while (!wnd.close());

    // destroy GL-CUDA interop buffers
    for (int b = 0; b < BUFFER_COUNT; ++b) { releaseGLBuffer(handles[b]); }

    // destroy CUDA handles
    FORGE_CUDA_CHECK(cudaFree(dpoints));
    FORGE_CUDA_CHECK(cudaFree(points));
    FORGE_CUDA_CHECK(cudaFree(dirs));
    return 0;
}
/**
 * Fill the node positions and directions of an nelems x nelems grid.
 *
 * Launched on a 2D grid; thread (i, j) handles node (i, j) and threads
 * outside the grid do nothing. For node id = i + j * nelems the kernel
 * writes
 *   points[2*id .. 2*id+1] = (x, y)
 *   dirs  [2*id .. 2*id+1] = (sin(2*pi*x/10), sin(2*pi*y/10))
 * with x = minimum + i * step and y = minimum + j * step.
 *
 * @param points  output, at least 2 * nelems * nelems floats
 * @param dirs    output, at least 2 * nelems * nelems floats
 * @param nelems  number of nodes along each axis
 * @param minimum coordinate of the first node on each axis
 * @param step    spacing between neighbouring nodes
 */
__global__ void pointGenKernel(float* points, float* dirs, int nelems,
                               float minimum, float step) {
    // PI is a double macro; narrow it once so the argument of sinf is
    // computed in single precision instead of being promoted to double.
    const float twoPi = 2.0f * (float)(PI);

    int i = blockDim.x * blockIdx.x + threadIdx.x;
    int j = blockDim.y * blockIdx.y + threadIdx.y;
    if (i < nelems && j < nelems) {
        int id  = i + j * nelems;
        float x = minimum + i * step;
        float y = minimum + j * step;
        points[2 * id + 0] = x;
        points[2 * id + 1] = y;
        dirs[2 * id + 0]   = sinf(twoPi * x / 10.f);
        dirs[2 * id + 1]   = sinf(twoPi * y / 10.f);
    }
}
/**
 * Launch pointGenKernel over the whole sample grid.
 *
 * Uses 8x8 thread blocks and enough blocks to cover (int)NELEMS nodes per
 * axis. The launch itself is checked so that an invalid configuration is
 * reported here rather than by some later, unrelated CUDA call.
 *
 * @param points device array receiving the node positions
 * @param dirs   device array receiving the node directions
 */
void generatePoints(float* points, float* dirs) {
    static dim3 threads(8, 8);
    const int nelems = (int)(NELEMS);
    dim3 blocks(divup(nelems, threads.x), divup(nelems, threads.y));
    // clang-format off
    pointGenKernel<<<blocks, threads>>>(points, dirs, nelems, MINIMUM,
                                        STEP);
    // clang-format on
    // A kernel launch returns no status; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());
}
|
OpenCL¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#include "cl_helpers.h"
#include <algorithm>
#include <cmath>
#include <ctime>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>
using namespace cl;
using namespace std;
// Window size in pixels, passed to forge::Window in main.
const unsigned DIMX = 640;
const unsigned DIMY = 480;
// Sampling grid: coordinates start at MINIMUM and advance by STEP. MAXIMUM is
// also used as the upper axis limit of the chart.
const float MINIMUM = 1.0f;
const float MAXIMUM = 20.f;
const float STEP = 2.0f;
// Number of samples per axis. Kept as a float (10 for the values above) and
// cast to an integer type wherever a count is required.
const float NELEMS = (MAXIMUM - MINIMUM + 1) / STEP;
// Four (x, y) pairs written to a device buffer and used as scatter vertices.
const unsigned DPOINTS[] = {5, 5, 5, 15, 15, 5, 15, 15};
#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>
// clang-format off
// OpenCL C source of pointGenKernel. Work-item (i, j) of a 2D range writes
// the position (x, y) and the direction (sin(2*PI*x/10), sin(2*PI*y/10)) of
// grid node i + j * NELEMS; work-items outside the grid do nothing.
// All floating-point literals carry an 'f' suffix so that the program does
// not depend on double-precision support, which is optional in OpenCL.
static const std::string fieldKernel =
R"EOK(
constant float PI = 3.14159265359f;
kernel void pointGenKernel(global float* points, global float* dirs, int NELEMS,
float MINIMUM, float STEP) {
int i = get_global_id(0);
int j = get_global_id(1);
if (i < NELEMS && j < NELEMS) {
int id = i + j * NELEMS;
float x = MINIMUM + i * STEP;
float y = MINIMUM + j * STEP;
points[2 * id + 0] = x;
points[2 * id + 1] = y;
dirs[2 * id + 0] = sin(2.0f * PI * x / 10.0f);
dirs[2 * id + 1] = sin(2.0f * PI * y / 10.0f);
}
}
)EOK";
// clang-format on
// Smallest number of b-sized groups that covers a items (positive operands).
inline int divup(int a, int b) {
    const int numerator = a + (b - 1);
    return numerator / b;
}
/**
 * Run pointGenKernel to fill the node position and direction buffers.
 *
 * The OpenCL program is built on the first call only and cached in
 * function-local statics. A build failure prints the error and the build
 * log, then terminates the process with exit code 255.
 *
 * @param points buffer receiving the node positions
 * @param dirs   buffer receiving the node directions
 * @param queue  command queue the kernel is enqueued on; its context is
 *               used to create the program
 * @param device device the program is built for
 */
void generatePoints(cl::Buffer& points, cl::Buffer& dirs,
                    cl::CommandQueue& queue, cl::Device& device) {
    static bool compileFlag = true;
    static cl::Program prog;
    static cl::Kernel pointGenKernel;

    if (compileFlag) {
        try {
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), fieldKernel,
                               false);
            std::vector<cl::Device> devs;
            devs.push_back(device);
            prog.build(devs);
            pointGenKernel = cl::Kernel(prog, "pointGenKernel");
        } catch (const cl::Error& err) {
            // Caught by const reference: no copy of the exception object.
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    // 8x8 work-groups; the global range is rounded up to a whole number of
    // work-groups and the kernel itself ignores the surplus work-items.
    static const NDRange local(8, 8);
    NDRange global(local[0] * divup((int)(NELEMS), (int)(local[0])),
                   local[1] * divup((int)(NELEMS), (int)(local[1])));

    pointGenKernel.setArg(0, points);
    pointGenKernel.setArg(1, dirs);
    pointGenKernel.setArg(2, (int)NELEMS);
    pointGenKernel.setArg(3, MINIMUM);
    pointGenKernel.setArg(4, STEP);
    queue.enqueueNDRangeKernel(pointGenKernel, cl::NullRange, global, local);
}
int main(void) {
try {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Vector Field Demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_2D);
chart.setAxesLimits(MINIMUM - 1.0f, MAXIMUM, MINIMUM - 1.0f, MAXIMUM);
chart.setAxesTitles("x-axis", "y-axis");
forge::Plot divPoints =
chart.plot(4, forge::u32, FG_PLOT_SCATTER, FG_MARKER_CIRCLE);
divPoints.setColor(0.9f, 0.9f, 0.0f, 1.f);
divPoints.setLegend("Convergence Points");
divPoints.setMarkerSize(24);
size_t npoints = (size_t)(NELEMS * NELEMS);
forge::VectorField field =
chart.vectorField((unsigned)(npoints), forge::f32);
field.setColor(0.f, 0.6f, 0.3f, 1.f);
/*
* Helper function to create a CLGL interop context.
* This function checks for if the extension is available
* and creates the context on the appropriate device.
* Note: context and queue are defined in cl_helpers.h
*/
context = createCLGLContext(wnd);
Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
queue = CommandQueue(context, device);
GfxHandle* handles[3];
createGLBuffer(&handles[0], divPoints.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[1], field.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);
cl::Buffer dpoints(context, CL_MEM_READ_WRITE, sizeof(unsigned) * 8);
cl::Buffer points(context, CL_MEM_READ_WRITE,
sizeof(float) * 2 * npoints);
cl::Buffer dirs(context, CL_MEM_READ_WRITE,
sizeof(float) * 2 * npoints);
queue.enqueueWriteBuffer(dpoints, CL_TRUE, 0, sizeof(unsigned) * 8,
DPOINTS);
generatePoints(points, dirs, queue, device);
copyToGLBuffer(handles[0], (ComputeResourceHandle)dpoints(),
divPoints.verticesSize());
copyToGLBuffer(handles[1], (ComputeResourceHandle)points(),
field.verticesSize());
copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs(),
field.directionsSize());
do { wnd.draw(chart); } while (!wnd.close());
// destroy GL-CUDA interop buffers
releaseGLBuffer(handles[0]);
releaseGLBuffer(handles[1]);
releaseGLBuffer(handles[2]);
} catch (forge::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
} catch (cl::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
}
return 0;
}
|
Histogram¶
CPU¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>
// Size of the generated noise image in pixels.
const unsigned IMGW = 256;
const unsigned IMGH = 256;
// Window size in pixels, passed to forge::Window in main.
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
// Number of histogram bins.
const unsigned NBINS = 256;
using namespace std;
// Host-side image: createBitmap allocates 4 bytes per pixel for it and
// destroyBitmap releases them.
struct Bitmap {
// Pixel data, 4 bytes per pixel.
unsigned char* ptr;
// Image dimensions in pixels.
unsigned width;
unsigned height;
};
// Noise field of IMGW x IMGH samples, fully computed by the constructor.
class PerlinNoise {
private:
// Random values in [0, 1] that the octaves are interpolated from.
float base[IMGW][IMGH];
// Normalized sum of all octaves; this is what noise() reads.
float perlin[IMGW][IMGH];
public:
// Seeds std::rand from the current time and fills both arrays.
PerlinNoise();
// Looks up the noise value at perlin[IMGW * u][IMGH * v]; callers in this
// file pass u and v in [0, 1).
float noise(float u, float v);
};
// Allocates a w x h bitmap with 4 bytes per pixel.
Bitmap createBitmap(unsigned w, unsigned h);
// Frees the pixel storage of bmp.
void destroyBitmap(Bitmap& bmp);
// Fills bmp with a freshly generated grayscale noise image.
void kernel(Bitmap& bmp);
// Counts the first channel of every pixel of bmp into hist_array (nbins
// entries) and writes 3 random color components per bin into hist_cols.
void populateBins(Bitmap& bmp, int* hist_array, const unsigned nbins,
float* hist_cols);
int main(int argc, char* argv[]) {
Bitmap bmp = createBitmap(IMGW, IMGH);
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Histogram Demo");
wnd.makeCurrent();
forge::Image img(IMGW, IMGH, FG_RGBA, forge::u8);
forge::Chart chart(FG_CHART_2D);
/* set x axis limits to maximum and minimum values of data
* and y axis limits to range [0, number of pixels ideally]
* but practically total number of pixels as y range will skew
* the histogram graph vertically. Therefore setting it to
* 25% of total number of pixels */
chart.setAxesLimits(0, 1, 0, IMGW * IMGH / (float)(NBINS / 4.0));
/*
* Create histogram object specifying number of bins
*/
forge::Histogram hist = chart.histogram(NBINS, forge::s32);
/*
* Set histogram colors
*/
hist.setColor(FG_YELLOW);
GfxHandle* handles[3];
createGLBuffer(&handles[0], img.pixels(), FORGE_IMAGE_BUFFER);
createGLBuffer(&handles[1], hist.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[2], hist.colors(), FORGE_VERTEX_BUFFER);
wnd.setColorMap((fg_color_map)(argc == 2 ? atoi(argv[1]) : 1));
do {
/*
* generate image, and prepare data to pass into
* Histogram's underlying vertex buffer object
*/
kernel(bmp);
copyToGLBuffer(handles[0], (ComputeResourceHandle)bmp.ptr, img.size());
// forge::copy(img, (const void*)bmp.ptr);
/* copy your data into the vertex buffer object exposed by
* forge::Histogram class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
std::vector<int> histArray(NBINS, 0);
std::vector<float> colArray(3 * NBINS, 0.0f);
populateBins(bmp, histArray.data(), NBINS, colArray.data());
copyToGLBuffer(handles[1], (ComputeResourceHandle)histArray.data(),
hist.verticesSize());
copyToGLBuffer(handles[2], (ComputeResourceHandle)colArray.data(),
hist.colorsSize());
/*
* Split the window into grid regions
*/
// wnd.draw(2, 2, 0, img, "Dynamic Perlin Noise" );
// wnd.draw(2, 2, 1, img, "Dynamic Perlin Noise" );
// wnd.draw(2, 1, 1, chart, "Histogram of Noisy Image");
wnd.draw(2, 3, 0, img, "Dynamic Perlin Noise");
wnd.draw(2, 3, 1, img, "Dynamic Perlin Noise");
wnd.draw(2, 3, 2, img, "Dynamic Perlin Noise");
wnd.draw(2, 2, 2, chart, "Histogram of Noisy Image");
wnd.draw(2, 2, 3, chart, "Histogram of Noisy Image");
wnd.swapBuffers();
} while (!wnd.close());
releaseGLBuffer(handles[0]);
releaseGLBuffer(handles[1]);
releaseGLBuffer(handles[2]);
return 0;
}
// Linear blend of x0 and x1: alpha = 0 yields x0, alpha = 1 yields x1.
float interp(float x0, float x1, float alpha) {
    const float keep = 1 - alpha;
    return x0 * keep + alpha * x1;
}
// Builds the whole noise field:
//   1. seed std::rand from the clock and fill `base` with values in [0, 1]
//   2. add seven octaves (periods 64, 32, ..., 1), each a bilinear
//      interpolation of `base` sampled on a lattice of that period, with
//      the weight halving from one octave to the next
//   3. divide by the sum of the weights
PerlinNoise::PerlinNoise() {
    std::srand((unsigned)(std::time(0)));

    for (unsigned row = 0; row < IMGW; row++) {
        for (unsigned col = 0; col < IMGH; col++) {
            base[row][col]   = std::rand() / (float)(RAND_MAX);
            perlin[row][col] = 0;
        }
    }

    const float persistence = 0.5f;
    float weight            = 1.0f;
    float weightSum         = 0.0f;

    int octave = 6;
    while (octave >= 0) {
        const int period = 1 << octave;
        const float freq = 1.0f / period;

        for (unsigned row = 0; row < IMGW; row++) {
            // Lattice lines enclosing this row; the far one wraps around.
            int rowLo = (row / period) * period;
            int rowHi = (rowLo + period) % IMGW;
            float rowBlend = (row - rowLo) * freq;

            for (unsigned col = 0; col < IMGH; col++) {
                int colLo = (col / period) * period;
                int colHi = (colLo + period) % IMGH;
                float colBlend = (col - colLo) * freq;

                float nearEdge =
                    interp(base[rowLo][colLo], base[rowHi][colLo], rowBlend);
                float farEdge =
                    interp(base[rowLo][colHi], base[rowHi][colHi], rowBlend);

                perlin[row][col] +=
                    (weight * interp(nearEdge, farEdge, colBlend));
            }
        }

        weightSum += weight;
        weight *= persistence;
        --octave;
    }

    for (unsigned row = 0; row < IMGW; row++) {
        for (unsigned col = 0; col < IMGH; col++) {
            perlin[row][col] /= weightSum;
        }
    }
}
// Returns the stored noise sample addressed by scaling u by IMGW and v by
// IMGH and truncating both to unsigned indices.
float PerlinNoise::noise(float u, float v) {
    const unsigned row = (unsigned)(IMGW * u);
    const unsigned col = (unsigned)(IMGH * v);
    return perlin[row][col];
}
// Allocates a host bitmap of w x h pixels with 4 bytes per pixel. The
// storage is not initialized; release it with destroyBitmap.
Bitmap createBitmap(unsigned w, unsigned h) {
    Bitmap bmp;
    bmp.ptr    = new unsigned char[4 * w * h];
    bmp.height = h;
    bmp.width  = w;
    return bmp;
}
// Frees the pixel array of bmp (the counterpart of createBitmap).
void destroyBitmap(Bitmap& bmp) {
    unsigned char* pixels = bmp.ptr;
    delete[] pixels;
}
// Generates a new noise field and writes it into bmp as an opaque
// grayscale image: the three color channels get the same value and the
// fourth channel is set to 255.
void kernel(Bitmap& bmp) {
    PerlinNoise perlin;

    for (unsigned y = 0; y < bmp.height; ++y) {
        const float v = y / (float)(bmp.height);
        for (unsigned x = 0; x < bmp.width; ++x) {
            const float u = x / (float)(bmp.width);
            const int offset = x + y * bmp.width;

            unsigned char gray = (unsigned char)(255 * perlin.noise(u, v));

            unsigned char* pixel = bmp.ptr + offset * 4;
            pixel[0] = gray;
            pixel[1] = gray;
            pixel[2] = gray;
            pixel[3] = 255;
        }
    }
}
/**
 * Accumulate the histogram of an image and pick a random color per bin.
 *
 * The first channel of every pixel (0..255) is mapped to a bin index
 * value / 255 * nbins and the matching counter is incremented. The counters
 * are not cleared here; the caller passes a zeroed array.
 *
 * @param bmp        image to analyse, 4 bytes per pixel
 * @param hist_array counters, nbins entries
 * @param nbins      number of bins
 * @param hist_cols  output colors, 3 floats per bin, each std::rand()/RAND_MAX
 */
void populateBins(Bitmap& bmp, int* hist_array, const unsigned nbins,
                  float* hist_cols) {
    // Without bins there is nothing to count into (and the color loop below
    // does not run either).
    if (nbins > 0) {
        for (unsigned y = 0; y < bmp.height; ++y) {
            for (unsigned x = 0; x < bmp.width; ++x) {
                int offset = x + y * bmp.width;
                unsigned char noiseVal = bmp.ptr[offset * 4];
                unsigned idx = (int)((float)noiseVal / 255.f * nbins);
                // A pixel value of 255 maps to exactly nbins, one past the
                // last counter; fold it into the last bin.
                if (idx >= nbins) { idx = nbins - 1; }
                hist_array[idx]++;
            }
        }
    }

    for (unsigned b = 0; b < nbins; ++b) {
        hist_cols[3 * b + 0] = std::rand() / (float)RAND_MAX;
        hist_cols[3 * b + 1] = std::rand() / (float)RAND_MAX;
        hist_cols[3 * b + 2] = std::rand() / (float)RAND_MAX;
    }
}
|
CUDA¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <cuComplex.h>
#include <cuda_runtime.h>
#include <curand.h>
#include <curand_kernel.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
// Size of the generated noise image in pixels.
const unsigned IMGW = 256;
const unsigned IMGH = 256;
// Window size in pixels, passed to forge::Window in main.
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
// Number of histogram bins; also the number of cuRAND states in `state`.
const unsigned NBINS = 256;
// Device array of cuRAND generator states, allocated and initialized in
// main and used by the noise and histogram-color kernels.
curandState_t* state;
// Image whose pixel storage is allocated with cudaMalloc by createBitmap
// (4 bytes per pixel) and freed by destroyBitmap.
struct Bitmap {
// Pixel data, 4 bytes per pixel.
unsigned char* ptr;
// Image dimensions in pixels.
unsigned width;
unsigned height;
};
// Owner of the two IMGW x IMGH float arrays used to build the noise image.
// Both are allocated with cudaMalloc in the constructor and freed in the
// destructor.
class PerlinNoise {
public:
// Random values that the octaves are interpolated from.
float* base;
// Normalized sum of all octaves.
float* perlin;
PerlinNoise();
~PerlinNoise();
// Recomputes `base` and `perlin` by launching the noise kernels.
void generateNoise();
};
// Allocates a w x h bitmap with 4 bytes per pixel using cudaMalloc.
Bitmap createBitmap(unsigned w, unsigned h);
// Frees the pixel storage of bmp with cudaFree.
void destroyBitmap(Bitmap& bmp);
// Regenerates the noise held by pn and renders it into bmp.
void kernel(Bitmap& bmp, PerlinNoise& pn);
// Computes the histogram of bmp into hist_array (nbins counters) and writes
// 3 random color components per bin into hist_cols.
void populateBins(Bitmap& bmp, int* hist_array, const unsigned nbins,
float* hist_cols);
// Initializes one cuRAND state per thread. Every thread uses the same seed
// and its own global 1D index as the sequence number. There is no bounds
// check, so the launch must not contain more threads than `states` has
// entries.
__global__ void setupRandomKernel(curandState* states,
                                  unsigned long long seed) {
    const unsigned generatorId = blockDim.x * blockIdx.x + threadIdx.x;
    curandState* slot          = states + generatorId;
    curand_init(seed, generatorId, 0, slot);
}
/**
 * Histogram demo (CUDA backend).
 *
 * Allocates the device bitmap and the cuRAND states, then renders in a loop
 * until the window is closed. The noise image and its histogram are
 * regenerated on every 8th frame; the other frames redraw the current data.
 * All device allocations made here are released before returning.
 *
 * @return always 0
 */
int main(void) {
    Bitmap bmp = createBitmap(IMGW, IMGH);

    FORGE_CUDA_CHECK(cudaMalloc((void**)&state, NBINS * sizeof(curandState_t)));
    // clang-format off
    setupRandomKernel<<<1, NBINS>>>(state, 314567);
    // clang-format on
    // A kernel launch returns no status; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());

    /*
     * First Forge call should be a window creation call
     * so that necessary OpenGL context is created for any
     * other forge::* object to be created successfully
     */
    forge::Window wnd(DIMX, DIMY, "Histogram Demo");
    wnd.makeCurrent();

    forge::Image img(IMGW, IMGH, FG_RGBA, forge::u8);
    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLabelFormat("%3.1f", "%.2e");

    /* set x axis limits to maximum and minimum values of data
     * and y axis limits to range [0, number of pixels ideally]
     * but practically total number of pixels as y range will skew
     * the histogram graph vertically. Therefore setting it to
     * 25% of total number of pixels */
    chart.setAxesLimits(0, 1, 0, IMGW * IMGH / (float)(NBINS / 4.0));

    /*
     * Create histogram object specifying number of bins
     */
    forge::Histogram hist = chart.histogram(NBINS, forge::s32);

    /*
     * Set histogram colors
     */
    hist.setColor(FG_YELLOW);

    PerlinNoise noiseGenerator;

    int* histOut;
    float* histColors;
    FORGE_CUDA_CHECK(cudaMalloc((void**)&histOut, NBINS * sizeof(int)));
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&histColors, 3 * NBINS * sizeof(float)));

    GfxHandle* handles[3];
    createGLBuffer(&handles[0], img.pixels(), FORGE_IMAGE_BUFFER);
    createGLBuffer(&handles[1], hist.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], hist.colors(), FORGE_VERTEX_BUFFER);

    unsigned frame = 0;
    do {
        // Refresh the data on the first frame and then on every 8th one.
        if (frame % 8 == 0) {
            kernel(bmp, noiseGenerator);
            copyToGLBuffer(handles[0], (ComputeResourceHandle)bmp.ptr,
                           img.size());

            populateBins(bmp, histOut, NBINS, histColors);
            copyToGLBuffer(handles[1], (ComputeResourceHandle)histOut,
                           hist.verticesSize());
            copyToGLBuffer(handles[2], (ComputeResourceHandle)histColors,
                           hist.colorsSize());
            frame = 0;
        }

        /*
         * Split the window into grid regions
         */
        wnd.draw(1, 2, 0, img, "Dynamic Perlin Noise");
        wnd.draw(1, 2, 1, chart, "Histogram of Noisy Image");
        wnd.swapBuffers();

        frame++;
    } while (!wnd.close());

    FORGE_CUDA_CHECK(cudaFree(histOut));
    FORGE_CUDA_CHECK(cudaFree(histColors));

    releaseGLBuffer(handles[0]);
    releaseGLBuffer(handles[1]);
    releaseGLBuffer(handles[2]);

    // Release the two allocations made at the top of main, which were
    // previously never freed: the cuRAND states and the device bitmap.
    FORGE_CUDA_CHECK(cudaFree(state));
    destroyBitmap(bmp);
    return 0;
}
// Allocates a w x h bitmap with 4 bytes per pixel using cudaMalloc. The
// storage is not initialized; release it with destroyBitmap.
Bitmap createBitmap(unsigned w, unsigned h) {
    Bitmap bmp;
    bmp.height = h;
    bmp.width  = w;
    FORGE_CUDA_CHECK(
        cudaMalloc((void**)&bmp.ptr, sizeof(unsigned char) * 4 * w * h));
    return bmp;
}
// Frees the pixel storage of bmp with cudaFree (counterpart of
// createBitmap).
void destroyBitmap(Bitmap& bmp) {
    unsigned char* pixels = bmp.ptr;
    FORGE_CUDA_CHECK(cudaFree(pixels));
}
// Allocates the `base` and `perlin` arrays (IMGW * IMGH floats each) with
// cudaMalloc. Their contents are produced later by generateNoise().
PerlinNoise::PerlinNoise() {
    const size_t bytesPerArray = IMGW * IMGH * sizeof(float);
    float** const arrays[] = {&base, &perlin};
    for (int k = 0; k < 2; ++k) {
        FORGE_CUDA_CHECK(cudaMalloc((void**)arrays[k], bytesPerArray));
    }
}
// Frees the two arrays allocated by the constructor, `base` first.
PerlinNoise::~PerlinNoise() {
    float* const owned[] = {base, perlin};
    for (int k = 0; k < 2; ++k) { FORGE_CUDA_CHECK(cudaFree(owned[k])); }
}
// Number of blocks of size b needed to cover a items (positive operands).
inline int divup(int a, int b) {
    const int biased = a + b - 1;
    return biased / b;
}
// Linear blend of x0 and x1: alpha = 0 yields x0, alpha = 1 yields x1.
__device__ float interp(float x0, float x1, float alpha) {
    const float keep = 1 - alpha;
    return x0 * keep + alpha * x1;
}
// Fills `base` with uniform random numbers and clears `perlin`.
// Launched on a 2D grid; thread (x, y) handles element y * IMGW + x and
// threads outside the IMGW x IMGH image do nothing.
__global__ void perlinInitKernel(float* base, float* perlin,
curandState* state) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < IMGW && y < IMGH) {
int index = y * IMGW + x;
// NOTE(review): the generator state is chosen with index % NBINS, so every
// thread whose index shares that remainder is handed the same curandState.
// curand_uniform presumably advances the state it is given, which would
// make this an unsynchronized concurrent update - confirm, and consider
// one state per thread.
base[index] = curand_uniform(&state[index % NBINS]);
perlin[index] = 0.0f;
}
}
// Adds one octave to `perlin`.
// Launched on a 2D grid; thread (x, y) handles element y * IMGW + x and
// threads outside the IMGW x IMGH image do nothing. The octave value is the
// bilinear interpolation of `base` sampled at the four lattice points of
// spacing `period` that enclose (x, y), wrapping around at the image
// border, and it is added with weight `amp`.
__global__ void perlinComputeKernel(float* perlin, float* base, float amp,
                                    int period) {
    unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= IMGW || y >= IMGH) { return; }

    const float freq = 1.0f / period;

    // Enclosing lattice columns and the horizontal blend factor.
    int colLo      = (x / period) * period;
    int colHi      = (colLo + period) % IMGW;
    float colBlend = (x - colLo) * freq;

    // Enclosing lattice rows and the vertical blend factor.
    int rowLo      = (y / period) * period;
    int rowHi      = (rowLo + period) % IMGH;
    float rowBlend = (y - rowLo) * freq;

    float upper = interp(base[colLo + IMGW * rowLo],
                         base[colHi + IMGW * rowLo], colBlend);
    float lower = interp(base[colLo + IMGW * rowHi],
                         base[colHi + IMGW * rowHi], colBlend);

    int index = y * IMGW + x;
    perlin[index] += (amp * interp(upper, lower, rowBlend));
}
// Divides every element of `perlin` by `tamp`.
// Launched on a 2D grid; thread (x, y) handles element y * IMGW + x and
// threads outside the IMGW x IMGH image do nothing.
__global__ void perlinNormalize(float* perlin, float tamp) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= IMGW || y >= IMGH) { return; }

    float* sample = perlin + (y * IMGW + x);
    *sample       = *sample / tamp;
}
/**
 * Recompute the noise field on the device.
 *
 * Launches, in order: perlinInitKernel (new random base, cleared sum), one
 * perlinComputeKernel per octave (periods 64 down to 1, weight halving each
 * time) and perlinNormalize with the sum of the weights. All launches use
 * 32x8 thread blocks covering the IMGW x IMGH image, and each launch is
 * checked so that a failure is reported at its source.
 */
void PerlinNoise::generateNoise() {
    static dim3 threads(32, 8);
    dim3 blocks(divup(IMGW, threads.x), divup(IMGH, threads.y));

    float persistence = 0.5f;
    float amp         = 1.0f;
    float tamp        = 0.0f;

    // clang-format off
    perlinInitKernel<<<blocks, threads>>>(base, perlin, state);
    // clang-format on
    // A kernel launch returns no status; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());

    for (int octave = 6; octave >= 0; --octave) {
        int period = 1 << octave;
        // clang-format off
        perlinComputeKernel<<<blocks, threads>>>(perlin, base, amp, period);
        // clang-format on
        FORGE_CUDA_CHECK(cudaGetLastError());
        tamp += amp;
        amp *= persistence;
    }

    // clang-format off
    perlinNormalize<<<blocks, threads>>>(perlin, tamp);
    // clang-format on
    FORGE_CUDA_CHECK(cudaGetLastError());
}
// Renders the noise field into a 4-byte-per-pixel image.
// Launched on a 2D grid; thread (x, y) handles pixel x + y * width and
// threads outside the width x height image do nothing. The pixel position
// is rescaled to the IMGW x IMGH noise field, the sample is multiplied by
// 255 and stored in the first three channels; the fourth is set to 255.
__global__ void fillImageKernel(unsigned char* ptr, unsigned width,
                                unsigned height, float* perlin) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= width || y >= height) { return; }

    // Position of this pixel inside the noise field.
    unsigned u = (unsigned)(IMGW * x / (float)(width));
    unsigned v = (unsigned)(IMGH * y / (float)(height));
    int idx    = u + v * IMGW;

    unsigned char gray = 255 * perlin[idx];

    int offset           = x + y * width;
    unsigned char* pixel = ptr + offset * 4;
    pixel[0] = gray;
    pixel[1] = gray;
    pixel[2] = gray;
    pixel[3] = 255;
}
/**
 * Produce a new noise image.
 *
 * Regenerates the noise held by `pn`, then launches fillImageKernel with
 * 32x8 thread blocks covering the bitmap to convert it into pixels. The
 * launch is checked so that a failure is reported here.
 *
 * @param bmp destination bitmap (pixel storage on the device)
 * @param pn  noise generator whose `perlin` array is rendered
 */
void kernel(Bitmap& bmp, PerlinNoise& pn) {
    static dim3 threads(32, 8);

    pn.generateNoise();

    dim3 blocks(divup(bmp.width, threads.x), divup(bmp.height, threads.y));
    // clang-format off
    fillImageKernel<<<blocks, threads>>>(bmp.ptr, bmp.width, bmp.height,
                                         pn.perlin);
    // clang-format on
    // A kernel launch returns no status; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());
}
/**
 * Accumulate the histogram of the first channel of an image.
 *
 * Launched on a 2D grid; thread (x, y) handles pixel y * IMGW + x and
 * threads outside the IMGW x IMGH image do nothing. The channel value
 * (0..255) is mapped to bin floor(nbins * value / 255) and the matching
 * counter is incremented atomically. The counters are not cleared here.
 *
 * @param perlinNoise image data, 4 bytes per pixel
 * @param histOut     counters, nbins entries
 * @param nbins       number of bins
 */
__global__ void histogramKernel(const unsigned char* perlinNoise, int* histOut,
                                const unsigned nbins) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < IMGW && y < IMGH) {
        int offset             = y * IMGW + x;
        unsigned char noiseVal = perlinNoise[offset * 4 + 0];
        int bin =
            __float2int_rd(nbins * (__int2float_rd(noiseVal) / 255.f));
        // A value of 255 maps to exactly nbins, one past the last counter;
        // fold it into the last bin instead of writing out of bounds.
        bin = min(bin, (int)nbins - 1);
        atomicAdd(histOut + bin, 1);
    }
}
// Writes three uniform random color components for each bin. One thread
// per bin, using the cuRAND state with the same index as the bin. There is
// no bounds check, so the launch must contain exactly one thread per bin.
__global__ void histColorsKernel(float* histColors, curandState* states) {
    const int bin    = blockIdx.x * blockDim.x + threadIdx.x;
    curandState* rng = states + bin;
    float* rgb       = histColors + 3 * bin;
    for (int channel = 0; channel < 3; ++channel) {
        rgb[channel] = curand_uniform(rng);
    }
}
/**
 * Compute the histogram of `bmp` and new random bin colors on the device.
 *
 * Clears the counters, launches histogramKernel with 8x8 thread blocks
 * covering the bitmap, then launches histColorsKernel with one thread per
 * bin. The memset and both launches are checked so that a failure is
 * reported here rather than by a later CUDA call.
 *
 * @param bmp        image to analyse (pixel storage on the device)
 * @param histOut    device array of nbins counters
 * @param nbins      number of bins
 * @param histColors device array of 3 * nbins floats
 */
void populateBins(Bitmap& bmp, int* histOut, const unsigned nbins,
                  float* histColors) {
    static const dim3 threads(8, 8);
    dim3 blocks(divup(bmp.width, threads.x), divup(bmp.height, threads.y));

    FORGE_CUDA_CHECK(cudaMemset(histOut, 0, nbins * sizeof(int)));

    // clang-format off
    histogramKernel<<<blocks, threads>>>(bmp.ptr, histOut, nbins);
    // clang-format on
    // A kernel launch returns no status; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());

    // clang-format off
    histColorsKernel<<<1, nbins>>>(histColors, state);
    // clang-format on
    FORGE_CUDA_CHECK(cudaGetLastError());
}
|
OpenCL¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#include <algorithm>
#include <cmath>
#include <ctime>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>
#include "cl_helpers.h"
using namespace cl;
using namespace std;
// Size of the generated noise image in pixels.
const unsigned IMGW = 256;
const unsigned IMGH = 256;
// Window size in pixels.
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
// IMGW * IMGH * 4, i.e. four values per pixel.
const unsigned IMG_SIZE = IMGW * IMGH * 4;
// Number of histogram bins.
const unsigned NBINS = 256;
const float PERSISTENCE = 0.5f;
#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>
// clang-format off
// OpenCL C source for the histogram demo. It defines two helpers (rand,
// interp) and these kernels:
//   init           - fill `base` with pseudo-random values, clear `perlin`
//   compute        - add one octave of interpolated noise to `perlin`
//   normalizeNoise - divide `perlin` by the sum of the octave weights
//   fillImage      - render `perlin` into a 4-byte-per-pixel image
//   memSet         - zero an int array
//   histogram      - count the first channel of each pixel into bins
//   setColors      - write three pseudo-random color components per bin
// The histogram kernel clamps the bin index to nbins - 1: a channel value
// of 255 would otherwise map to nbins, one past the last counter.
static const std::string perlinKernels =
R"EOK(
float rand(int x) {
x = (x << 13) ^ x;
return (1.0 - ((x * (x * x * 15731 + 789221) + 1376312589) & 0x7fffffff) /
1073741824.0);
}
float interp(float x0, float x1, float t) { return x0 + (x1 - x0) * t; }
kernel void init(global float* base, global float* perlin, int IMGW, int IMGH,
int randSeed) {
int x = get_global_id(0);
int y = get_global_id(1);
if (x < IMGW && y < IMGH) {
int i = x + y * IMGW;
base[i] = (1 + rand(randSeed * i)) / 2.0f;
perlin[i] = 0.0f;
}
}
kernel void compute(global float* perlin, global float* base, unsigned IMGW,
unsigned IMGH, float amp, int period) {
int x = get_global_id(0);
int y = get_global_id(1);
if (x < IMGW && y < IMGH) {
int index = y * IMGW + x;
float freq = 1.0f / period;
int si0 = (x / period) * period;
int si1 = (si0 + period) % IMGW;
float hblend = (x - si0) * freq;
int sj0 = (y / period) * period;
int sj1 = (sj0 + period) % IMGH;
float vblend = (y - sj0) * freq;
float top =
interp(base[si0 + IMGW * sj0], base[si1 + IMGW * sj0], hblend);
float bot =
interp(base[si0 + IMGW * sj1], base[si1 + IMGW * sj1], hblend);
perlin[index] += (amp * interp(top, bot, vblend));
}
}
kernel void normalizeNoise(global float* perlin, unsigned IMGW, unsigned IMGH,
float tamp) {
int x = get_global_id(0);
int y = get_global_id(1);
if (x < IMGW && y < IMGH) {
int index = y * IMGW + x;
perlin[index] = perlin[index] / tamp;
}
}
kernel void fillImage(global unsigned char* ptr, unsigned width,
unsigned height, global float* perlin, unsigned IMGW,
unsigned IMGH) {
int x = get_global_id(0);
int y = get_global_id(1);
if (x < width && y < height) {
int offset = x + y * width;
unsigned u = (unsigned)(IMGW * x / (float)(width));
unsigned v = (unsigned)(IMGH * y / (float)(height));
int idx = u + v * IMGW;
unsigned char val = 255 * perlin[idx];
ptr[offset * 4 + 0] = val;
ptr[offset * 4 + 1] = val;
ptr[offset * 4 + 2] = val;
ptr[offset * 4 + 3] = 255;
}
}
kernel void memSet(global int* out, unsigned len) {
if (get_global_id(0) < len) out[get_global_id(0)] = 0;
}
kernel void histogram(const global unsigned char* perlinNoise,
global int* histOut, const unsigned w, const unsigned h,
const unsigned nbins) {
int x = get_global_id(0);
int y = get_global_id(1);
if (x < w && y < h) {
int offset = y * w + x;
unsigned char noiseVal = perlinNoise[offset * 4 + 0];
offset = min((int)(nbins * (noiseVal / 255.f)), (int)nbins - 1);
atomic_add(histOut + offset, 1);
}
}
kernel void setColors(global float* out, unsigned rseed, unsigned gseed,
unsigned bseed) {
int i = get_global_id(0);
out[3 * i + 0] = (1 + rand(rseed * i)) / 2.0f;
out[3 * i + 1] = (1 + rand(gseed * i)) / 2.0f;
out[3 * i + 2] = (1 + rand(bseed * i)) / 2.0f;
}
)EOK";
// clang-format on
// Integer division of a by b, rounded up (for positive operands).
inline int divup(int a, int b) {
    const int shifted = (b - 1) + a;
    return shifted / b;
}
/*
 * Enqueues the full noise -> image -> histogram pipeline on `queue`.
 *
 * On the first call the OpenCL program is built for `device` and the kernel
 * objects are created; later calls reuse them. Every call then enqueues, in
 * order: init, compute (once per octave), normalizeNoise, fillImage, memSet,
 * histogram and setColors.
 *
 * image   : receives the rendered pixels (also the histogram input)
 * base    : scratch buffer passed to the init and compute kernels
 * perlin  : accumulated noise values
 * histOut : NBINS integer counters
 * colors  : three floats per histogram bin
 *
 * If the program fails to build, the build log is printed and the process
 * exits with status 255.
 */
void kernel(cl::Buffer& image, cl::Buffer& base, cl::Buffer& perlin,
            cl::Buffer& histOut, cl::Buffer& colors, cl::CommandQueue& queue,
            cl::Device& device) {
    static bool compileFlag = true;
    static cl::Program prog;
    static cl::Kernel initKernel, computeKernel, normKernel, fillKernel;
    static cl::Kernel memSetKernel, genHistogram, genHistColors;

    if (compileFlag) {
        /* Seed the host RNG once. Re-seeding with time(0) on every call
         * makes all calls within the same second draw identical seeds. */
        std::srand((unsigned)(std::time(0)));

        try {
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), perlinKernels,
                               false);

            std::vector<cl::Device> devs;
            devs.push_back(device);
            prog.build(devs);

            initKernel    = cl::Kernel(prog, "init");
            computeKernel = cl::Kernel(prog, "compute");
            normKernel    = cl::Kernel(prog, "normalizeNoise");
            fillKernel    = cl::Kernel(prog, "fillImage");
            memSetKernel  = cl::Kernel(prog, "memSet");
            genHistogram  = cl::Kernel(prog, "histogram");
            genHistColors = cl::Kernel(prog, "setColors");
        } catch (const cl::Error& err) {
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    /* 16x16 work-groups; the global size is rounded up to a multiple of the
     * work-group size, the kernels guard against the overshoot. */
    static const NDRange local(16, 16);
    NDRange global(local[0] * divup(IMGW, (int)(local[0])),
                   local[1] * divup(IMGH, (int)(local[1])));

    float persistence = 0.5f;
    float amp         = 1.0f;
    float tamp        = 0.0f;

    initKernel.setArg(0, base);
    initKernel.setArg(1, perlin);
    initKernel.setArg(2, IMGW);
    initKernel.setArg(3, IMGH);
    initKernel.setArg(4, std::rand());
    queue.enqueueNDRangeKernel(initKernel, cl::NullRange, global, local);

    /* Periods 64, 32, ..., 1; the amplitude is scaled by `persistence`
     * after each octave and `tamp` accumulates the amplitudes used. */
    for (int octave = 6; octave >= 0; --octave) {
        int period = 1 << octave;
        computeKernel.setArg(0, perlin);
        computeKernel.setArg(1, base);
        computeKernel.setArg(2, IMGW);
        computeKernel.setArg(3, IMGH);
        computeKernel.setArg(4, amp);
        computeKernel.setArg(5, period);
        queue.enqueueNDRangeKernel(computeKernel, cl::NullRange, global, local);
        tamp += amp;
        amp *= persistence;
    }

    normKernel.setArg(0, perlin);
    normKernel.setArg(1, IMGW);
    normKernel.setArg(2, IMGH);
    normKernel.setArg(3, tamp);
    queue.enqueueNDRangeKernel(normKernel, cl::NullRange, global, local);

    fillKernel.setArg(0, image);
    fillKernel.setArg(1, IMGW);
    fillKernel.setArg(2, IMGH);
    fillKernel.setArg(3, perlin);
    fillKernel.setArg(4, IMGW);
    fillKernel.setArg(5, IMGH);
    queue.enqueueNDRangeKernel(fillKernel, cl::NullRange, global, local);

    /* One work-item per histogram bin for the reset and color kernels. */
    static const NDRange global_hist(NBINS);

    memSetKernel.setArg(0, histOut);
    memSetKernel.setArg(1, NBINS);
    queue.enqueueNDRangeKernel(memSetKernel, cl::NullRange, global_hist);

    genHistogram.setArg(0, image);
    genHistogram.setArg(1, histOut);
    genHistogram.setArg(2, IMGW);
    genHistogram.setArg(3, IMGH);
    genHistogram.setArg(4, NBINS);
    queue.enqueueNDRangeKernel(genHistogram, cl::NullRange, global, local);

    genHistColors.setArg(0, colors);
    genHistColors.setArg(1, std::rand());
    genHistColors.setArg(2, std::rand());
    genHistColors.setArg(3, std::rand());
    queue.enqueueNDRangeKernel(genHistColors, cl::NullRange, global_hist);
}
int main(void) {
try {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Histogram Demo");
wnd.makeCurrent();
forge::Image img(IMGW, IMGH, FG_RGBA, forge::u8);
forge::Chart chart(FG_CHART_2D);
chart.setAxesLabelFormat("%3.1f", "%.2e");
/* set x axis limits to maximum and minimum values of data
* and y axis limits to range [0, number of pixels ideally]
* but practically total number of pixels as y range will skew
* the histogram graph vertically. Therefore setting it to
* 25% of total number of pixels */
chart.setAxesLimits(0, 1, 0, IMGW * IMGH / (float)(NBINS / 4.0));
/*
* Create histogram object specifying number of bins
*/
forge::Histogram hist = chart.histogram(NBINS, forge::s32);
/*
* Set histogram colors
*/
hist.setColor(FG_YELLOW);
/*
* Helper function to create a CLGL interop context.
* This function checks for if the extension is available
* and creates the context on the appropriate device.
* Note: context and queue are defined in cl_helpers.h
*/
context = createCLGLContext(wnd);
Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
queue = CommandQueue(context, device);
cl::Buffer image(context, CL_MEM_READ_WRITE, IMG_SIZE);
cl::Buffer baseNoise(context, CL_MEM_READ_WRITE, IMG_SIZE);
cl::Buffer perlinNoise(context, CL_MEM_READ_WRITE, IMG_SIZE);
cl::Buffer histOut(context, CL_MEM_READ_WRITE, NBINS * sizeof(int));
cl::Buffer colors(context, CL_MEM_READ_WRITE,
3 * NBINS * sizeof(float));
GfxHandle* handles[3];
createGLBuffer(&handles[0], img.pixels(), FORGE_IMAGE_BUFFER);
createGLBuffer(&handles[1], hist.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[2], hist.colors(), FORGE_VERTEX_BUFFER);
unsigned frame = 0;
do {
if (frame % 8 == 0) {
kernel(image, baseNoise, perlinNoise, histOut, colors, queue,
device);
copyToGLBuffer(handles[0], (ComputeResourceHandle)image(),
img.size());
copyToGLBuffer(handles[1], (ComputeResourceHandle)histOut(),
hist.verticesSize());
copyToGLBuffer(handles[2], (ComputeResourceHandle)colors(),
hist.colorsSize());
frame = 0;
}
/*
* Split the window into grid regions
*/
wnd.draw(1, 2, 0, img, "Dynamic Perlin Noise");
wnd.draw(1, 2, 1, chart, "Histogram of Noisy Image");
wnd.swapBuffers();
frame++;
} while (!wnd.close());
releaseGLBuffer(handles[0]);
releaseGLBuffer(handles[1]);
releaseGLBuffer(handles[2]);
} catch (forge::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
} catch (cl::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
}
return 0;
}
|
3D Line Plot¶
CPU¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>
// Dimensions passed to the forge::Window constructor in main().
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
// Bounds of the z range sampled along the curve.
static const float ZMIN = 0.1f;
static const float ZMAX = 10.f;
// Step between consecutive z samples.
const float DX = 0.005f;
// Number of samples: one per DX step across [ZMIN, ZMAX], plus one.
const size_t ZSIZE = (size_t)((ZMAX - ZMIN) / DX + 1);
using namespace std;
// Rebuilds `vec` as ZSIZE interleaved (x, y, z) triples of the curve for
// parameter `t`, sampling z from ZMIN in steps of `dx`. Any previous
// contents of `vec` are discarded.
void generateCurve(float t, float dx, std::vector<float>& vec) {
    vec.clear();
    const int sampleCount = (int)ZSIZE;
    for (int n = 0; n < sampleCount; ++n) {
        const float z = ZMIN + n * dx;
        const float vertex[3] = {
            (float)(cos(z * t + t) / z),
            (float)(sin(z * t + t) / z),
            (float)(z + 0.1 * sin(t)),
        };
        vec.insert(vec.end(), vertex, vertex + 3);
    }
}
int main(void) {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Three dimensional line plot demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_3D);
chart.setAxesLabelFormat("%3.1f", "%3.1f", "%.2e");
chart.setAxesLimits(-1.1f, 1.1f, -1.1f, 1.1f, 0.f, 10.f);
chart.setAxesTitles("x-axis", "y-axis", "z-axis");
forge::Plot plot3 = chart.plot(ZSIZE, forge::f32);
// generate a surface
std::vector<float> function;
static float t = 0;
generateCurve(t, DX, function);
GfxHandle* handle;
createGLBuffer(&handle, plot3.vertices(), FORGE_VERTEX_BUFFER);
/* copy your data into the pixel buffer object exposed by
* forge::Plot class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
copyToGLBuffer(handle, (ComputeResourceHandle)function.data(),
plot3.verticesSize());
do {
t += 0.01f;
generateCurve(t, DX, function);
copyToGLBuffer(handle, (ComputeResourceHandle)function.data(),
plot3.verticesSize());
wnd.draw(chart);
} while (!wnd.close());
releaseGLBuffer(handle);
return 0;
}
|
CUDA¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <cuComplex.h>
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
#include <iostream>
// Dimensions passed to the forge::Window constructor in main().
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
// Bounds of the z range sampled along the curve.
static const float ZMIN = 0.1f;
static const float ZMAX = 10.f;
// Step between consecutive z samples.
const float DX = 0.005f;
// Number of samples: one per DX step across [ZMIN, ZMAX], plus one.
const size_t ZSIZE = (size_t)((ZMAX - ZMIN) / DX + 1);
// Host wrapper that launches the generateCurve CUDA kernel (defined below).
void kernel(float t, float dx, float* dev_out);
// 3D line plot demo (CUDA): the curve is regenerated on the device for an
// advancing `t` and copied into the plot's vertex buffer every frame.
int main(void) {
    /*
     * The window has to be the first Forge object created: it sets up the
     * OpenGL context every other forge::* object depends on.
     */
    forge::Window wnd(DIMX, DIMY, "Three dimensional line plot demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLabelFormat("%3.1f", "%3.1f", "%.2e");
    chart.setAxesLimits(-1.1f, 1.1f, -1.1f, 1.1f, 0.f, 10.f);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    forge::Plot plot3 = chart.plot(ZSIZE, forge::f32);

    static float t = 0;

    // Device buffer holding three floats per curve sample.
    float* curveDev;
    FORGE_CUDA_CHECK(cudaMalloc((void**)&curveDev, ZSIZE * 3 * sizeof(float)));
    kernel(t, DX, curveDev);

    GfxHandle* vbo;
    createGLBuffer(&vbo, plot3.vertices(), FORGE_VERTEX_BUFFER);

    /*
     * Copy the device data into the vertex buffer exposed by forge::Plot.
     * The copy helpers come from <fg/compute_copy.h>.
     */
    copyToGLBuffer(vbo, (ComputeResourceHandle)curveDev, plot3.verticesSize());

    // Render at least one frame, then continue until the window closes.
    while (true) {
        t += 0.01f;
        kernel(t, DX, curveDev);
        copyToGLBuffer(vbo, (ComputeResourceHandle)curveDev,
                       plot3.verticesSize());
        wnd.draw(chart);
        if (wnd.close()) break;
    }

    FORGE_CUDA_CHECK(cudaFree(curveDev));
    releaseGLBuffer(vbo);
    return 0;
}
// Writes one (x, y, z) vertex of the curve per thread into `out`.
//
// Launch layout: 1D grid of 1D blocks; thread `offset` handles sample
// `offset`, threads with offset >= ZSIZE do nothing.
//   t, dx : curve parameter and z step
//   out   : device buffer with room for 3 * ZSIZE floats
//   ZMIN  : z value of sample 0
//   ZSIZE : number of samples
// All arithmetic is kept in single precision (float literals, sinf/cosf) so
// no double-precision operations are generated.
__global__ void generateCurve(float t, float dx, float* out, const float ZMIN,
                              const size_t ZSIZE) {
    int offset = blockIdx.x * blockDim.x + threadIdx.x;

    if (offset < ZSIZE) {
        const float z       = ZMIN + offset * dx;
        out[3 * offset]     = cosf(z * t + t) / z;
        out[3 * offset + 1] = sinf(z * t + t) / z;
        out[3 * offset + 2] = z + 0.1f * sinf(t);
    }
}
// Integer division of a by b, rounded up (for non-negative a, positive b).
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
// Launches generateCurve with 1024-thread blocks, enough of them to cover
// ZSIZE samples, writing 3 * ZSIZE floats into `dev_out` (device memory).
// A launch-configuration error is reported through FORGE_CUDA_CHECK instead
// of being silently dropped.
void kernel(float t, float dx, float* dev_out) {
    static const dim3 threads(1024);
    dim3 blocks(divup((int)ZSIZE, (int)threads.x));

    // clang-format off
    generateCurve<<<blocks, threads>>>(t, dx, dev_out, ZMIN, ZSIZE);
    // clang-format on

    // Kernel launches return no status; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());
}
|
OpenCL¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#include "cl_helpers.h"
#include <algorithm>
#include <cmath>
#include <complex>
#include <iostream>
#include <iterator>
#include <mutex>
#include <vector>
// Dimensions passed to the forge::Window constructor in main().
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
// Bounds of the z range sampled along the curve.
static const float ZMIN = 0.1f;
static const float ZMAX = 10.f;
// Step between consecutive z samples.
const float DX = 0.005f;
// Number of samples: one per DX step across [ZMIN, ZMAX], plus one.
static const unsigned ZSIZE = (unsigned)((ZMAX - ZMIN) / DX + 1);
using namespace std;
#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>
// clang-format off
// OpenCL C source of the generateCurve kernel, compiled at run time.
// Each work-item writes one (x, y, z) vertex; work-items at or beyond SIZE
// do nothing. The constant is written as 0.1f so the kernel needs no
// double-precision support, which is optional in OpenCL.
static const std::string sincos_surf_kernel =
R"EOK(
kernel
void generateCurve(global float* out, const float t,
                   const float dx, const float zmin,
                   const unsigned SIZE) {
    int offset = get_global_id(0);

    float z = zmin + offset * dx;
    if (offset < SIZE) {
        out[offset*3 + 0] = cos(z*t+t)/z;
        out[offset*3 + 1] = sin(z*t+t)/z;
        out[offset*3 + 2] = z + 0.1f*sin(t);
    }
}
)EOK";
// clang-format on
// Integer division of a by b, rounded up (for non-negative a, positive b).
inline int divup(int a, int b) {
    const int biased = a + b - 1;
    return biased / b;
}
// Enqueues generateCurve on `queue` for parameter `t`, writing ZSIZE
// vertices into `devOut`. The program is built and the kernel object
// created on the first call only.
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue, float t) {
    static std::once_flag compileFlag;
    static cl::Program prog;
    static cl::Kernel kern;

    // One-time build in the context the queue belongs to.
    std::call_once(compileFlag, [queue]() {
        prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(),
                           sincos_surf_kernel, true);
        kern = cl::Kernel(prog, "generateCurve");
    });

    // Arguments in the order of the kernel signature: out, t, dx, zmin, SIZE.
    unsigned argIndex = 0;
    kern.setArg(argIndex++, devOut);
    kern.setArg(argIndex++, t);
    kern.setArg(argIndex++, DX);
    kern.setArg(argIndex++, ZMIN);
    kern.setArg(argIndex++, ZSIZE);

    // One work-item per curve sample.
    NDRange workItems(ZSIZE);
    queue.enqueueNDRangeKernel(kern, cl::NullRange, workItems);
}
int main(void) {
try {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Three dimensional line plot demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_3D);
chart.setAxesLabelFormat("%3.1f", "%3.1f", "%.2e");
chart.setAxesLimits(-1.1f, 1.1f, -1.1f, 1.1f, 0.f, 10.f);
chart.setAxesTitles("x-axis", "y-axis", "z-axis");
forge::Plot plot3 = chart.plot(ZSIZE, forge::f32);
/*
* Helper function to create a CLGL interop context.
* This function checks for if the extension is available
* and creates the context on the appropriate device.
* Note: context and queue are defined in cl_helpers.h
*/
context = createCLGLContext(wnd);
Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
queue = CommandQueue(context, device);
cl::Buffer devOut(context, CL_MEM_READ_WRITE,
sizeof(float) * ZSIZE * 3);
static float t = 0;
kernel(devOut, queue, t);
GfxHandle* handle;
createGLBuffer(&handle, plot3.vertices(), FORGE_VERTEX_BUFFER);
/* copy your data into the pixel buffer object exposed by
* forge::Surface class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
copyToGLBuffer(handle, (ComputeResourceHandle)devOut(),
plot3.verticesSize());
do {
t += 0.01f;
kernel(devOut, queue, t);
copyToGLBuffer(handle, (ComputeResourceHandle)devOut(),
plot3.verticesSize());
wnd.draw(chart);
} while (!wnd.close());
releaseGLBuffer(handle);
} catch (forge::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
} catch (cl::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
}
return 0;
}
|
Multiple line plots¶
CPU¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>
// Dimensions passed to the forge::Window constructor in main().
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
// Range of x values the plotted functions are sampled over; also used as
// the chart's x-axis limits.
const float FRANGE_START = 0.f;
const float FRANGE_END = 2.f * 3.1415926f;
using namespace std;
// Appends interleaved (x, map(x)) pairs to `vec` for x = range_start,
// range_start + dx, ... while x < range_end.
//
//   range_start, range_end : half-open sampling interval [start, end)
//   dx                     : step between samples; must be positive
//   vec                    : output vector, appended to (not cleared)
//   map                    : function evaluated at every sample
//
// Nothing is appended when the interval is empty or when dx is not
// positive; a zero or negative step could never reach range_end and would
// otherwise loop forever.
void map_range_to_vec_vbo(float range_start, float range_end, float dx,
                          std::vector<float>& vec, float (*map)(float)) {
    if (dx <= 0.0f || range_start >= range_end) return;

    for (float i = range_start; i < range_end; i += dx) {
        vec.push_back(i);
        vec.push_back((*map)(i));
    }
}
int main(void) {
std::vector<float> sinData;
std::vector<float> cosData;
std::vector<float> tanData;
std::vector<float> logData;
map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, sinData, &sinf);
map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, cosData, &cosf);
map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, tanData, &tanf);
map_range_to_vec_vbo(FRANGE_START, FRANGE_END, 0.1f, logData, &log10f);
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Plotting Demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_2D);
chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);
/* Create several plot objects which creates the necessary
* vertex buffer objects to hold the different plot types
*/
forge::Plot plt0 = chart.plot((unsigned)(sinData.size() / 2),
forge::f32); // create a default plot
forge::Plot plt1 =
chart.plot((unsigned)(cosData.size() / 2), forge::f32, FG_PLOT_LINE,
FG_MARKER_NONE); // or specify a specific plot type
forge::Plot plt2 = chart.plot(
(unsigned)(tanData.size() / 2), forge::f32, FG_PLOT_LINE,
FG_MARKER_TRIANGLE); // last parameter specifies marker shape
forge::Plot plt3 = chart.plot((unsigned)(logData.size() / 2), forge::f32,
FG_PLOT_SCATTER, FG_MARKER_CROSS);
/*
* Set plot colors
*/
plt0.setColor(FG_RED);
plt1.setColor(FG_BLUE);
plt2.setColor(FG_YELLOW); // use a forge predefined color
plt3.setColor((forge::Color)0x257973FF); // or any hex-valued color
/*
* Set plot legends
*/
plt0.setLegend("Sine");
plt1.setLegend("Cosine");
plt2.setLegend("Tangent");
plt3.setLegend("Log base 10");
GfxHandle* handles[4];
createGLBuffer(&handles[0], plt0.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[1], plt1.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[2], plt2.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[3], plt3.vertices(), FORGE_VERTEX_BUFFER);
/* copy your data into the pixel buffer object exposed by
* forge::Plot class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
copyToGLBuffer(handles[0], (ComputeResourceHandle)sinData.data(),
plt0.verticesSize());
copyToGLBuffer(handles[1], (ComputeResourceHandle)cosData.data(),
plt1.verticesSize());
copyToGLBuffer(handles[2], (ComputeResourceHandle)tanData.data(),
plt2.verticesSize());
copyToGLBuffer(handles[3], (ComputeResourceHandle)logData.data(),
plt3.verticesSize());
do { wnd.draw(chart); } while (!wnd.close());
releaseGLBuffer(handles[0]);
releaseGLBuffer(handles[1]);
releaseGLBuffer(handles[2]);
releaseGLBuffer(handles[3]);
return 0;
}
|
CUDA¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <cuComplex.h>
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
#include <iostream>
// Dimensions passed to the forge::Window constructor in main().
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
// Step between consecutive x samples.
static const float dx = 0.1f;
// Range of x values the plotted functions are sampled over; also used as
// the chart's x-axis limits.
static const float FRANGE_START = 0.f;
static const float FRANGE_END = 2 * 3.141592f;
// Number of samples per function (range length divided by dx, truncated).
static const size_t DATA_SIZE = (size_t)((FRANGE_END - FRANGE_START) / dx);
// Host wrapper that launches the simple_sinf CUDA kernel (defined below).
void kernel(float* dev_out, int functionCode);
// Plotting demo (CUDA): evaluates sin, cos, tan and log10 on the device and
// shows them as four plots in one 2D chart until the window is closed.
int main(void) {
    /*
     * The window has to be the first Forge object created: it sets up the
     * OpenGL context every other forge::* object depends on.
     */
    forge::Window wnd(DIMX, DIMY, "Plotting Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_2D);
    chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);

    /*
     * One plot object per function; each owns the vertex buffer its
     * samples are copied into.
     */
    // default plot type and marker
    forge::Plot plt0 = chart.plot(DATA_SIZE, forge::f32);
    // explicit plot type, no markers
    forge::Plot plt1 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_NONE);
    // the last argument selects the marker shape
    forge::Plot plt2 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE, FG_MARKER_TRIANGLE);
    forge::Plot plt3 =
        chart.plot(DATA_SIZE, forge::f32, FG_PLOT_SCATTER, FG_MARKER_CROSS);

    // Colors: predefined Forge colors or any hex value.
    plt0.setColor(FG_RED);
    plt1.setColor(FG_BLUE);
    plt2.setColor(FG_YELLOW);
    plt3.setColor((forge::Color)0x257973FF);

    // Legends.
    plt0.setLegend("Sine");
    plt1.setLegend("Cosine");
    plt2.setLegend("Tangent");
    plt3.setLegend("Log base 10");

    // Device buffers, indexed by function code: 0 sin, 1 cos, 2 tan,
    // 3 log10. Each holds two floats (x, y) per sample.
    float* series[4];
    for (int code = 0; code < 4; ++code) {
        FORGE_CUDA_CHECK(
            cudaMalloc((void**)&series[code], sizeof(float) * DATA_SIZE * 2));
    }
    for (int code = 0; code < 4; ++code) {
        kernel(series[code], code);
    }

    forge::Plot* plots[4] = {&plt0, &plt1, &plt2, &plt3};

    GfxHandle* handles[4];
    for (int code = 0; code < 4; ++code) {
        createGLBuffer(&handles[code], plots[code]->vertices(),
                       FORGE_VERTEX_BUFFER);
    }

    /*
     * Copy the device data into the vertex buffers exposed by forge::Plot.
     * The copy helpers come from <fg/compute_copy.h>.
     */
    for (int code = 0; code < 4; ++code) {
        copyToGLBuffer(handles[code], (ComputeResourceHandle)series[code],
                       plots[code]->verticesSize());
    }

    // Render at least one frame, then continue until the window closes.
    while (true) {
        wnd.draw(chart);
        if (wnd.close()) break;
    }

    for (int code = 0; code < 4; ++code) {
        FORGE_CUDA_CHECK(cudaFree(series[code]));
    }
    for (int code = 0; code < 4; ++code) {
        releaseGLBuffer(handles[code]);
    }
    return 0;
}
// Writes one interleaved (x, f(x)) pair per thread into `out`.
//
// Launch layout: 1D grid of 1D blocks; thread i handles sample i, threads
// with i >= _data_size do nothing.
//   out           : device buffer with room for 2 * _data_size floats
//   _data_size    : number of samples
//   fnCode        : 0 sinf, 1 cosf, 2 tanf, 3 log10f
//   _dx           : step between samples
//   _frange_start : x value of sample 0
// An unrecognised fnCode stores 0 as the y value so the buffer never holds
// uninitialised data.
__global__ void simple_sinf(float* out, const size_t _data_size, int fnCode,
                            const float _dx, const float _frange_start) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    if (i < _data_size) {
        float x  = _frange_start + i * _dx;
        int idx  = 2 * i;
        out[idx] = x;

        switch (fnCode) {
            case 0: out[idx + 1] = sinf(x); break;
            case 1: out[idx + 1] = cosf(x); break;
            case 2: out[idx + 1] = tanf(x); break;
            case 3: out[idx + 1] = log10f(x); break;
            default: out[idx + 1] = 0.0f; break;
        }
    }
}
// Integer division of a by b, rounded up (for non-negative a, positive b).
inline int divup(int a, int b) {
    const int lifted = a + b - 1;
    return lifted / b;
}
// Launches simple_sinf with 1024-thread blocks, enough of them to cover
// DATA_SIZE samples, filling `dev_out` (device memory, 2 * DATA_SIZE
// floats) with the function selected by `functionCode`.
// A launch-configuration error is reported through FORGE_CUDA_CHECK instead
// of being silently dropped.
void kernel(float* dev_out, int functionCode) {
    static const dim3 threads(1024);
    dim3 blocks(divup((int)DATA_SIZE, (int)threads.x));

    // clang-format off
    simple_sinf<<<blocks, threads>>>(dev_out, DATA_SIZE, functionCode, dx,
                                     FRANGE_START);
    // clang-format on

    // Kernel launches return no status; query it explicitly.
    FORGE_CUDA_CHECK(cudaGetLastError());
}
|
OpenCL¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#include "cl_helpers.h"
#include <algorithm>
#include <iostream>
#include <iterator>
#include <mutex>
#include <sstream>
using namespace cl;
using namespace std;
// Dimensions passed to the forge::Window constructor in main().
const unsigned DIMX = 1000;
const unsigned DIMY = 800;
// Step between consecutive x samples.
const float dx = 0.1f;
// Range of x values used for the chart's x-axis limits and to size the data.
const float FRANGE_START = 0.f;
const float FRANGE_END = 2 * 3.141592f;
// Number of samples per function (range length divided by dx, truncated).
const unsigned DATA_SIZE = (unsigned)((FRANGE_END - FRANGE_START) / dx);
#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>
// clang-format off
// OpenCL C source of the `sinf` kernel, compiled at run time.
// Each work-item x below DATA_SIZE writes the pair (x * dx, f(x * dx)) at
// out[2 * x] and out[2 * x + 1]; fnCode selects f: 0 sin, 1 cos, 2 tan,
// 3 log10. Other fnCode values leave out[2 * x + 1] untouched.
static const std::string sinf_ocl_kernel =
R"EOK(
kernel void sinf(global float* out, const float dx, const unsigned DATA_SIZE,
int fnCode) {
unsigned x = get_global_id(0);
if (x < DATA_SIZE) {
out[2 * x] = x * dx;
switch (fnCode) {
case 0: out[2 * x + 1] = sin(x * dx); break;
case 1: out[2 * x + 1] = cos(x * dx); break;
case 2: out[2 * x + 1] = tan(x * dx); break;
case 3: out[2 * x + 1] = log10(x * dx); break;
}
}
}
)EOK";
// clang-format on
// Enqueues the `sinf` kernel on `queue`, filling `devOut` with DATA_SIZE
// interleaved (x, f(x)) pairs for the function selected by `fnCode`.
// The program is built and the kernel object created on the first call only.
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue, int fnCode)
{
    static std::once_flag compileFlag;
    static cl::Program prog;
    static cl::Kernel kern;

    std::call_once(compileFlag, [queue]() {
        prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), sinf_ocl_kernel,
                           true);
        kern = cl::Kernel(prog, "sinf");
    });

    // Every work-item writes a complete (x, y) pair, so one work-item per
    // sample is enough; launching DATA_SIZE * 2 only adds idle work-items.
    static const NDRange global(DATA_SIZE);

    kern.setArg(0, devOut);
    kern.setArg(1, dx);
    kern.setArg(2, DATA_SIZE);
    kern.setArg(3, fnCode);
    queue.enqueueNDRangeKernel(kern, cl::NullRange, global);
}
int main(void) {
try {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "Plotting Demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_2D);
chart.setAxesLimits(FRANGE_START, FRANGE_END, -1.0f, 1.0f);
/* Create several plot objects which creates the necessary
* vertex buffer objects to hold the different plot types
*/
forge::Plot plt0 =
chart.plot(DATA_SIZE, forge::f32); // create a default plot
forge::Plot plt1 =
chart.plot(DATA_SIZE, forge::f32, FG_PLOT_LINE,
FG_MARKER_NONE); // or specify a specific plot type
forge::Plot plt2 = chart.plot(
DATA_SIZE, forge::f32, FG_PLOT_LINE,
FG_MARKER_TRIANGLE); // last parameter specifies marker shape
forge::Plot plt3 =
chart.plot(DATA_SIZE, forge::f32, FG_PLOT_SCATTER, FG_MARKER_CROSS);
/*
* Set plot colors
*/
plt0.setColor(FG_RED);
plt1.setColor(FG_BLUE);
plt2.setColor(FG_YELLOW); // use a forge predefined color
plt3.setColor((forge::Color)0x257973FF); // or any hex-valued color
/*
* Set plot legends
*/
plt0.setLegend("Sine");
plt1.setLegend("Cosine");
plt2.setLegend("Tangent");
plt3.setLegend("Log base 10");
/*
* Helper function to create a CLGL interop context.
* This function checks for if the extension is available
* and creates the context on the appropriate device.
* Note: context and queue are defined in cl_helpers.h
*/
context = createCLGLContext(wnd);
Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
queue = CommandQueue(context, device);
cl::Buffer sinOut(context, CL_MEM_READ_WRITE,
sizeof(float) * DATA_SIZE * 2);
cl::Buffer cosOut(context, CL_MEM_READ_WRITE,
sizeof(float) * DATA_SIZE * 2);
cl::Buffer tanOut(context, CL_MEM_READ_WRITE,
sizeof(float) * DATA_SIZE * 2);
cl::Buffer logOut(context, CL_MEM_READ_WRITE,
sizeof(float) * DATA_SIZE * 2);
kernel(sinOut, queue, 0);
kernel(cosOut, queue, 1);
kernel(tanOut, queue, 2);
kernel(logOut, queue, 3);
GfxHandle* handles[4];
createGLBuffer(&handles[0], plt0.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[1], plt1.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[2], plt2.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[3], plt3.vertices(), FORGE_VERTEX_BUFFER);
/* copy your data into the vertex buffer object exposed by
* forge::Plot class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
copyToGLBuffer(handles[0], (ComputeResourceHandle)sinOut(),
plt0.verticesSize());
copyToGLBuffer(handles[1], (ComputeResourceHandle)cosOut(),
plt1.verticesSize());
copyToGLBuffer(handles[2], (ComputeResourceHandle)tanOut(),
plt2.verticesSize());
copyToGLBuffer(handles[3], (ComputeResourceHandle)logOut(),
plt3.verticesSize());
do { wnd.draw(chart); } while (!wnd.close());
releaseGLBuffer(handles[0]);
releaseGLBuffer(handles[1]);
releaseGLBuffer(handles[2]);
releaseGLBuffer(handles[3]);
} catch (forge::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
} catch (cl::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
}
return 0;
}
|
3D Vector Stream¶
CPU¶
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>
// Dimensions passed to the forge::Window constructor in main().
const unsigned DIMX = 640;
const unsigned DIMY = 480;
const float PI = 3.14159265359f;
// Coordinate range of the point grid and the step between grid points;
// MINIMUM and MAXIMUM also feed the chart's axis limits.
const float MINIMUM = 1.0f;
const float MAXIMUM = 20.f;
const float STEP = 2.0f;
// Number of grid points along each axis.
const int NELEMS = (int)((MAXIMUM - MINIMUM + 1) / STEP);
using namespace std;
// Rebuilds `colors` with three floats for each of the NELEMS^3 grid
// points, alternating between the orange and blue entries: even indices get
// orange, odd indices get blue. Only the first three components of each
// table entry are used.
void generateColors(std::vector<float>& colors) {
    static const float AF_BLUE[]   = {0.0588f, 0.1137f, 0.2745f, 1.0f};
    static const float AF_ORANGE[] = {0.8588f, 0.6137f, 0.0745f, 1.0f};

    const int pointCount = NELEMS * NELEMS * NELEMS;

    colors.clear();
    for (int n = 0; n < pointCount; ++n) {
        const float* rgb = ((n % 2) == 0) ? AF_ORANGE : AF_BLUE;
        colors.insert(colors.end(), rgb, rgb + 3);
    }
}
// Rebuilds `points` and `dirs` for an NELEMS x NELEMS x NELEMS grid.
// For every grid point (x, y, z), with each coordinate running from MINIMUM
// in steps of STEP, `points` receives (x, y, z) and `dirs` receives
// (x - 10, y - 10, z - 10). x varies fastest, z slowest.
// Both vectors are cleared first so repeated calls do not accumulate stale
// entries in either of them.
void generatePoints(std::vector<float>& points, std::vector<float>& dirs) {
    points.clear();
    dirs.clear();

    for (int k = 0; k < NELEMS; ++k) {
        float z = MINIMUM + k * STEP;
        for (int j = 0; j < NELEMS; ++j) {
            float y = MINIMUM + j * STEP;
            for (int i = 0; i < NELEMS; ++i) {
                float x = MINIMUM + i * STEP;
                points.push_back(x);
                points.push_back(y);
                points.push_back(z);
                dirs.push_back(x - 10.0f);
                dirs.push_back(y - 10.0f);
                dirs.push_back(z - 10.0f);
            }
        }
    }
}
/**
 * CPU vector-field demo: builds the sample grid on the host, uploads it to
 * the buffers of a forge::VectorField and renders the chart until the window
 * is closed.
 */
int main(void) {
    /*
     * The window has to be the first Forge object created: it brings up the
     * OpenGL context every other forge::* object relies on.
     */
    forge::Window wnd(DIMX, DIMY, "3D Vector Field Demo");
    wnd.makeCurrent();

    // All three axes share the same limits.
    const float axisLo = MINIMUM - 1.0f;
    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLimits(axisLo, MAXIMUM, axisLo, MAXIMUM, axisLo, MAXIMUM);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    const int numElems = NELEMS * NELEMS * NELEMS;
    forge::VectorField field = chart.vectorField(numElems, forge::f32);
    field.setColor(0.f, 1.f, 0.f, 1.f);

    // Host-side data for the field.
    std::vector<float> points;
    std::vector<float> colors;
    std::vector<float> dirs;
    generatePoints(points, dirs);
    generateColors(colors);

    // handles[0] -> vertices, handles[1] -> colors, handles[2] -> directions
    GfxHandle* handles[3];
    createGLBuffer(&handles[0], field.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], field.colors(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);

    copyToGLBuffer(handles[0], (ComputeResourceHandle)points.data(),
                   field.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)colors.data(),
                   field.colorsSize());
    copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs.data(),
                   field.directionsSize());

    // Render at least once, then keep going until the window reports close.
    do {
        wnd.draw(chart);
    } while (!wnd.close());

    for (int h = 0; h < 3; ++h) {
        releaseGLBuffer(handles[h]);
    }
    return 0;
}
|
CUDA¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
// Width and height handed to the forge::Window constructor in main().
const unsigned DIMX = 640;
const unsigned DIMY = 480;
// Range and spacing of the sample grid; MINIMUM and STEP are forwarded to
// pointGenKernel by generatePoints().
const float MINIMUM = 1.0f;
const float MAXIMUM = 20.f;
const float STEP = 2.0f;
// Samples per axis. The cast truncates, so the grid never steps past MAXIMUM.
const int NELEMS = (int)((MAXIMUM - MINIMUM + 1) / STEP);
// Host-side launchers, defined after main(). Both take device pointers that
// main() allocates with room for 3 floats per sample.
void generateColors(float* colors);
void generatePoints(float* points, float* dirs);
/**
 * Integer division that rounds up instead of down (for a >= 0 and b > 0).
 * Used to work out how many blocks are needed to cover `a` items with `b`
 * threads each.
 */
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
/**
 * CUDA vector-field demo: fills three device buffers with kernels, copies
 * them into the buffers of a forge::VectorField and renders the chart until
 * the window is closed.
 */
int main(void) {
    /*
     * The window has to be the first Forge object created: it brings up the
     * OpenGL context every other forge::* object relies on.
     */
    forge::Window wnd(DIMX, DIMY, "3D Vector Field Demo");
    wnd.makeCurrent();

    // All three axes share the same limits.
    const float axisLo = MINIMUM - 1.0f;
    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLimits(axisLo, MAXIMUM, axisLo, MAXIMUM, axisLo, MAXIMUM);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    const int numElems = NELEMS * NELEMS * NELEMS;
    forge::VectorField field = chart.vectorField(numElems, forge::f32);
    field.setColor(0.f, 1.f, 0.f, 1.f);

    // Three floats per sample in each of the device buffers.
    const size_t bufBytes = 3 * numElems * sizeof(float);
    float* points;
    float* colors;
    float* dirs;
    FORGE_CUDA_CHECK(cudaMalloc((void**)&points, bufBytes));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&colors, bufBytes));
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dirs, bufBytes));

    generatePoints(points, dirs);
    generateColors(colors);

    // handles[0] -> vertices, handles[1] -> colors, handles[2] -> directions
    GfxHandle* handles[3];
    createGLBuffer(&handles[0], field.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[1], field.colors(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);

    copyToGLBuffer(handles[0], (ComputeResourceHandle)points,
                   field.verticesSize());
    copyToGLBuffer(handles[1], (ComputeResourceHandle)colors,
                   field.colorsSize());
    copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs,
                   field.directionsSize());

    // Render at least once, then keep going until the window reports close.
    do {
        wnd.draw(chart);
    } while (!wnd.close());

    for (int h = 0; h < 3; ++h) {
        releaseGLBuffer(handles[h]);
    }

    FORGE_CUDA_CHECK(cudaFree(points));
    FORGE_CUDA_CHECK(cudaFree(colors));
    FORGE_CUDA_CHECK(cudaFree(dirs));
    return 0;
}
/**
 * Write one RGB triple per element into `colors`.
 *
 * Expects a 1D launch; each thread handles the element whose index equals
 * its global x index, and threads past `nelems` do nothing. Even elements
 * get the orange palette entry, odd elements the blue one.
 *
 * @param colors output buffer with room for 3 * nelems floats
 * @param nelems number of elements to colour
 */
__global__ void genColorsKernel(float* colors, int nelems) {
    const float AF_BLUE[4]   = {0.0588f, 0.1137f, 0.2745f, 1.0f};
    const float AF_ORANGE[4] = {0.8588f, 0.6137f, 0.0745f, 1.0f};

    const int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (tid >= nelems) { return; }

    const bool isEven = (tid % 2 == 0);
    const int base    = 3 * tid;
    colors[base + 0] = isEven ? AF_ORANGE[0] : AF_BLUE[0];
    colors[base + 1] = isEven ? AF_ORANGE[1] : AF_BLUE[1];
    colors[base + 2] = isEven ? AF_ORANGE[2] : AF_BLUE[2];
}
/**
 * Launch genColorsKernel over all NELEMS^3 samples.
 *
 * Uses 512 threads per block and as many blocks as needed to cover every
 * sample. The launch is followed by a cudaGetLastError() check so that an
 * invalid launch configuration is reported here rather than surfacing at some
 * later, unrelated CUDA call; the check goes through FORGE_CUDA_CHECK, the
 * same macro main() uses for its CUDA runtime calls.
 *
 * @param colors device pointer with room for 3 * NELEMS^3 floats
 */
void generateColors(float* colors) {
    const int numElems = NELEMS * NELEMS * NELEMS;
    static const dim3 threads(512);
    dim3 blocks(divup(numElems, threads.x));

    // clang-format off
    genColorsKernel<<<blocks, threads>>>(colors, numElems);
    // clang-format on
    FORGE_CUDA_CHECK(cudaGetLastError());
}
/**
 * Write the position and direction of every sample of an nelems^3 grid.
 *
 * Expects a 2D launch in which the grid's x dimension carries both the z
 * slice and the x tiling: blockIdx.x / nBBS0 selects the slice, and the
 * remainder is the block position along x inside that slice. The y index
 * comes from the y dimension of the launch.
 *
 * @param points  output; x, y, z per sample (3 * nelems^3 floats)
 * @param dirs    output; sample position minus 10 per component
 * @param nBBS0   number of blocks along x that cover one z slice
 * @param nelems  samples per axis
 * @param minimum coordinate of the first sample on every axis
 * @param step    spacing between neighbouring samples
 */
__global__ void pointGenKernel(float* points, float* dirs, int nBBS0,
                               int nelems, float minimum, float step) {
    const int k = blockIdx.x / nBBS0;
    const int i = blockDim.x * (blockIdx.x - k * nBBS0) + threadIdx.x;
    const int j = blockDim.y * blockIdx.y + threadIdx.y;

    // Threads of partially filled tiles fall outside the grid.
    if (i >= nelems || j >= nelems || k >= nelems) { return; }

    const float x = minimum + i * step;
    const float y = minimum + j * step;
    const float z = minimum + k * step;

    // x varies fastest, then y, then z; three floats per sample.
    const int id   = i + j * nelems + k * nelems * nelems;
    const int base = 3 * id;

    points[base + 0] = x;
    points[base + 1] = y;
    points[base + 2] = z;

    dirs[base + 0] = x - 10.f;
    dirs[base + 1] = y - 10.f;
    dirs[base + 2] = z - 10.f;
}
/**
 * Launch pointGenKernel over the whole NELEMS^3 grid.
 *
 * Blocks are 8x8 threads. The grid's x dimension holds blk_x blocks for each
 * of the NELEMS z slices, which is the layout pointGenKernel decodes via its
 * nBBS0 argument. The launch is followed by a cudaGetLastError() check so
 * that an invalid launch configuration is reported here; the check goes
 * through FORGE_CUDA_CHECK, the same macro main() uses for its CUDA runtime
 * calls.
 *
 * @param points device pointer with room for 3 * NELEMS^3 floats
 * @param dirs   device pointer with room for 3 * NELEMS^3 floats
 */
void generatePoints(float* points, float* dirs) {
    static const dim3 threads(8, 8);

    const int blk_x = divup(NELEMS, threads.x);
    const int blk_y = divup(NELEMS, threads.y);
    dim3 blocks(blk_x * NELEMS, blk_y);

    // clang-format off
    pointGenKernel<<<blocks, threads>>>(points, dirs, blk_x, NELEMS, MINIMUM,
                                        STEP);
    // clang-format on
    FORGE_CUDA_CHECK(cudaGetLastError());
}
|
OpenCL¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#include "cl_helpers.h"
#include <algorithm>
#include <cmath>
#include <ctime>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>
using namespace cl;
using namespace std;
// Width and height handed to the forge::Window constructor in main().
const unsigned DIMX = 640;
const unsigned DIMY = 480;
// Range and spacing of the sample grid; MINIMUM and STEP are passed to
// pointGenKernel as kernel arguments.
const float MINIMUM = 1.0f;
const float MAXIMUM = 20.f;
const float STEP = 2.0f;
// Samples per axis. The cast truncates, so the grid never steps past MAXIMUM.
const int NELEMS = (int)((MAXIMUM - MINIMUM + 1) / STEP);
#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>
// clang-format off
// OpenCL C source that generatePoints() builds at run time. It defines:
//   genColorsKernel - writes an RGB triple per element of a NELEMS^3 grid,
//                     orange for even indices and blue for odd ones.
//   pointGenKernel  - writes each sample's (x, y, z) to `points` and that
//                     position minus 10 per component to `dirs`; dimension 0
//                     of the NDRange carries both the z slice and the x
//                     tiling (group id / nBBS0 selects the slice).
static const std::string streamKernel =
R"EOK(
constant float AF_BLUE[4] = {0.0588f, 0.1137f, 0.2745f, 1.0f};
constant float AF_ORANGE[4] = {0.8588f, 0.6137f, 0.0745f, 1.0f};
kernel void genColorsKernel(global float* colors, int NELEMS) {
const size_t nelems = NELEMS * NELEMS * NELEMS;
int i = get_global_id(0);
if (i < nelems) {
if (i % 2 == 0) {
colors[3 * i + 0] = AF_ORANGE[0];
colors[3 * i + 1] = AF_ORANGE[1];
colors[3 * i + 2] = AF_ORANGE[2];
} else {
colors[3 * i + 0] = AF_BLUE[0];
colors[3 * i + 1] = AF_BLUE[1];
colors[3 * i + 2] = AF_BLUE[2];
}
}
}
kernel void pointGenKernel(global float* points, global float* dirs, int nBBS0,
int NELEMS, float MINIMUM, float STEP) {
int k = get_group_id(0) / nBBS0;
int i = get_local_size(0) * (get_group_id(0) - k * nBBS0) + get_local_id(0);
int j = get_global_id(1);
if (i < NELEMS && j < NELEMS && k < NELEMS) {
float x = MINIMUM + i * STEP;
float y = MINIMUM + j * STEP;
float z = MINIMUM + k * STEP;
int id = i + j * NELEMS + k * NELEMS * NELEMS;
points[3 * id + 0] = x;
points[3 * id + 1] = y;
points[3 * id + 2] = z;
dirs[3 * id + 0] = x - 10.f;
dirs[3 * id + 1] = y - 10.f;
dirs[3 * id + 2] = z - 10.f;
}
}
)EOK";
// clang-format on
/**
 * Integer division that rounds up instead of down (for a >= 0 and b > 0).
 * Used to work out how many work-groups are needed to cover `a` items with
 * `b` work-items each.
 */
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
/**
 * Fill the three device buffers of the vector field.
 *
 * On the first call the OpenCL program in `streamKernel` is built for
 * `device` and both kernels are created; they are cached in function-local
 * statics for later calls. A build or kernel-creation failure prints the
 * error and the build log and terminates the process with status 255.
 *
 * Two kernels are then enqueued on `queue`: pointGenKernel over a 2D range of
 * 8x8 work-groups (dimension 0 holds blk_x groups for each of the NELEMS z
 * slices), followed by genColorsKernel over a 1D range of 64-wide groups.
 * This function does not wait for the kernels to finish.
 *
 * @param points buffer receiving x, y, z per sample
 * @param dirs   buffer receiving the per-sample direction
 * @param colors buffer receiving an RGB triple per sample
 * @param queue  command queue the kernels are enqueued on; its context is
 *               used to create the program
 * @param device device the program is built for
 */
void generatePoints(Buffer& points, Buffer& dirs, Buffer& colors,
                    CommandQueue& queue, Device& device) {
    static bool compileFlag = true;
    static cl::Program prog;
    static cl::Kernel pointGenKernel;
    static cl::Kernel colorsKernel;

    if (compileFlag) {
        try {
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), streamKernel,
                               false);
            std::vector<cl::Device> devs;
            devs.push_back(device);
            prog.build(devs);
            pointGenKernel = cl::Kernel(prog, "pointGenKernel");
            colorsKernel   = cl::Kernel(prog, "genColorsKernel");
        } catch (const cl::Error& err) {
            // Caught by const reference: no copy of the exception object.
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    // Positions and directions: 2D launch, z slices folded into dimension 0.
    static const NDRange local(8, 8);
    int blk_x = divup(NELEMS, (int)(local[0]));
    int blk_y = divup(NELEMS, (int)(local[1]));
    NDRange global(NELEMS * local[0] * blk_x, local[1] * blk_y);

    pointGenKernel.setArg(0, points);
    pointGenKernel.setArg(1, dirs);
    pointGenKernel.setArg(2, blk_x);
    pointGenKernel.setArg(3, NELEMS);
    pointGenKernel.setArg(4, MINIMUM);
    pointGenKernel.setArg(5, STEP);
    queue.enqueueNDRangeKernel(pointGenKernel, cl::NullRange, global, local);

    // Colors: 1D launch rounded up to a whole number of work-groups.
    const int numElems = NELEMS * NELEMS * NELEMS;
    static const NDRange thrds(64, 1);
    NDRange glob(thrds[0] * divup(numElems, (int)(thrds[0])), (int)(thrds[1]));

    colorsKernel.setArg(0, colors);
    colorsKernel.setArg(1, NELEMS);
    queue.enqueueNDRangeKernel(colorsKernel, cl::NullRange, glob, thrds);
}
int main(void) {
try {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(DIMX, DIMY, "3D Vector Field Demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_3D);
chart.setAxesLimits(MINIMUM - 1.0f, MAXIMUM, MINIMUM - 1.0f, MAXIMUM,
MINIMUM - 1.0f, MAXIMUM);
chart.setAxesTitles("x-axis", "y-axis", "z-axis");
int numElems = NELEMS * NELEMS * NELEMS;
forge::VectorField field = chart.vectorField(numElems, forge::f32);
field.setColor(0.f, 1.f, 0.f, 1.f);
/*
* Helper function to create a CLGL interop context.
* This function checks for if the extension is available
* and creates the context on the appropriate device.
* Note: context and queue are defined in cl_helpers.h
*/
context = createCLGLContext(wnd);
Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
queue = CommandQueue(context, device);
cl::Buffer points(context, CL_MEM_READ_WRITE,
sizeof(float) * 3 * numElems);
cl::Buffer colors(context, CL_MEM_READ_WRITE,
sizeof(float) * 3 * numElems);
cl::Buffer dirs(context, CL_MEM_READ_WRITE,
sizeof(float) * 3 * numElems);
GfxHandle* handles[3];
createGLBuffer(&handles[0], field.vertices(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[1], field.colors(), FORGE_VERTEX_BUFFER);
createGLBuffer(&handles[2], field.directions(), FORGE_VERTEX_BUFFER);
generatePoints(points, dirs, colors, queue, device);
copyToGLBuffer(handles[0], (ComputeResourceHandle)points(),
field.verticesSize());
copyToGLBuffer(handles[1], (ComputeResourceHandle)colors(),
field.colorsSize());
copyToGLBuffer(handles[2], (ComputeResourceHandle)dirs(),
field.directionsSize());
do { wnd.draw(chart); } while (!wnd.close());
releaseGLBuffer(handles[0]);
releaseGLBuffer(handles[1]);
releaseGLBuffer(handles[2]);
} catch (forge::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
} catch (cl::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
}
return 0;
}
|
Surfaces¶
CPU¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#define USE_FORGE_CPU_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cmath>
#include <complex>
#include <iostream>
#include <vector>
using namespace std;
// Extent of the sampled region in x and y.
static const float XMIN = -8.0f;
static const float XMAX = 8.0f;
static const float YMIN = -8.0f;
static const float YMAX = 8.0f;
// Spacing between neighbouring samples, used for both x and y.
const float DX = 0.5;
// Samples per axis; these are the dimensions the forge::Surface is created
// with in main().
const size_t XSIZE = (size_t)((XMAX - XMIN) / DX);
const size_t YSIZE = (size_t)((YMAX - YMIN) / DX);
void genSurface(float dx, std::vector<float>& vec) {
vec.clear();
for (float x = XMIN; x < XMAX; x += dx) {
for (float y = YMIN; y < YMAX; y += dx) {
vec.push_back(x);
vec.push_back(y);
float z = sqrt(x * x + y * y) + 2.2204e-16f;
vec.push_back(sin(z) / z);
}
}
}
/**
 * CPU surface demo: samples the function on the host, uploads the vertices
 * to a forge::Surface and renders the chart until the window is closed.
 */
int main(void) {
    /*
     * The window has to be the first Forge object created: it brings up the
     * OpenGL context every other forge::* object relies on.
     */
    forge::Window wnd(1024, 768, "3d Surface Demo");
    wnd.makeCurrent();

    // Leave a margin of 2 around the sampled region in x and y.
    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLimits(XMIN - 2.0f, XMAX + 2.0f, YMIN - 2.0f, YMAX + 2.0f,
                        -0.5f, 1.f);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    forge::Surface surf = chart.surface(XSIZE, YSIZE, forge::f32);
    surf.setColor(FG_YELLOW);

    // Sample the surface on the host.
    std::vector<float> function;
    genSurface(DX, function);

    /*
     * Copy the samples into the surface's vertex buffer. The copy helpers
     * from <fg/compute_copy.h> take care of moving data from compute memory
     * to display memory.
     */
    GfxHandle* handle;
    createGLBuffer(&handle, surf.vertices(), FORGE_VERTEX_BUFFER);
    copyToGLBuffer(handle, (ComputeResourceHandle)function.data(),
                   surf.verticesSize());

    // Render at least once, then keep going until the window reports close.
    do {
        wnd.draw(chart);
    } while (!wnd.close());

    releaseGLBuffer(handle);
    return 0;
}
|
CUDA¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <cuComplex.h>
#include <cuda_runtime.h>
#include <forge.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <fg/compute_copy.h>
#include <cstdio>
#include <iostream>
// Extent of the sampled region in x and y.
const float XMIN = -8.0f;
const float XMAX = 8.f;
const float YMIN = -8.0f;
const float YMAX = 8.f;
// Spacing between neighbouring samples, used for both x and y.
const float DX = 0.5;
// Samples per axis; these are the dimensions the forge::Surface is created
// with in main().
const size_t XSIZE = (size_t)((XMAX - XMIN) / DX);
const size_t YSIZE = (size_t)((YMAX - YMIN) / DX);
// Host-side launcher for sincos_surf, defined after main(). dev_out is a
// device pointer that main() allocates with XSIZE * YSIZE * 3 floats.
void kernel(float dx, float* dev_out);
/**
 * CUDA surface demo: samples the function with a kernel, copies the vertices
 * into a forge::Surface and renders the chart until the window is closed.
 */
int main(void) {
    /*
     * The window has to be the first Forge object created: it brings up the
     * OpenGL context every other forge::* object relies on.
     */
    forge::Window wnd(1024, 768, "3d Surface Demo");
    wnd.makeCurrent();

    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLimits(-10.f, 10.f, -10.f, 10.f, -0.5f, 1.f);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    forge::Surface surf = chart.surface(XSIZE, YSIZE, forge::f32);
    surf.setColor(FG_YELLOW);

    // Three floats (x, y, z) per vertex.
    const size_t outBytes = XSIZE * YSIZE * 3 * sizeof(float);
    float* dev_out;
    FORGE_CUDA_CHECK(cudaMalloc((void**)&dev_out, outBytes));
    kernel(DX, dev_out);

    /*
     * Copy the samples into the surface's vertex buffer. The copy helpers
     * from <fg/compute_copy.h> take care of moving data from compute memory
     * to display memory.
     */
    GfxHandle* handle;
    createGLBuffer(&handle, surf.vertices(), FORGE_VERTEX_BUFFER);
    copyToGLBuffer(handle, (ComputeResourceHandle)dev_out, surf.verticesSize());

    // Render at least once, then keep going until the window reports close.
    do {
        wnd.draw(chart);
    } while (!wnd.close());

    FORGE_CUDA_CHECK(cudaFree(dev_out));
    releaseGLBuffer(handle);
    return 0;
}
/**
 * Sample sin(r)/r, with r the distance from the origin, on an XSIZE x YSIZE
 * grid.
 *
 * Expects a 2D launch; each thread handles the vertex at its global (x, y)
 * index and threads outside the grid do nothing. Vertices are stored with the
 * y index varying fastest, three floats (x, y, z) per vertex.
 *
 * All arithmetic stays in single precision: the float literal and
 * sqrtf/sinf avoid an implicit promotion to double inside the kernel.
 *
 * @param dx    spacing between neighbouring samples in x and y
 * @param out   output buffer with room for 3 * XSIZE * YSIZE floats
 * @param XMIN  x coordinate of the first sample
 * @param YMIN  y coordinate of the first sample
 * @param XSIZE number of samples along x
 * @param YSIZE number of samples along y
 */
__global__ void sincos_surf(float dx, float* out, const float XMIN,
                            const float YMIN, const size_t XSIZE,
                            const size_t YSIZE) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    if (i < XSIZE && j < YSIZE) {
        float x = XMIN + i * dx;
        float y = YMIN + j * dx;

        int offset = j + i * YSIZE;
        out[3 * offset]     = x;
        out[3 * offset + 1] = y;

        // The small offset keeps the division defined at the origin.
        float z = sqrtf(x * x + y * y) + 2.2204e-16f;
        out[3 * offset + 2] = sinf(z) / z;
    }
}
/**
 * Integer division that rounds up instead of down (for a >= 0 and b > 0).
 * Used to work out how many blocks are needed to cover `a` items with `b`
 * threads each.
 */
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
/**
 * Launch sincos_surf over the whole XSIZE x YSIZE grid.
 *
 * Blocks are 8x8 threads, with enough blocks in each dimension to cover the
 * grid. The launch is followed by a cudaGetLastError() check so that an
 * invalid launch configuration is reported here rather than surfacing at some
 * later, unrelated CUDA call; the check goes through FORGE_CUDA_CHECK, the
 * same macro main() uses for its CUDA runtime calls.
 *
 * @param dx      spacing between neighbouring samples
 * @param dev_out device pointer with room for 3 * XSIZE * YSIZE floats
 */
void kernel(float dx, float* dev_out) {
    static const dim3 threads(8, 8);
    dim3 blocks(divup(XSIZE, threads.x), divup(YSIZE, threads.y));

    // clang-format off
    sincos_surf<<<blocks, threads>>>(dx, dev_out, XMIN, YMIN, XSIZE, YSIZE);
    // clang-format on
    FORGE_CUDA_CHECK(cudaGetLastError());
}
|
OpenCL¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | /*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#include "cl_helpers.h"
#include <algorithm>
#include <cmath>
#include <complex>
#include <iostream>
#include <iterator>
#include <mutex>
#include <vector>
// Extent of the sampled region in x and y.
static const float XMIN = -8.0f;
static const float XMAX = 8.f;
static const float YMIN = -8.0f;
static const float YMAX = 8.f;
// Spacing between neighbouring samples, used for both x and y.
const float DX = 0.5;
// Samples per axis; passed to the `surf` kernel as its w and h arguments and
// used as the dimensions of the forge::Surface in main().
const unsigned XSIZE = (unsigned)((XMAX - XMIN) / DX);
const unsigned YSIZE = (unsigned)((YMAX - YMIN) / DX);
using namespace std;
#define USE_FORGE_OPENCL_COPY_HELPERS
#include <fg/compute_copy.h>
// clang-format off
// OpenCL C source that kernel() builds at run time. `surf` samples sin(r)/r,
// with r the distance from the origin, on a w x h grid: each work-item
// handles the vertex at its global (0, 1) id and stores x, y, z with the
// second index varying fastest. The epsilon is written as a float literal so
// the source does not depend on double-precision support in the device
// compiler.
static const std::string sin_surf_kernel =
R"EOK(
kernel void
surf(global float* out, const float dx, const float xmin, const float ymin,
const unsigned w, const unsigned h) {
int i = get_global_id(0);
int j = get_global_id(1);
float x = xmin + i * dx;
float y = ymin + j * dx;
if (i < w && j < h) {
int offset = j + i * h;
out[3 * offset] = x;
out[3 * offset + 1] = y;
float z = sqrt(x * x + y * y) + 2.2204e-16f;
out[3 * offset + 2] = sin(z) / z;
}
}
)EOK";
// clang-format on
/**
 * Integer division that rounds up instead of down (for a >= 0 and b > 0).
 * Used to work out how many work-groups are needed to cover `a` items with
 * `b` work-items each.
 */
inline int divup(int a, int b) {
    const int padded = a + b - 1;
    return padded / b;
}
/**
 * Fill `devOut` with the surface vertices.
 *
 * On the first call the OpenCL program in `sin_surf_kernel` is built for
 * `device` and the `surf` kernel is created; both are cached in
 * function-local statics for later calls. A build or kernel-creation failure
 * prints the error and the build log and terminates the process with status
 * 255.
 *
 * The kernel is then enqueued on `queue` over a 2D range of 8x8 work-groups
 * rounded up to cover XSIZE x YSIZE. This function does not wait for the
 * kernel to finish.
 *
 * @param devOut buffer receiving x, y, z per vertex
 * @param queue  command queue the kernel is enqueued on; its context is used
 *               to create the program
 * @param device device the program is built for
 */
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue, cl::Device& device) {
    static bool compileFlag = true;
    static cl::Program prog;
    static cl::Kernel kern;

    if (compileFlag) {
        try {
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(),
                               sin_surf_kernel, false);
            std::vector<cl::Device> devs;
            devs.push_back(device);
            prog.build(devs);
            kern = cl::Kernel(prog, "surf");
        } catch (const cl::Error& err) {
            // Caught by const reference: no copy of the exception object.
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
                      << std::endl;
            exit(255);
        }
        std::cout << "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    // Explicit cl:: qualification: this file has no using-directive for cl.
    cl::NDRange local(8, 8);
    cl::NDRange global(local[0] * divup(XSIZE, (int)(local[0])),
                       local[1] * divup(YSIZE, (int)(local[1])));

    kern.setArg(0, devOut);
    kern.setArg(1, DX);
    kern.setArg(2, XMIN);
    kern.setArg(3, YMIN);
    kern.setArg(4, XSIZE);
    kern.setArg(5, YSIZE);
    queue.enqueueNDRangeKernel(kern, cl::NullRange, global, local);
}
int main(void) {
try {
/*
* First Forge call should be a window creation call
* so that necessary OpenGL context is created for any
* other forge::* object to be created successfully
*/
forge::Window wnd(1024, 768, "3d Surface Demo");
wnd.makeCurrent();
forge::Chart chart(FG_CHART_3D);
chart.setAxesLimits(-10.f, 10.f, -10.f, 10.f, -0.5f, 1.f);
chart.setAxesTitles("x-axis", "y-axis", "z-axis");
forge::Surface surf = chart.surface(XSIZE, YSIZE, forge::f32);
surf.setColor(FG_YELLOW);
/*
* Helper function to create a CLGL interop context.
* This function checks for if the extension is available
* and creates the context on the appropriate device.
* Note: context and queue are defined in cl_helpers.h
*/
context = createCLGLContext(wnd);
Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
queue = CommandQueue(context, device);
cl::Buffer devOut(context, CL_MEM_READ_WRITE,
sizeof(float) * XSIZE * YSIZE * 3);
kernel(devOut, queue, device);
GfxHandle* handle;
createGLBuffer(&handle, surf.vertices(), FORGE_VERTEX_BUFFER);
/* copy your data into the pixel buffer object exposed by
* forge::Surface class and then proceed to rendering.
* To help the users with copying the data from compute
* memory to display memory, Forge provides copy headers
* along with the library to help with this task
*/
copyToGLBuffer(handle, (ComputeResourceHandle)devOut(),
surf.verticesSize());
do { wnd.draw(chart); } while (!wnd.close());
releaseGLBuffer(handle);
} catch (forge::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
} catch (cl::Error err) {
std::cout << err.what() << "(" << err.err() << ")" << std::endl;
}
return 0;
}
|