/* OpenCL example: adds two float arrays element-wise using an "add" kernel. */
#include <stdio.h>
#include <stdlib.h>

#include <CL/cl.h>

/* Number of float elements in each host array and device buffer; also used
 * as the global work size when the kernel is enqueued. */
#define ARRAY_SIZE 256

/*
 * CL_CHECK(err): abort the program if an OpenCL status code signals failure.
 *
 * `err` may be a status variable or a call expression returning cl_int.
 * It is evaluated exactly once and stored in a local, so wrapping a call
 * (e.g. CL_CHECK(clFinish(queue))) never re-issues that call on the failure
 * path. On failure the source location and numeric code are written to
 * stderr and the process exits with EXIT_FAILURE.
 *
 * Requires <stdio.h>, <stdlib.h> and the OpenCL header to be included.
 */
#define CL_CHECK(err) do { \
    cl_int cl_check_status_ = (err); \
    if (cl_check_status_ != CL_SUCCESS) { \
        fprintf(stderr, "OpenCL error at %s:%d code %d\n", \
                __FILE__, __LINE__, (int)cl_check_status_); \
        exit(EXIT_FAILURE); \
    } \
} while (0)

/*
 * OpenCL C source for the "add" kernel, compiled at run time.
 * Each work-item reads its global id in dimension 0 and stores
 * a[i] + b[i] into c[i]. The kernel has no bounds check, so the global
 * work size must not exceed the number of elements in the buffers.
 */
const char *kernelSource =
    "__kernel void add(__global const float *a, __global const float *b, __global float *c) { "
    " int i = get_global_id(0); "
    " c[i] = a[i] + b[i]; "
    "} ";

int main() {
|
|
// ... (rest of the code is the same, but with added prints)
|
|
|
|
printf("Initializing host arrays...\n"); fflush(stdout);
|
|
float *a = (float*)malloc(sizeof(float) * ARRAY_SIZE);
|
|
float *b = (float*)malloc(sizeof(float) * ARRAY_SIZE);
|
|
float *c = (float*)malloc(sizeof(float) * ARRAY_SIZE);
|
|
for (int i = 0; i < ARRAY_SIZE; i++) {
|
|
a[i] = (float)i;
|
|
b[i] = (float)i * 2.0f;
|
|
}
|
|
|
|
cl_int ret;
|
|
|
|
printf("Getting platform...\n"); fflush(stdout);
|
|
cl_platform_id platform;
|
|
CL_CHECK(clGetPlatformIDs(1, &platform, NULL));
|
|
|
|
printf("Getting GPU device...\n"); fflush(stdout);
|
|
cl_device_id gpuDevice;
|
|
CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &gpuDevice, NULL));
|
|
|
|
char deviceName[128];
|
|
CL_CHECK(clGetDeviceInfo(gpuDevice, CL_DEVICE_NAME, 128, deviceName, NULL));
|
|
printf("Using GPU device: %s\n", deviceName); fflush(stdout);
|
|
|
|
printf("Creating context...\n"); fflush(stdout);
|
|
cl_context context = clCreateContext(NULL, 1, &gpuDevice, NULL, NULL, &ret);
|
|
CL_CHECK(ret);
|
|
|
|
printf("Creating command queue...\n"); fflush(stdout);
|
|
cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, gpuDevice, 0, &ret);
|
|
CL_CHECK(ret);
|
|
|
|
printf("Creating buffers...\n"); fflush(stdout);
|
|
cl_mem a_mem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE * sizeof(float), a, &ret); CL_CHECK(ret);
|
|
cl_mem b_mem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE * sizeof(float), b, &ret); CL_CHECK(ret);
|
|
cl_mem c_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, ARRAY_SIZE * sizeof(float), NULL, &ret); CL_CHECK(ret);
|
|
|
|
printf("Creating and building program...\n"); fflush(stdout);
|
|
cl_program program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, &ret); CL_CHECK(ret);
|
|
ret = clBuildProgram(program, 1, &gpuDevice, NULL, NULL, NULL);
|
|
if (ret != CL_SUCCESS) {
|
|
// ... (error handling for build)
|
|
exit(1);
|
|
}
|
|
|
|
printf("Creating kernel...\n"); fflush(stdout);
|
|
cl_kernel kernel = clCreateKernel(program, "add", &ret); CL_CHECK(ret);
|
|
|
|
printf("Setting kernel arguments...\n"); fflush(stdout);
|
|
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem));
|
|
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem));
|
|
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem));
|
|
|
|
size_t global_size = ARRAY_SIZE;
|
|
|
|
printf("Enqueuing kernel for execution...\n"); fflush(stdout);
|
|
CL_CHECK(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL));
|
|
|
|
printf("Waiting for commands to finish...\n"); fflush(stdout);
|
|
CL_CHECK(clFinish(command_queue));
|
|
|
|
printf("Reading result buffer...\n"); fflush(stdout);
|
|
CL_CHECK(clEnqueueReadBuffer(command_queue, c_mem, CL_TRUE, 0, ARRAY_SIZE * sizeof(float), c, 0, NULL, NULL));
|
|
|
|
printf("Execution finished. Verifying results...\n"); fflush(stdout);
|
|
// ... (verification and cleanup)
|
|
|
|
// Cleanup
|
|
clReleaseMemObject(a_mem);
|
|
clReleaseMemObject(b_mem);
|
|
clReleaseMemObject(c_mem);
|
|
clReleaseKernel(kernel);
|
|
clReleaseProgram(program);
|
|
clReleaseCommandQueue(command_queue);
|
|
clReleaseContext(context);
|
|
free(a);
|
|
free(b);
|
|
free(c);
|
|
|
|
return 0;
|
|
} |