First Commit
This commit is contained in:
180
opencl_add_cpu.c
Normal file
180
opencl_add_cpu.c
Normal file
@@ -0,0 +1,180 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <CL/cl.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#define ARRAY_SIZE 15000576 // about 15 million items (= 58596 * 256)
|
||||
#define LOCAL_SIZE 256 // Tunable work-group size; main() passes it as the local work size of the kernel launch
|
||||
|
||||
/*
 * OpenCL C source for the "add" kernel: each work-item reads its global id
 * in dimension 0 and stores a[i] + b[i] into c[i].
 * The kernel performs no bounds check on i.
 */
const char *kernelSource =
    "__kernel void add("
    "__global const float *a, "
    "__global const float *b, "
    "__global float *c) "
    "{ "
    " int i = get_global_id(0); "
    " c[i] = a[i] + b[i]; "
    "} ";
|
||||
|
||||
double get_time() {
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return tv.tv_sec + tv.tv_usec * 1e-6;
|
||||
}
|
||||
|
||||
int main() {
|
||||
float *a = (float*)malloc(sizeof(float) * ARRAY_SIZE);
|
||||
float *b = (float*)malloc(sizeof(float) * ARRAY_SIZE);
|
||||
float *c = (float*)malloc(sizeof(float) * ARRAY_SIZE);
|
||||
|
||||
// Initialize arrays
|
||||
for (int i = 0; i < ARRAY_SIZE; i++) {
|
||||
a[i] = i;
|
||||
b[i] = i * 2;
|
||||
}
|
||||
|
||||
// Get all platforms
|
||||
cl_uint platformCount;
|
||||
clGetPlatformIDs(0, NULL, &platformCount);
|
||||
cl_platform_id *platforms = (cl_platform_id *)malloc(platformCount * sizeof(cl_platform_id));
|
||||
clGetPlatformIDs(platformCount, platforms, NULL);
|
||||
|
||||
// Find the POCL platform
|
||||
cl_platform_id poclPlatform = NULL;
|
||||
for (cl_uint i = 0; i < platformCount; i++) {
|
||||
char platformName[128];
|
||||
clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 128, platformName, NULL);
|
||||
if (strstr(platformName, "Portable Computing Language") != NULL) {
|
||||
poclPlatform = platforms[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (poclPlatform == NULL) {
|
||||
printf("POCL platform not found!\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Get the CPU device from the POCL platform
|
||||
cl_device_id cpuDevice = NULL;
|
||||
cl_uint deviceCount;
|
||||
cl_int ret = clGetDeviceIDs(poclPlatform, CL_DEVICE_TYPE_CPU, 1, &cpuDevice, &deviceCount);
|
||||
if (ret != CL_SUCCESS || deviceCount == 0) {
|
||||
printf("No CPU device found on POCL platform!\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create an OpenCL context
|
||||
cl_context context = clCreateContext(NULL, 1, &cpuDevice, NULL, NULL, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
printf("Error creating context: %d\n", ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create a command queue
|
||||
cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, cpuDevice, NULL, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
printf("Error creating command queue: %d\n", ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create memory buffers on the device
|
||||
cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, ARRAY_SIZE * sizeof(float), NULL, &ret);
|
||||
cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, ARRAY_SIZE * sizeof(float), NULL, &ret);
|
||||
cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, ARRAY_SIZE * sizeof(float), NULL, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
printf("Error creating memory buffers: %d\n", ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Time data transfer to device
|
||||
double start = get_time();
|
||||
ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, ARRAY_SIZE * sizeof(float), a, 0, NULL, NULL);
|
||||
ret |= clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, ARRAY_SIZE * sizeof(float), b, 0, NULL, NULL);
|
||||
double transfer_to_device_time = get_time() - start;
|
||||
if (ret != CL_SUCCESS) {
|
||||
printf("Error writing to memory buffers: %d\n", ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create a program from the kernel source
|
||||
cl_program program = clCreateProgramWithSource(context, 1, (const char **)&kernelSource, NULL, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
printf("Error creating program: %d\n", ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Build the program
|
||||
ret = clBuildProgram(program, 1, &cpuDevice, NULL, NULL, NULL);
|
||||
if (ret != CL_SUCCESS) {
|
||||
size_t log_size;
|
||||
clGetProgramBuildInfo(program, cpuDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
||||
char *log = (char *)malloc(log_size);
|
||||
clGetProgramBuildInfo(program, cpuDevice, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
|
||||
printf("Kernel compilation error:\n%s\n", log);
|
||||
free(log);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create the OpenCL kernel
|
||||
cl_kernel kernel = clCreateKernel(program, "add", &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
printf("Error creating kernel: %d\n", ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Set the arguments of the kernel
|
||||
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
|
||||
ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
|
||||
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
|
||||
if (ret != CL_SUCCESS) {
|
||||
printf("Error setting kernel arguments: %d\n", ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Time kernel execution
|
||||
start = get_time();
|
||||
size_t global_item_size = ARRAY_SIZE;
|
||||
size_t local_item_size = LOCAL_SIZE;
|
||||
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
|
||||
clFinish(command_queue);
|
||||
double kernel_time = get_time() - start;
|
||||
if (ret != CL_SUCCESS) {
|
||||
printf("Error executing kernel: %d\n", ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Time data transfer back to host
|
||||
start = get_time();
|
||||
ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, ARRAY_SIZE * sizeof(float), c, 0, NULL, NULL);
|
||||
double transfer_to_host_time = get_time() - start;
|
||||
if (ret != CL_SUCCESS) {
|
||||
printf("Error reading from memory buffer: %d\n", ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Print timing results
|
||||
printf("Data transfer to device: %.3f ms\n", transfer_to_device_time * 1000);
|
||||
printf("Kernel execution time: %.3f ms\n", kernel_time * 1000);
|
||||
printf("Data transfer to host: %.3f ms\n", transfer_to_host_time * 1000);
|
||||
printf("Total time: %.3f ms\n", (transfer_to_device_time + kernel_time + transfer_to_host_time) * 1000);
|
||||
|
||||
// Print a sample of the result
|
||||
for (int i = 0; i < 10; i++) {
|
||||
printf("c[%d] = %f\n", i, c[i]);
|
||||
}
|
||||
|
||||
// Clean up
|
||||
clFlush(command_queue);
|
||||
clFinish(command_queue);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
clReleaseMemObject(a_mem_obj);
|
||||
clReleaseMemObject(b_mem_obj);
|
||||
clReleaseMemObject(c_mem_obj);
|
||||
clReleaseCommandQueue(command_queue);
|
||||
clReleaseContext(context);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
free(platforms);
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user