OpenCL_LAB2
2017-04-15 本文已影响0人
Bing2464
运行hello.cpp & 运行vadd.cpp
矩阵乘法
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
// Reads the whole content of a file into a string.
//
// filename: path of the file to read; it is opened in binary mode so the
//           bytes are returned exactly as stored (embedded NULs included).
// s:        receives the file content on success; left untouched on failure.
//
// Returns 0 on success, 1 when the file cannot be opened or fully read
// (a diagnostic is printed to stdout in that case).
int convertToString(const char *filename, std::string &s) {
    if (filename == NULL) {
        printf("Error: Failed to open file %s\n", "(null)");
        return 1;
    }

    std::fstream f(filename, (std::fstream::in | std::fstream::binary));
    if (!f.is_open()) {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Determine the file length; tellg() yields -1 on failure, which must not
    // be turned into an (enormous) unsigned allocation size.
    f.seekg(0, std::fstream::end);
    const std::streamoff length = static_cast<std::streamoff>(f.tellg());
    if (length < 0) {
        printf("Error: Failed to read file %s\n", filename);
        return 1;
    }
    f.seekg(0, std::fstream::beg);

    // Read straight into a std::string: no raw new/delete to leak, and the
    // explicit length keeps bytes that follow an embedded '\0'.
    std::string contents(static_cast<size_t>(length), '\0');
    if (length > 0) {
        f.read(&contents[0], static_cast<std::streamsize>(length));
        if (!f || f.gcount() != static_cast<std::streamsize>(length)) {
            printf("Error: Failed to read file %s\n", filename);
            return 1;
        }
    }

    s.swap(contents);
    return 0;
}
int main(int argc, char *argv[]) {
double cputime, gputime;
clock_t timestamp;
const int W = 100;
const int mat_size = W * W;
// 在 host 内存中创建三个缓冲区
float *const buf1 = (float *) malloc(mat_size * sizeof(float));
float *const buf2 = (float *) malloc(mat_size * sizeof(float));
float *const buf = (float *) malloc(mat_size * sizeof(float));
float *const op_data = (float *) malloc(mat_size * sizeof(float));
// 初始化矩阵
srand((unsigned int) time(NULL));
for (int i = 0; i < mat_size; i++)
buf1[i] = float(rand() % 1000) * M_PI;
srand((unsigned int) time(NULL) + 1000);
for (int i = 0; i < mat_size; i++)
buf2[i] = float(rand() % 1000) * M_PI;
// 时间戳
timestamp = clock();
for (int i = 0; i < mat_size; i++) {
float tmp = 0.0;
for (int k = 0; k < W; k++)
tmp += buf1[i * W + k] * buf2[k * W + i];
buf[i * W + i] = tmp;
}
cputime = (double) (clock() - timestamp) / CLOCKS_PER_SEC * 1000;
printf("串行执行时间:%8.3f ms\n", cputime);
cl_platform_id platform;
cl_event prof_event;
// 创建平台对象
clGetPlatformIDs(1, &platform, NULL);
cl_device_id device;
// 创建 GPU 设备
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
// 创建 context
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
// 创建命令队列
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL);
// 创建三个 OpenCL 内存对象
cl_mem objects[3];
objects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * mat_size, buf1,
NULL);
objects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * mat_size, buf2,
NULL);
objects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * mat_size, buf,
NULL);
const char *filename = "mul.cl";
std::string sourceStr;
convertToString(filename, sourceStr);
const char *source = sourceStr.c_str();
size_t sourceSize[] = {strlen(source)};
cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
// 编译程序对象
clBuildProgram(program, 1, &device, NULL, NULL, NULL);
// 创建 Kernel 对象
cl_kernel kernel = clCreateKernel(program, "matrix_mult", NULL);
// 设置 Kernel 参数
clSetKernelArg(kernel, 0, sizeof(int), &W);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &objects[0]);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &objects[1]);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &objects[2]);
//执行 kernel
cl_ulong ev_start_time = (cl_ulong) 0;
cl_ulong ev_end_time = (cl_ulong) 0;
size_t global[1];
global[0] = (size_t) W;
clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL, 0, NULL, &prof_event);
clFinish(queue);
//读取时间
clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &ev_start_time, NULL);
clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ev_end_time, NULL);
gputime = (double) (ev_end_time - ev_start_time) * 1e-6;
printf("OpenCL 执行时间:%8.3f ms\n", gputime);
//数据拷回 host 内存
clEnqueueReadBuffer(queue, objects[2], CL_TRUE, 0, sizeof(float) * mat_size, op_data, 0, NULL, NULL);
// 验证 GPU 计算结果
for (int i = 0; i < mat_size; i++) {
if (fabs(buf[i] - op_data[i]) > 0.0001) {
printf("check failed\n");
break;
}
}
if (buf1)
free(buf1);
if (buf2)
free(buf2);
if (buf)
free(buf);
if (op_data)
free(op_data);
// 删除 OpenCL 资源对象
clReleaseMemObject(objects[2]);
clReleaseMemObject(objects[1]);
clReleaseMemObject(objects[0]);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}