#include<stdio.h>
#include<stdlib.h>

#include <chrono>
#include<iostream>
#include <string>
#include <vector>

#include<CL/cl.h>

#include "OpenCLExcuter.h"
#include "ImgUtil.h"
|
// Pulls all of std into the global namespace; printDevice() below relies on
// this for its unqualified cout/endl.
using namespace std;
|
// Forward declarations of helpers that are defined further down in this file.
void printDevice();
|
uchar* createDemoData(int line);
|
uchar* createTemplateData(int line);
|
|
// Number of glyphs laid out side by side inside one repetition group by
// createDemoData()/createTemplateData(); also a factor of the column count
// computed in main3().
int LINE_NUMBER = 10;
|
// Number of repetition groups per output row. Initialised from
// _NUMBER_L2_TOTAL_NUMBER, which is not defined in this file (presumably it
// comes from one of the project headers - confirm).
int RE = _NUMBER_L2_TOTAL_NUMBER;
|
|
|
|
// Terminates the process with a diagnostic on stderr when ERR is not CL_SUCCESS.
//
// ERR is evaluated exactly once. The do { } while (0) wrapper makes the macro
// behave like a single statement, so `CHECK_ERRORS(x);` is safe inside an
// unbraced if/else. Call sites must end the invocation with a semicolon.
#define CHECK_ERRORS(ERR) \
    do { \
        const auto check_errors_status_ = (ERR); \
        if (check_errors_status_ != CL_SUCCESS) { \
            std::cerr << "OpenCL error code " << check_errors_status_ \
                      << " file: " << __FILE__ << " line: " << __LINE__ \
                      << ".\nExiting..." << std::endl; \
            exit(1); \
        } \
    } while (0)
|
int main1() {
|
//printDevice();
|
//return 1;
|
auto starttime = std::chrono::steady_clock::now();
|
|
cl_int error;
|
cl_uint num_of_platforms = 0;
|
cl_platform_id platforms;
|
|
cl_device_id devices;
|
|
cl_context context;
|
|
FILE* program_handle;
|
size_t program_size;
|
char* program_buffer;
|
cl_program program;
|
|
size_t log_size;
|
char* program_log;
|
|
char kernel_name[] = "createBuffer";
|
cl_kernel kernel;
|
|
cl_command_queue queue;
|
//»ñȡƽ̨
|
error = clGetPlatformIDs(1, &platforms, &num_of_platforms);
|
if (error != 0) {
|
printf("Get platform failed!");
|
return -1;
|
}
|
//»ñÈ¡É豸
|
error = clGetDeviceIDs(platforms, CL_DEVICE_TYPE_GPU, 1, &devices, NULL);
|
if (error != 0) {
|
printf("Get device failed!");
|
return -1;
|
}
|
|
std::cout << " »ñÈ¡É豸£º" << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - starttime).count() << std::endl;
|
|
//´´½¨ÉÏÏÂÎÄ
|
context = clCreateContext(NULL, 1, &devices, NULL, NULL, &error);
|
if (error != 0) {
|
printf("Creat context failed!");
|
return -1;
|
}
|
std::cout << " ´´½¨ÉÏÏÂÎÄ£º" << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - starttime).count() << std::endl;
|
//´´½¨³ÌÐò£»×¢ÒâÒªÓÃ"rb"
|
fopen_s(&program_handle,"kernel.cl", "rb");
|
if (program_handle == NULL) {
|
printf("The kernle can not be opened!");
|
return -1;
|
}
|
fseek(program_handle, 0, SEEK_END);
|
program_size = ftell(program_handle);
|
rewind(program_handle);
|
|
program_buffer = (char*)malloc(program_size + 1);
|
program_buffer[program_size] = '\0';
|
error = fread(program_buffer, sizeof(char), program_size, program_handle);
|
if (error == 0) {
|
printf("Read kernel failed!");
|
return -1;
|
}
|
fclose(program_handle);
|
program = clCreateProgramWithSource(context, 1, (const char**)&program_buffer,
|
&program_size, &error);
|
if (error < 0) {
|
printf("Couldn't create the program!");
|
return -1;
|
}
|
//±àÒë³ÌÐò
|
error = clBuildProgram(program, 1, &devices, NULL, NULL, NULL);
|
if (error < 0) {
|
//È·¶¨ÈÕÖ¾ÎļþµÄ´óС
|
clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
program_log = (char*)malloc(log_size + 1);
|
program_log[log_size] = '\0';
|
//¶ÁÈ¡ÈÕÖ¾
|
clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG,
|
log_size + 1, program_log, NULL);
|
printf("%s\n", program_log);
|
free(program_log);
|
return -1;
|
}
|
free(program_buffer);
|
|
//´´½¨ÃüÁî¶ÓÁÐ
|
queue = clCreateCommandQueue(context, devices, CL_QUEUE_PROFILING_ENABLE, &error);
|
if (error < 0) {
|
printf("Coudn't create the command queue");
|
return -1;
|
}
|
//----------³ÌÐòÕýÎÄ¿ªÊ¼---------
|
|
for (int n = 0;n < 100;n++)
|
{
|
|
auto startexectime = std::chrono::steady_clock::now();
|
|
|
|
//´´½¨ÄÚºË
|
kernel = clCreateKernel(program, "createBuffer", &error);
|
if (kernel == NULL) {
|
printf("Couldn't create kernel!\n");
|
return -1;
|
}
|
//³õʼ»¯²ÎÊý
|
const int size = 70000;
|
float result[size];
|
float a_in[size];
|
float b_in[size];
|
for (int i = 0; i < size; i++) {
|
a_in[i] = i+n;
|
b_in[i] = i * 2.0;
|
}
|
//´´½¨»º´æ¶ÔÏó
|
cl_mem memObject1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * size, a_in, &error);
|
if (error < 0) {
|
printf("Creat memObject1 failed!\n");
|
return -1;
|
}
|
cl_mem memObject2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
sizeof(float) * size, b_in, &error);
|
if (error < 0) {
|
printf("Creat memObject2 failed!\n");
|
return -1;
|
}
|
cl_mem memObject3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
sizeof(float) * size, NULL, &error);
|
if (error < 0) {
|
printf("Creat memObject3 failed!\n");
|
return -1;
|
}
|
//ÉèÖÃÄں˲ÎÊý
|
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObject1);
|
error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObject2);
|
error |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObject3);
|
if (error != CL_SUCCESS) {
|
printf("Error setting kernel arguments!\n");
|
return -1;
|
}
|
//Ö´ÐÐÄÚºË
|
size_t globalWorkSize[1] = { size };
|
size_t localWorkSize[1] = { 1 };
|
|
const int work_dim = 2;
|
|
error = clEnqueueNDRangeKernel(queue, kernel, work_dim, NULL, globalWorkSize,
|
localWorkSize, 0, NULL, NULL);
|
if (error != CL_SUCCESS) {
|
printf("Error queuing kernel for execution!\n");
|
return -1;
|
}
|
|
//¶ÁȡִÐнá¹û
|
error = clEnqueueReadBuffer(queue, memObject3, CL_TRUE, 0, size * sizeof(float),
|
result, 0, NULL, NULL);
|
if (error != CL_SUCCESS) {
|
printf("Error reading result buffer!\n");
|
return -1;
|
}
|
//ÏÔʾ½á¹û
|
for (int i = 0; i < 2; i++) {
|
printf("%f ", result[i]);
|
}
|
|
clReleaseKernel(kernel);
|
clReleaseMemObject(memObject1);
|
clReleaseMemObject(memObject2);
|
clReleaseMemObject(memObject3);
|
|
|
std::cout << " Ö´ÐнáÊø£º" << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - startexectime).count() << std::endl;
|
}
|
|
//-------------³ÌÐòÕýÎĽáÊø
|
|
|
|
//ÊÍ·Å×ÊÔ´
|
clReleaseDevice(devices);
|
clReleaseContext(context);
|
clReleaseProgram(program);
|
clReleaseCommandQueue(queue);
|
|
std::cout << " ºÄʱ£º" << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - starttime).count() << std::endl;
|
return 0;
|
}
|
|
|
int main3() {
|
ImgUtil::init();
|
OpenCLExcuter* op = new OpenCLExcuter();
|
int dataCount = 100;
|
|
|
int rows = 8* dataCount;
|
int cols = 5 * LINE_NUMBER * RE;
|
uchar* b_in = createTemplateData(dataCount);
|
uchar* a_in = createDemoData(dataCount);
|
|
//printf("ÊäÈë1: \n");
|
for (int r = 0;r < rows;r++) {
|
for (int c = 0;c < cols;c++) {
|
// printf("%d ", a_in[r*cols+c]);
|
}
|
// printf("\n");
|
}
|
|
//printf("ÊäÈë2: \n");
|
for (int r = 0;r < rows;r++) {
|
for (int c = 0;c < cols;c++) {
|
// printf("%d ", b_in[r * cols + c]);
|
}
|
//printf("\n");
|
}
|
|
/*
|
int rows = 10;
|
int cols = 10;
|
uchar* b_in = (uchar*)malloc(sizeof(uchar*)*400);
|
uchar* a_in = (uchar*)malloc(sizeof(uchar*) * 400);
|
for (int i = 0;i < 400;i++) {
|
b_in[i] = i;
|
a_in[i] = i + 1;
|
}
|
*/
|
|
op->init();
|
for (int i = 0;i < 100;i++)
|
{
|
op->recognition_numbers(a_in, b_in, rows, cols ,_NUMBER_L2_WIDTH, _NUMBER_L2_HEIGHT, _NUMBER_L2_TOTAL_NUMBER);
|
}
|
op->destory();
|
|
return 1;
|
}
|
|
/**
 * Builds the binarised "demo" input buffer.
 *
 * The buffer is row-major with a row width of 5 * LINE_NUMBER * RE bytes and
 * 8 rows per band, `line` bands in total. Within a row, repetition group `re`
 * holds LINE_NUMBER copies of glyph ImgUtil::NUMS_LEVEL2[re % 10], each
 * 5 bytes wide. A glyph pixel greater than 40 is stored as 1, anything else
 * as 0.
 *
 * NOTE(review): the allocation is sized with _NUMBER_L2_HEIGHT while the fill
 * loops use the literals 8 and 5; this is only consistent if
 * _NUMBER_L2_HEIGHT >= 8 - confirm against the header that defines it.
 *
 * @param line Number of bands to generate.
 * @return Buffer obtained from malloc(); the caller must free() it.
 *         NULL if the allocation failed.
 */
uchar* createDemoData(int line) {
    const int glyphRows = 8;
    const int glyphCols = 5;
    const int rowStride = glyphCols * LINE_NUMBER * RE;  // bytes per output row
    const int bandStride = glyphRows * rowStride;        // bytes per band

    unsigned char* data = static_cast<unsigned char*>(
        malloc(sizeof(unsigned char) * (_NUMBER_L2_HEIGHT * line) * 5 * LINE_NUMBER * RE));
    if (data == NULL) {
        // Previously the loops below would have written through the null pointer.
        return NULL;
    }

    for (int band = 0; band < line; band++) {
        const int bandOffset = bandStride * band;
        for (int re = 0; re < RE; re++) {
            for (int n = 0; n < LINE_NUMBER; n++) {
                // Column at which this glyph copy starts within the row.
                const int glyphOffset = re * LINE_NUMBER * glyphCols + n * glyphCols;
                for (int r = 0; r < glyphRows; r++) {
                    const int rowOffset = bandOffset + rowStride * r + glyphOffset;
                    for (int c = 0; c < glyphCols; c++) {
                        const uchar value = ImgUtil::NUMS_LEVEL2[re % 10].data.ptr(r)[c];
                        data[rowOffset + c] = value > 40 ? 1 : 0;
                    }
                }
            }
        }
    }
    return data;
}
|
|
/**
 * Builds the binarised template buffer.
 *
 * The buffer is row-major with a row width of
 * _NUMBER_L2_WIDTH * LINE_NUMBER * RE bytes and _NUMBER_L2_HEIGHT rows per
 * band, `line` bands in total. Within a row, every repetition group holds the
 * glyphs ImgUtil::NUMS_LEVEL2[0 .. LINE_NUMBER-1] side by side, each
 * _NUMBER_L2_WIDTH bytes wide. A glyph pixel greater than 40 is stored as 1,
 * anything else as 0.
 *
 * @param line Number of bands to generate.
 * @return Buffer obtained from malloc(); the caller must free() it.
 *         NULL if the allocation failed.
 */
uchar* createTemplateData(int line) {
    const int rowStride = _NUMBER_L2_WIDTH * LINE_NUMBER * RE;  // bytes per output row
    const int bandStride = _NUMBER_L2_HEIGHT * rowStride;       // bytes per band

    unsigned char* data = static_cast<unsigned char*>(
        malloc(sizeof(unsigned char) * (_NUMBER_L2_HEIGHT * line) * _NUMBER_L2_WIDTH * LINE_NUMBER * RE));
    if (data == NULL) {
        // Previously the loops below would have written through the null pointer.
        return NULL;
    }

    for (int band = 0; band < line; band++) {
        const int bandOffset = bandStride * band;
        for (int re = 0; re < RE; re++) {
            for (int n = 0; n < LINE_NUMBER; n++) {
                // Column at which this glyph starts within the row.
                const int glyphOffset = re * LINE_NUMBER * _NUMBER_L2_WIDTH + n * _NUMBER_L2_WIDTH;
                for (int r = 0; r < _NUMBER_L2_HEIGHT; r++) {
                    const int rowOffset = bandOffset + rowStride * r + glyphOffset;
                    for (int c = 0; c < _NUMBER_L2_WIDTH; c++) {
                        const uchar value = ImgUtil::NUMS_LEVEL2[n].data.ptr(r)[c];
                        data[rowOffset + c] = value > 40 ? 1 : 0;
                    }
                }
            }
        }
    }
    return data;
}
|
|
|
void printDevice() {
|
|
cl_int err = CL_SUCCESS;
|
|
// 1. »ñÈ¡µ±Ç°É豸ËùÓÐÖ§³ÖOpenCLµÄƽ̨µÄÊýÁ¿
|
cl_uint num_of_platforms = 0;
|
err = clGetPlatformIDs(0, 0, &num_of_platforms);
|
CHECK_ERRORS(err);
|
|
// 2. »ñÈ¡µ±Ç°É豸ËùÓÐÖ§³ÖOpenCLµÄƽ̨µÄÐÅÏ¢
|
cl_platform_id* platforms = new cl_platform_id[num_of_platforms];
|
err = clGetPlatformIDs(num_of_platforms, platforms, 0);
|
CHECK_ERRORS(err);
|
|
cl_char platform_names[10][50];
|
// 3. ´òӡƽ̨ÐÅÏ¢
|
cout << "ƽ̨ÐÅÏ¢£º\n";
|
for (cl_uint i = 0; i < num_of_platforms; i++)
|
{
|
// »ñȡƽ̨×Ö·û´®µÄ³¤¶È
|
size_t platform_name_length = 0;
|
err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 0, 0, &platform_name_length);
|
CHECK_ERRORS(err);
|
|
// »ñȡƽ̨×Ö·û´®
|
char* platform_name = new char[platform_name_length];
|
err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, platform_name_length, platform_name, 0);
|
CHECK_ERRORS(err);
|
|
cout << " [" << i << "] " << platform_name << endl;
|
|
// ±¸·Ýplatform name
|
if (i < 10) {
|
memset(platform_names[i], 0, 50);
|
memcpy(platform_names[i], platform_name, platform_name_length);
|
}
|
|
delete[] platform_name;
|
}
|
|
// 4. ²éѯ¸÷ƽ̨É豸ÊýÁ¿
|
struct {
|
cl_device_type type;
|
const char* name;
|
cl_uint count;
|
}devices[] = {
|
{CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", 0}, // GPU
|
{CL_DEVICE_TYPE_CPU, "CL_DEVICE_TYPE_CPU", 0}, // CPU
|
{CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", 0} // ¼ÓËÙÆ÷
|
};
|
|
// 5. ±éÀú²éѯ¸÷¸öƽ̨ÏÂÓµÓеÄÉ豸ÊýÁ¿
|
for (cl_int j = 0; j < num_of_platforms; j++) {
|
cl_platform_id platform = platforms[j];
|
cout << "\nplatform:" << platform_names[j] << endl;
|
|
// 6. ±éÀú²éѯGPU¡¢CPU¡¢ACCELERATORÉ豸µÄÊýÁ¿
|
for (cl_int i = 0; i < (sizeof(devices) / sizeof(devices[0])); i++)
|
{
|
err = clGetDeviceIDs(platform, devices[i].type, 0, 0, &devices[i].count);
|
if (err == CL_DEVICE_NOT_FOUND) {
|
devices[i].count = 0;
|
err = CL_SUCCESS;
|
}
|
CHECK_ERRORS(err);
|
|
cout << "\tdevices:" << devices[i].name
|
<< "\tcount:" << devices[i].count;
|
|
if (devices[i].count != 0) {
|
// 7. ±éÀú²éѯGPU¡¢CPU¡¢ACCELERATOR ËùÓÐÉ豸µÄÐÅÏ¢
|
cl_device_id* device = new cl_device_id[devices[i].count];
|
err = clGetDeviceIDs(platform, devices[i].type, devices[i].count, device, 0);
|
|
cout << "\t\tdevice name:";
|
for (cl_int k = 0; k < devices[i].count; k++)
|
{
|
// 8. »ñÈ¡ºÍ´òÓ¡¸÷¸öÉ豸µÄname
|
size_t length = 0;
|
cl_device_id each_device = device[k];
|
err = clGetDeviceInfo(each_device, CL_DEVICE_NAME, 0, 0, &length);
|
CHECK_ERRORS(err);
|
|
char* value = new char[length];
|
err = clGetDeviceInfo(each_device, CL_DEVICE_NAME, length, value, 0);
|
CHECK_ERRORS(err);
|
cout << value << " ";
|
|
// 9. »ñÈ¡ºÍ´òÓ¡¸÷¸öÉ豸µÄversion
|
err = clGetDeviceInfo(each_device, CL_DEVICE_VERSION, 0, 0, &length);
|
CHECK_ERRORS(err);
|
|
char* version = new char[length];
|
err = clGetDeviceInfo(each_device, CL_DEVICE_VERSION, length, version, 0);
|
CHECK_ERRORS(err);
|
cout << version << " ";
|
|
delete[] value;
|
delete[] version;
|
}
|
|
delete[] device;
|
}
|
cout << endl;
|
}
|
}
|
|
delete[] platforms;
|
}
|