#include #include #include #include #include #include "OpenCLExcuter.h" #include "ImgUtil.h" using namespace std; void printDevice(); uchar* createDemoData(int line); uchar* createTemplateData(int line); int LINE_NUMBER = 10; int RE = _NUMBER_L2_TOTAL_NUMBER; #define CHECK_ERRORS(ERR) \ if(ERR != CL_SUCCESS){ \ std::cerr << "OpenCL error code" << ERR << "file: " << __FILE__ << "line: " << __LINE__ << ".\nExiting..." << std::endl; \ exit(1); \ } int main1() { //printDevice(); //return 1; auto starttime = std::chrono::steady_clock::now(); cl_int error; cl_uint num_of_platforms = 0; cl_platform_id platforms; cl_device_id devices; cl_context context; FILE* program_handle; size_t program_size; char* program_buffer; cl_program program; size_t log_size; char* program_log; char kernel_name[] = "createBuffer"; cl_kernel kernel; cl_command_queue queue; //»ñȡƽ̨ error = clGetPlatformIDs(1, &platforms, &num_of_platforms); if (error != 0) { printf("Get platform failed!"); return -1; } //»ñÈ¡É豸 error = clGetDeviceIDs(platforms, CL_DEVICE_TYPE_GPU, 1, &devices, NULL); if (error != 0) { printf("Get device failed!"); return -1; } std::cout << " »ñÈ¡É豸£º" << std::chrono::duration_cast(std::chrono::steady_clock::now() - starttime).count() << std::endl; //´´½¨ÉÏÏÂÎÄ context = clCreateContext(NULL, 1, &devices, NULL, NULL, &error); if (error != 0) { printf("Creat context failed!"); return -1; } std::cout << " ´´½¨ÉÏÏÂÎÄ£º" << std::chrono::duration_cast(std::chrono::steady_clock::now() - starttime).count() << std::endl; //´´½¨³ÌÐò£»×¢ÒâÒªÓÃ"rb" fopen_s(&program_handle,"kernel.cl", "rb"); if (program_handle == NULL) { printf("The kernle can not be opened!"); return -1; } fseek(program_handle, 0, SEEK_END); program_size = ftell(program_handle); rewind(program_handle); program_buffer = (char*)malloc(program_size + 1); program_buffer[program_size] = '\0'; error = fread(program_buffer, sizeof(char), program_size, program_handle); if (error == 0) { printf("Read kernel failed!"); return -1; } fclose(program_handle); program = clCreateProgramWithSource(context, 1, (const char**)&program_buffer, &program_size, &error); if (error < 0) { printf("Couldn't create the program!"); return -1; } //±àÒë³ÌÐò error = clBuildProgram(program, 1, &devices, NULL, NULL, NULL); if (error < 0) { //È·¶¨ÈÕÖ¾ÎļþµÄ´óС clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); program_log = (char*)malloc(log_size + 1); program_log[log_size] = '\0'; //¶ÁÈ¡ÈÕÖ¾ clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL); printf("%s\n", program_log); free(program_log); return -1; } free(program_buffer); //´´½¨ÃüÁî¶ÓÁÐ queue = clCreateCommandQueue(context, devices, CL_QUEUE_PROFILING_ENABLE, &error); if (error < 0) { printf("Coudn't create the command queue"); return -1; } //----------³ÌÐòÕýÎÄ¿ªÊ¼--------- for (int n = 0;n < 100;n++) { auto startexectime = std::chrono::steady_clock::now(); //´´½¨ÄÚºË kernel = clCreateKernel(program, "createBuffer", &error); if (kernel == NULL) { printf("Couldn't create kernel!\n"); return -1; } //³õʼ»¯²ÎÊý const int size = 70000; float result[size]; float a_in[size]; float b_in[size]; for (int i = 0; i < size; i++) { a_in[i] = i+n; b_in[i] = i * 2.0; } //´´½¨»º´æ¶ÔÏó cl_mem memObject1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * size, a_in, &error); if (error < 0) { printf("Creat memObject1 failed!\n"); return -1; } cl_mem memObject2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * size, b_in, &error); if (error < 0) { printf("Creat memObject2 failed!\n"); return -1; } cl_mem memObject3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * size, NULL, &error); if (error < 0) { printf("Creat memObject3 failed!\n"); return -1; } //ÉèÖÃÄں˲ÎÊý error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObject1); error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObject2); error |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObject3); if (error != CL_SUCCESS) { printf("Error setting kernel arguments!\n"); return -1; } //Ö´ÐÐÄÚºË size_t globalWorkSize[1] = { size }; size_t localWorkSize[1] = { 1 }; const int work_dim = 2; error = clEnqueueNDRangeKernel(queue, kernel, work_dim, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); if (error != CL_SUCCESS) { printf("Error queuing kernel for execution!\n"); return -1; } //¶ÁȡִÐнá¹û error = clEnqueueReadBuffer(queue, memObject3, CL_TRUE, 0, size * sizeof(float), result, 0, NULL, NULL); if (error != CL_SUCCESS) { printf("Error reading result buffer!\n"); return -1; } //ÏÔʾ½á¹û for (int i = 0; i < 2; i++) { printf("%f ", result[i]); } clReleaseKernel(kernel); clReleaseMemObject(memObject1); clReleaseMemObject(memObject2); clReleaseMemObject(memObject3); std::cout << " Ö´ÐнáÊø£º" << std::chrono::duration_cast(std::chrono::steady_clock::now() - startexectime).count() << std::endl; } //-------------³ÌÐòÕýÎĽáÊø //ÊÍ·Å×ÊÔ´ clReleaseDevice(devices); clReleaseContext(context); clReleaseProgram(program); clReleaseCommandQueue(queue); std::cout << " ºÄʱ£º" << std::chrono::duration_cast(std::chrono::steady_clock::now() - starttime).count() << std::endl; return 0; } int main3() { ImgUtil::init(); OpenCLExcuter* op = new OpenCLExcuter(); int dataCount = 100; int rows = 8* dataCount; int cols = 5 * LINE_NUMBER * RE; uchar* b_in = createTemplateData(dataCount); uchar* a_in = createDemoData(dataCount); //printf("ÊäÈë1: \n"); for (int r = 0;r < rows;r++) { for (int c = 0;c < cols;c++) { // printf("%d ", a_in[r*cols+c]); } // printf("\n"); } //printf("ÊäÈë2: \n"); for (int r = 0;r < rows;r++) { for (int c = 0;c < cols;c++) { // printf("%d ", b_in[r * cols + c]); } //printf("\n"); } /* int rows = 10; int cols = 10; uchar* b_in = (uchar*)malloc(sizeof(uchar*)*400); uchar* a_in = (uchar*)malloc(sizeof(uchar*) * 400); for (int i = 0;i < 400;i++) { b_in[i] = i; a_in[i] = i + 1; } */ op->init(); for (int i = 0;i < 100;i++) { op->recognition_numbers(a_in, b_in, rows, cols ,_NUMBER_L2_WIDTH, _NUMBER_L2_HEIGHT, _NUMBER_L2_TOTAL_NUMBER); } op->destory(); return 1; } uchar* createDemoData(int line) { unsigned char* data = (unsigned char*)malloc(sizeof(unsigned char) * (_NUMBER_L2_HEIGHT * line) * 5 * LINE_NUMBER * RE); int outLineDataCount = 8 * 5 * LINE_NUMBER * RE; int inLineDataCount = 5 * LINE_NUMBER * RE; for (int l = 0;l < line;l++) { int outLineCount = outLineDataCount * l; for (int re = 0;re < RE;re++) { for (int n = 0;n < LINE_NUMBER;n++) { for (int r = 0;r < 8;r++) { int intLineCount = inLineDataCount * r; for (int c = 0;c < 5;c++) { uchar value = ImgUtil::NUMS_LEVEL2[re%10].data.ptr(r)[c]; int index = outLineCount; index += intLineCount; int x = re * LINE_NUMBER * 5 + n * 5 + c; index += x; data[index] = value > 40 ? 1 : 0; } } } } } return data; } uchar* createTemplateData(int line) { unsigned char* data = (unsigned char*)malloc(sizeof(unsigned char) * (_NUMBER_L2_HEIGHT * line) * _NUMBER_L2_WIDTH * LINE_NUMBER * RE); int outLineDataCount = _NUMBER_L2_HEIGHT * _NUMBER_L2_WIDTH * LINE_NUMBER * RE; int inLineDataCount = _NUMBER_L2_WIDTH * LINE_NUMBER * RE; for (int l = 0;l < line;l++) { int outLineCount = outLineDataCount * l; for (int re = 0;re < RE;re++) { for (int n = 0;n < LINE_NUMBER;n++) { for (int r = 0;r < _NUMBER_L2_HEIGHT;r++) { int intLineCount = inLineDataCount * r; for (int c = 0;c < _NUMBER_L2_WIDTH;c++) { uchar value = ImgUtil::NUMS_LEVEL2[n].data.ptr(r)[c]; int index = outLineCount; index += intLineCount; int x = re * LINE_NUMBER * _NUMBER_L2_WIDTH + n * _NUMBER_L2_WIDTH + c; index += x; data[index] = value > 40 ? 1 : 0; } } } } } return data; } void printDevice() { cl_int err = CL_SUCCESS; // 1. »ñÈ¡µ±Ç°É豸ËùÓÐÖ§³ÖOpenCLµÄƽ̨µÄÊýÁ¿ cl_uint num_of_platforms = 0; err = clGetPlatformIDs(0, 0, &num_of_platforms); CHECK_ERRORS(err); // 2. »ñÈ¡µ±Ç°É豸ËùÓÐÖ§³ÖOpenCLµÄƽ̨µÄÐÅÏ¢ cl_platform_id* platforms = new cl_platform_id[num_of_platforms]; err = clGetPlatformIDs(num_of_platforms, platforms, 0); CHECK_ERRORS(err); cl_char platform_names[10][50]; // 3. ´òӡƽ̨ÐÅÏ¢ cout << "ƽ̨ÐÅÏ¢£º\n"; for (cl_uint i = 0; i < num_of_platforms; i++) { // »ñȡƽ̨×Ö·û´®µÄ³¤¶È size_t platform_name_length = 0; err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 0, 0, &platform_name_length); CHECK_ERRORS(err); // »ñȡƽ̨×Ö·û´® char* platform_name = new char[platform_name_length]; err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, platform_name_length, platform_name, 0); CHECK_ERRORS(err); cout << " [" << i << "] " << platform_name << endl; // ±¸·Ýplatform name if (i < 10) { memset(platform_names[i], 0, 50); memcpy(platform_names[i], platform_name, platform_name_length); } delete[] platform_name; } // 4. ²éѯ¸÷ƽ̨É豸ÊýÁ¿ struct { cl_device_type type; const char* name; cl_uint count; }devices[] = { {CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", 0}, // GPU {CL_DEVICE_TYPE_CPU, "CL_DEVICE_TYPE_CPU", 0}, // CPU {CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", 0} // ¼ÓËÙÆ÷ }; // 5. ±éÀú²éѯ¸÷¸öƽ̨ÏÂÓµÓеÄÉ豸ÊýÁ¿ for (cl_int j = 0; j < num_of_platforms; j++) { cl_platform_id platform = platforms[j]; cout << "\nplatform:" << platform_names[j] << endl; // 6. ±éÀú²éѯGPU¡¢CPU¡¢ACCELERATORÉ豸µÄÊýÁ¿ for (cl_int i = 0; i < (sizeof(devices) / sizeof(devices[0])); i++) { err = clGetDeviceIDs(platform, devices[i].type, 0, 0, &devices[i].count); if (err == CL_DEVICE_NOT_FOUND) { devices[i].count = 0; err = CL_SUCCESS; } CHECK_ERRORS(err); cout << "\tdevices:" << devices[i].name << "\tcount:" << devices[i].count; if (devices[i].count != 0) { // 7. ±éÀú²éѯGPU¡¢CPU¡¢ACCELERATOR ËùÓÐÉ豸µÄÐÅÏ¢ cl_device_id* device = new cl_device_id[devices[i].count]; err = clGetDeviceIDs(platform, devices[i].type, devices[i].count, device, 0); cout << "\t\tdevice name:"; for (cl_int k = 0; k < devices[i].count; k++) { // 8. »ñÈ¡ºÍ´òÓ¡¸÷¸öÉ豸µÄname size_t length = 0; cl_device_id each_device = device[k]; err = clGetDeviceInfo(each_device, CL_DEVICE_NAME, 0, 0, &length); CHECK_ERRORS(err); char* value = new char[length]; err = clGetDeviceInfo(each_device, CL_DEVICE_NAME, length, value, 0); CHECK_ERRORS(err); cout << value << " "; // 9. »ñÈ¡ºÍ´òÓ¡¸÷¸öÉ豸µÄversion err = clGetDeviceInfo(each_device, CL_DEVICE_VERSION, 0, 0, &length); CHECK_ERRORS(err); char* version = new char[length]; err = clGetDeviceInfo(each_device, CL_DEVICE_VERSION, length, version, 0); CHECK_ERRORS(err); cout << version << " "; delete[] value; delete[] version; } delete[] device; } cout << endl; } } delete[] platforms; }