配置VS2013 OpenCL环境
由于目前的CUDA安装包自带显卡驱动、CUDA工具和OpenCL的SDK,安装CUDA后即可直接使用OpenCL;其中OpenCL相关内容的默认目录有:
CL文件夹的目录:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\include;OpenCL.lib文件目录:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\lib;OpenCL.dll文件目录:C:\Program Files\NVIDIA Corporation\OpenCL
可以通过VS2013的VC++模板新建一个空项目;
图 1
为了验证配置的正确性,为项目添加两个文件:cl_kernel.cl和main.cpp。
1) 添加cl_kernel.cl文件
其中在项目所在的目录下新建一个cl_kernel.cl文件,其内容为附录1所示,目录结构如图 1所示。同时在VS2013的项目中将cl_kernel.cl文件添加到项目的"源文件"筛选器中,如图 2所示。
图 2
图 3
2) 添加main.cpp文件
与cl_kernel.cl文件的操作类似,同样将main.cpp文件(内容见附录2)添加到项目中。
需要将OpenCL的SDK的头文件包含到项目中,具体操作方法为:
在项目->属性->配置属性->C/C++->常规->附加包含目录->编辑,然后添加CL文件夹的目录:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\include。如图 3所示。
图 4
项目->属性->配置属性->C/C++->预处理器->预处理器定义->编辑,然后添加"_CRT_SECURE_NO_WARNINGS",否则main.cpp中使用fopen时会报错(error C4996)。
图 5
具体操作:项目->属性->配置属性->链接器->常规->附加库目录,然后将OpenCL.lib文件所在的目录添加进去。需要注意的是,将程序编译(Debug)为32位平台和64位平台时,需要添加的OpenCL.lib目录是不同的:如图 4所示,目标平台为Win32时,只添加"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\lib\Win32"路径;若目标平台为x64,则添加的路径为"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\lib\x64"。同时需要在"启用增量链接"选项中选否。
图 6
图 7
项目->属性->配置属性->链接器->输入->附加依赖项->编辑,接着添加OpenCL.lib
图 8
图 9
附录1:cl_kernel.cl文件
/*
 * Element-wise addition of two int vectors: dst[i] = src1[i] + src2[i].
 *
 * Each work-item processes the single element selected by its global id in
 * dimension 0. There is no bounds check, so the host must not launch more
 * work-items than the three buffers have elements.
 */
__kernel void MyCLAdd(__global int *dst, __global int *src1, __global int *src2)
{
    const int gid = get_global_id(0);
    const int sum = src1[gid] + src2[gid];

    dst[gid] = sum;
}
附录2:main.cpp文件
1 #include <CL/cl.h> 2 3 #include <stdio.h> 4 5 #include <iostream> 6 7 using namespace std; 8 9 10 11 int main( void){ 12 13 cl_uint numPlatforms = 0; // the NO. of platforms 14 15 cl_platform_id platform = nullptr; // the chosen platform 16 17 cl_context context = nullptr; // OpenCL context 18 19 cl_command_queue commandQueue = nullptr; 20 21 cl_program program = nullptr; // OpenCL kernel program object that'll be running on the compute device 22 23 cl_mem input1MemObj = nullptr; // input1 memory object for input argument 1 24 25 cl_mem input2MemObj = nullptr; // input2 memory object for input argument 2 26 27 cl_mem outputMemObj = nullptr; // output memory object for output 28 29 cl_kernel kernel = nullptr; // kernel object 30 31 32 33 cl_int status = clGetPlatformIDs( 0, NULL, &numPlatforms); 34 35 if (status != CL_SUCCESS) 36 37 { 38 39 cout << " Error: Getting platforms! " << endl; 40 41 return 0; 42 43 } 44 45 46 47 /* For clarity, choose the first available platform. */ 48 49 if (numPlatforms > 0) 50 51 { 52 53 cl_platform_id* platforms = (cl_platform_id*) malloc(numPlatforms* sizeof(cl_platform_id)); 54 55 status = clGetPlatformIDs(numPlatforms, platforms, NULL); 56 57 platform = platforms[ 0]; 58 59 free(platforms); 60 61 } 62 63 else 64 65 { 66 67 puts( " Your system does not have any OpenCL platform! "); 68 69 return 0; 70 71 } 72 73 74 75 /* Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device. */ 76 77 cl_uint numDevices = 0; 78 79 cl_device_id *devices; 80 81 status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); 82 83 if (numDevices == 0) // no GPU available. 84 85 { 86 87 cout << " No GPU device available. " << endl; 88 89 cout << " Choose CPU as default device. 
" << endl; 90 91 status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices); 92 93 devices = (cl_device_id*) malloc(numDevices * sizeof(cl_device_id)); 94 95 96 97 status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL); 98 99 } 100 101 else 102 103 { 104 105 devices = (cl_device_id*) malloc(numDevices * sizeof(cl_device_id)); 106 107 status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); 108 109 cout << " The number of devices: " << numDevices << endl; 110 111 } 112 113 114 115 /* Step 3: Create context. */ 116 117 context = clCreateContext(NULL, 1, devices, NULL, NULL, NULL); 118 119 120 121 /* Step 4: Creating command queue associate with the context. */ 122 123 commandQueue = clCreateCommandQueue(context, devices[ 0], 0, NULL); 124 125 126 127 /* Step 5: Create program object */ 128 129 // Read the kernel code to the buffer 130 131 FILE *fp = fopen( " cl_kernel.cl ", " rb "); 132 133 134 135 // 错误 1 error C4996 : 'fopen' : This function or variable may be unsafe.Consider using fopen_s instead.To disable deprecation, use _CRT_SECURE_NO_WARNINGS.See online help for details.c : \users\zyj\documents\visual studio 2013\projects\project3\project3\main.cpp 67 1 Project3 136 137 138 139 140 141 if (fp == nullptr) 142 143 { 144 145 puts( " The kernel file not found! "); 146 147 goto RELEASE_RESOURCES; 148 149 } 150 151 fseek(fp, 0, SEEK_END); 152 153 size_t kernelLength = ftell(fp); 154 155 fseek(fp, 0, SEEK_SET); 156 157 char *kernelCodeBuffer = ( char*) malloc(kernelLength + 1); 158 159 fread(kernelCodeBuffer, 1, kernelLength, fp); 160 161 kernelCodeBuffer[kernelLength] = ' \0 '; 162 163 fclose(fp); 164 165 166 167 const char *aSource = kernelCodeBuffer; 168 169 program = clCreateProgramWithSource(context, 1, &aSource, &kernelLength, NULL); 170 171 172 173 /* Step 6: Build program. 
*/ 174 175 status = clBuildProgram(program, 1, devices, NULL, NULL, NULL); 176 177 178 179 /* Step 7: Initial inputs and output for the host and create memory objects for the kernel */ 180 181 int __declspec(align( 32)) input1Buffer[ 128]; // 32 bytes alignment to improve data copy 182 183 int __declspec(align( 32)) input2Buffer[ 128]; 184 185 int __declspec(align( 32)) outputBuffer[ 128]; 186 187 188 189 // Do initialization 190 191 int i; 192 193 for (i = 0; i < 128; i++) 194 195 input1Buffer[i] = input2Buffer[i] = i + 1; 196 197 memset(outputBuffer, 0, sizeof(outputBuffer)); 198 199 200 201 // Create mmory object 202 203 input1MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 128 * sizeof( int), input1Buffer, nullptr); 204 205 input2MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 128 * sizeof( int), input2Buffer, nullptr); 206 207 outputMemObj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 128 * sizeof( int), NULL, NULL); 208 209 210 211 /* Step 8: Create kernel object */ 212 213 kernel = clCreateKernel(program, " MyCLAdd ", NULL); 214 215 216 217 /* Step 9: Sets Kernel arguments. */ 218 219 status = clSetKernelArg(kernel, 0, sizeof(cl_mem), ( void *)&outputMemObj); 220 221 status = clSetKernelArg(kernel, 1, sizeof(cl_mem), ( void *)&input1MemObj); 222 223 status = clSetKernelArg(kernel, 2, sizeof(cl_mem), ( void *)&input2MemObj); 224 225 226 227 /* Step 10: Running the kernel. */ 228 229 size_t global_work_size[ 1] = { 128 }; 230 231 status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); 232 233 clFinish(commandQueue); // Force wait until the OpenCL kernel is completed 234 235 236 237 /* Step 11: Read the cout put back to host memory. */ 238 239 status = clEnqueueReadBuffer(commandQueue, outputMemObj, CL_TRUE, 0, global_work_size[ 0] * sizeof( int), outputBuffer, 0, NULL, NULL); 240 241 242 243 printf( " Veryfy the rsults... 
"); 244 245 for (i = 0; i < 128; i++) 246 247 { 248 249 if (outputBuffer[i] != (i + 1) * 2) 250 251 { 252 253 puts( " Results not correct! "); 254 255 break; 256 257 } 258 259 } 260 261 if (i == 128) 262 263 puts( " Correct! "); 264 265 RELEASE_RESOURCES: 266 267 /* Step 12: Clean the resources. */ 268 269 status = clReleaseKernel(kernel); // *Release kernel. 270 271 status = clReleaseProgram(program); // Release the program object. 272 273 status = clReleaseMemObject(input1MemObj); // Release mem object. 274 275 status = clReleaseMemObject(input2MemObj); 276 277 status = clReleaseMemObject(outputMemObj); 278 279 status = clReleaseCommandQueue(commandQueue); // Release Command queue. 280 281 status = clReleaseContext(context); // Release context. 282 283 284 285 free(devices); 286 287 }
