在主机端读写 device 全局变量(用 `__device__` 或 `__constant__` 声明的变量)主要用到两个函数:
// The two runtime functions used to move data between host memory and a
// device symbol:
cudaMemcpyToSymbol() cudaMemcpyFromSymbol()

// Example 1: an array declared with the __constant__ qualifier.
__constant__ float constData[256];
float data[256];
// Copy sizeof(data) bytes from the host array `data` into the symbol.
cudaMemcpyToSymbol(constData, data, sizeof(data));
// Copy the same number of bytes from the symbol back into `data`.
cudaMemcpyFromSymbol(data, constData, sizeof(data));

// Example 2: a scalar declared with the __device__ qualifier.
__device__ float devData;
float value = 3.14f;
// The source argument is the address of the host value.
cudaMemcpyToSymbol(devData, &value, sizeof(float));

// Example 3: a __device__ pointer variable.
__device__ float* devPointer;
float* ptr;
cudaMalloc(&ptr, 256 * sizeof(float));
// sizeof(ptr) is intentional here: what gets copied into devPointer is the
// pointer value stored in `ptr`, not the 256 floats it refers to.
cudaMemcpyToSymbol(devPointer, &ptr, sizeof(ptr));
假设下面代码保存在var.cu
#include <cstdlib>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime.h>

using namespace std;

// Global variable declared with __device__; the kernel writes it and the host
// reads it back with cudaMemcpyFromSymbol().
__device__ int d_data;

// Kernel: stores 567 into d_data.
// The program launches it as <<<1, 1>>>; it takes no arguments and uses no
// shared memory.
__global__ void changeData()
{
    d_data = 567;
}

// Aborts the program with a non-zero exit status when a CUDA runtime call
// failed.
//   err  - status returned by the CUDA runtime call
//   what - short message identifying the failing step
static void checkCuda(cudaError_t err, const char* what)
{
    if (err != cudaSuccess) {
        cerr << what << " (" << cudaGetErrorString(err) << ")" << endl;
        exit(EXIT_FAILURE);  // report failure to the shell instead of exit(0)
    }
}

// Prints h_data before the kernel runs, launches changeData, copies d_data
// back into h_data and prints the new value.
// Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
int main()
{
    int h_data = 123;

    checkCuda(cudaSetDevice(0), "set device error!");

    cout << "before kernel, h_data = " << h_data << endl;

    changeData<<<1, 1>>>();
    // A kernel launch returns no status itself; launch errors must be
    // queried explicitly.
    checkCuda(cudaGetLastError(), "kernel launch error!");

    // The symbol itself (not its address and not a string) is passed as the
    // source of the copy.
    checkCuda(cudaMemcpyFromSymbol(&h_data, d_data, sizeof(int)),
              "from symbol error!");

    cout << h_data << endl;

    checkCuda(cudaDeviceReset(), "device reset error!");
    return 0;
}
编译文件:nvcc var.cu -o var -arch=sm_35(sm_35 对应 Kepler 架构,CUDA 12 及以后的工具链已不再支持;请按实际 GPU 的计算能力调整 -arch 参数)
运行可执行文件: ./var
before kernel, h_data = 123
567