// Launch layout: the grid is 1D and each block is 2D.
#include <opencv2/opencv.hpp>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include "device_launch_parameters.h"

using namespace cv;

// Evaluates a CUDA runtime call and terminates the program with a diagnostic
// (file, line, CUDA error string) if the call did not return cudaSuccess.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaStatus_ = (call);                                  \
        if (cudaStatus_ != cudaSuccess) {                                  \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cudaStatus_));            \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Inverts every pixel of an image in place (each channel c becomes 255 - c).
//
// Launch layout: 1D grid of 2D blocks. Each thread handles at most one pixel;
// the launch must therefore supply at least width * height threads in total.
//
// dev_c  : device buffer of width * height tightly packed 3-byte pixels.
// width  : image width in pixels.
// height : image height in pixels.
__global__ void revImg(uchar3* dev_c, int width, int height)
{
    // Flatten (1D block index, 2D thread index) into a single pixel index.
    const int threadsPerBlock = blockDim.x * blockDim.y;
    const int tid = blockIdx.x * threadsPerBlock
                  + threadIdx.y * blockDim.x
                  + threadIdx.x;

    // The last block is usually only partially used; skip the excess threads.
    if (tid < width * height) {
        uchar3 px = dev_c[tid];
        px.x = 255 - px.x;
        px.y = 255 - px.y;
        px.z = 255 - px.z;
        dev_c[tid] = px;
    }
}

// Loads an image, inverts it on the GPU, and shows it before and after.
// Returns 0 on success and 1 if the image could not be loaded; any CUDA
// failure terminates the program through CUDA_CHECK.
int main()
{
    Mat image = imread("E:/code/study_cuda/study_reduce/study_reduce/cinque_terre_small.jpg");
    if (image.empty()) {
        // imread signals failure by returning an empty matrix.
        fprintf(stderr, "failed to load input image\n");
        return 1;
    }
    // The pixel data is copied below as one flat array, so the rows must be
    // stored without padding between them.
    if (!image.isContinuous()) {
        image = image.clone();
    }
    imshow("src", image);

    int width, height;
    width = image.size().width;
    height = image.size().height;
    printf("width=%d height=%d", width, height);

    const int size = width * height;
    const size_t bytes = (size_t)size * sizeof(uchar3);

    uchar3* dev_c = NULL;
    CUDA_CHECK(cudaMalloc((void**)&dev_c, bytes));
    CUDA_CHECK(cudaMemcpy(dev_c, image.data, bytes, cudaMemcpyHostToDevice));

    const int thread = 16;                        // block is thread x thread
    const int threadsPerBlock = thread * thread;
    // Round up by the full block size so the trailing pixels are covered too
    // (rounding by `thread` alone left the tail of the image unprocessed).
    const int grid = (size + threadsPerBlock - 1) / threadsPerBlock;

    dim3 dimGrid(grid);
    dim3 dimBlock(thread, thread);
    revImg<<<dimGrid, dimBlock>>>(dev_c, width, height);
    CUDA_CHECK(cudaGetLastError());               // reports launch-configuration errors

    // Blocking copy: returns after the kernel has finished and also reports
    // any error raised while the kernel was running.
    CUDA_CHECK(cudaMemcpy(image.data, dev_c, bytes, cudaMemcpyDeviceToHost));

    imshow("gpu", image);
    waitKey(0);

    CUDA_CHECK(cudaFree(dev_c));
    return 0;
}