这个简单的cuda程序有什么问题?

时间:2015-05-18 15:54:02

标签: cuda

我只是想用CUDA来清空图像。但是“之前”和“之后”我得到了相同的原始图像。无法弄清楚问题。

sumKernel.cu:

#include "sumKernel.h"

__global__ void _sumKernel(char *image, int width, int height, char *kernel, int kerwidth, int kerheight) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    image[idx] = 0;
}

void sumKernel(char *image, int width, int height, char *kernel, int kerwidth, int kerheight) {
    dim3 blocks(1);
    dim3 threads(width*height);
    _sumKernel<<<blocks, threads>>>(image, width, height, kernel, kerwidth, kerheight);
}

sumKernel.h:

void sumKernel(char *image, int width, int height, char *kernel, int kerwidth, int kerheight);

main.cpp中:

findCudaDevice(argc, (const char **)argv);

Mat image = imread("a.jpg");
Mat gray; cvtColor(image, gray, CV_BGR2GRAY);
imshow("before", gray);

char *gray_g;
cudaMalloc((void **)&gray_g, gray.size().area());
cudaMemcpy(gray_g, gray.data, sizeof(char)*gray.size().area(), cudaMemcpyHostToDevice);

char kernel[9];
char *kernel_g;
cudaMalloc((void **)&kernel_g, sizeof(char)*9);

sumKernel(gray_g, gray.cols, gray.rows, kernel, 3, 3);

cudaMemcpy(gray.data, gray_g, sizeof(char)*gray.size().area(), cudaMemcpyDeviceToHost);

imshow("after", gray);
waitKey(0);

cudaFree(kernel);
cudaFree(gray_g);

1 个答案:

答案 0 :(得分:2)

(发表评论作为答案)

check所有API调用错误。最有可能的是,宽度*高度线程的块大小对于单个块来说太大了。