使用OpenCL

时间:2015-05-22 12:16:40

标签: c++ c opencl

我正在尝试计算一组5D点(像素)到单个5D点(中心)的欧氏距离,并将结果存储在另一个结果向量中。我想使用向量索引将所有信息存储在单个向量中,这样第i个像素的5个维度分别位于(5i),(5i + 1),...,(5i + 4)。我是OpenCL的新手,只是根据自己的需要修改了网上的一个示例代码。理论上是对的,但代码没有给出正确的结果! 这是内核

//d_kernel.cl

/* double is an optional type in OpenCL C; request the fp64 extension explicitly. */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Computes, for one pixel per work-item, the squared Euclidean distance
 * between a 5-component pixel and the first 5-component cluster centre.
 *
 * pixelInfo                 - flattened pixel data; pixel i occupies
 *                             elements [5*i, 5*i + 4].
 * clusterCentres            - flattened centre data; only elements [0, 4]
 *                             are read.
 * distanceFromClusterCentre - output; element i receives the sum of the
 *                             squared per-component differences for pixel i
 *                             (no square root is taken).
 *
 * The kernel performs no bounds check: the global work size must not exceed
 * the number of pixels stored in pixelInfo.
 */
__kernel void distance_kernel(__global const double *pixelInfo,
                              __global const double *clusterCentres,
                              __global double *distanceFromClusterCentre)
{
    const size_t index = get_global_id(0);
    const size_t base = 5 * index;

    /* Accumulate in double: storing the differences in int would truncate
       the fractional part before squaring. */
    double sum = 0.0;
    for (int k = 0; k < 5; ++k) {
        const double diff = pixelInfo[base + k] - clusterCentres[k];
        sum += diff * diff;
    }

    distanceFromClusterCentre[index] = sum;
}

这是主机代码

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <CL/cl.h>
using namespace std;

#define MAX_SOURCE_SIZE (0x100000)
int main(int argc, char **argv)
{

    // Create the two input vectors
    int i;
    const int pixelsNumber = 1024;
    const int clustersNumber = 1;

    std::vector<double> pixelInfo;
    pixelInfo.resize(5 * pixelsNumber);
    std::fill(pixelInfo.begin(), pixelInfo.end(), 500);

    std::vector<double> clusterCentres;
    clusterCentres.resize(5 * clustersNumber);
    std::fill(clusterCentres.begin(), clusterCentres.end(), 200);

    std::vector<double> distanceFromClusterCentre;
    distanceFromClusterCentre.resize(pixelsNumber);
    std::fill(distanceFromClusterCentre.begin(), distanceFromClusterCentre.end(), 0);

    // Load the kernel source code into the array source_str
    FILE *fp;
    char *source_str;
    size_t source_size;

    fp = fopen("d_kernel.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose(fp);

    // Get platform and device information
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1,
        &device_id, &ret_num_devices);

    // Create an OpenCL context
    cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);

    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    // Create memory buffers on the device for each vector 
    cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
        5 * pixelsNumber * sizeof(int), NULL, &ret);
    cl_mem clusterCentres_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
        5 * clustersNumber * sizeof(int), NULL, &ret);
    cl_mem distanceFromClusterCentre_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
        pixelsNumber * sizeof(int), NULL, &ret);

    // Copy the vectors to their respective memory buffers
    ret = clEnqueueWriteBuffer(command_queue, pixelInfo_mem, CL_TRUE, 0,
        5 * pixelsNumber * sizeof(int), pixelInfo.data(), 0, NULL, NULL);
    ret = clEnqueueWriteBuffer(command_queue, clusterCentres_mem, CL_TRUE, 0,
        5 * clustersNumber * sizeof(int), clusterCentres.data(), 0, NULL, NULL);

    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1,
        (const char **)&source_str, (const size_t *)&source_size, &ret);

    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);

    // Set the arguments of the kernel
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&pixelInfo_mem);
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&clusterCentres_mem);
    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&distanceFromClusterCentre_mem);

    // Execute the OpenCL kernel on the list
    size_t global_item_size = pixelsNumber; // Process the entire lists
    size_t local_item_size = 64; // Divide work items into groups of 64
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
        &global_item_size, &local_item_size, 0, NULL, NULL);

    // Read the memory buffer result on the device to the local vector result
    ret = clEnqueueReadBuffer(command_queue, distanceFromClusterCentre_mem, CL_TRUE, 0,
        pixelsNumber * sizeof(int), distanceFromClusterCentre.data(), 0, NULL, NULL);

    // Display the result to the screen
    for (i = 0; i < pixelsNumber; i++)
    {
        cout << "Pixel " << i << ": " << distanceFromClusterCentre[i] << endl;
        //system("PAUSE");
    }

    // Clean up
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(pixelInfo_mem);
    ret = clReleaseMemObject(clusterCentres_mem);
    ret = clReleaseMemObject(distanceFromClusterCentre_mem);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);
    free(pixelInfo.data());
    free(clusterCentres.data());
    free(distanceFromClusterCentre.data());

    system("PAUSE");
    return 0;
}

结果的一部分是:

.
.
.
Pixel 501: -1.11874e+306
Pixel 502: -1.16263e+306
Pixel 503: -1.07485e+306
Pixel 504: -1.03079e+306
Pixel 505: -9.42843e+305
Pixel 506: -9.86903e+305
Pixel 507: -8.98954e+305
Pixel 508: -9.86903e+305
Pixel 509: -8.98954e+305
Pixel 510: -9.43014e+305
Press any key to continue . . .
Pixel 511: -8.55065e+305
Pixel 512: 0
Pixel 513: 0
Pixel 514: 0
Pixel 515: 0
Pixel 516: 0
Pixel 517: 0
Pixel 518: 0
Pixel 519: 0
Pixel 520: 0
.
.
.

在索引511之后,向量的其余部分为零!

1 个答案:

答案 0 :(得分:1)

你创建的是double类型的向量,但随后却把它们当作int来处理(创建缓冲区、向缓冲区写入数据以及读取结果时,大小都是按sizeof(int)计算的)。为避免此类错误,您可以这样编写代码:

cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, pixelInfo.size() * sizeof(pixelInfo[0]), NULL, &ret);
                                                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^