clEnqueueNDRangeKernel抛出CL_OUT_OF_RESOURCES

时间:2016-08-02 20:41:54

标签: opencl nvidia

我的内核在Intel HD显卡上运行得非常好。但是,当我想在我的GeForce 960上运行内核时,它会给出CL_OUT_OF_RESOURCES错误。

我已尝试过不同的本地大小,并确保不会超出数组索引,但仍然不知道为什么会发生此错误。你知道为什么我的代码在英特尔上运行正常并且不能在NVIDIA上运行吗?

我的代码中发生的一件奇怪的事情是,我有类似操作的13种情况。出于性能目的,我重复了相同的操作13次,并避免编写循环只是为了保存循环所具有的一些额外操作。当我到达第11个操作时,代码适用于NVIDIA。但是,当我在代码中包含第12个操作时,它会产生上述错误,并且第11个和第12个操作类似!任何想法为什么会发生这种事情?

这是内核:

float2 projectCube(float3 axis, float3 vertex){

    float voxelSize = 0.5f;
    float2 projection = (float2)(0.0f, 0.0f);

    float temp;
    //1
    temp = axis.x;
    if (projection.x > temp){ projection.x = temp; }
    else if (projection.y < temp){ projection.y = temp; }

    //2
    temp = axis.x + axis.y;
    if (projection.x > temp){ projection.x = temp; }
    else if (projection.y < temp){ projection.y = temp; }

    //3
    temp = axis.y;
    if (projection.x > temp){ projection.x = temp; }
    else if (projection.y < temp){ projection.y = temp; }

    //4
    temp = axis.z;
    if (projection.x > temp){ projection.x = temp; }
    else if (projection.y < temp){ projection.y = temp; }

    //5
    temp = axis.x + axis.z;
    if (projection.x > temp){ projection.x = temp; }
    else if (projection.y < temp){ projection.y = temp; }

    //6
    temp = axis.y + axis.z;
    if (projection.x > temp){ projection.x = temp; }
    else if (projection.y < temp){ projection.y = temp; }

    //7
    temp = axis.x + axis.y + axis.z;
    if (projection.x > temp){ projection.x = temp; }
    else if (projection.y < temp){ projection.y = temp; }

    float product = dot(axis, vertex);
    projection.x = voxelSize * projection.x + product;
    projection.y = voxelSize * projection.y + product;
    return projection;
}

float2 projectTriangle(float3 axis, float3 v0, float3 v1, float3 v2){

    float2 projection;
    projection.x = dot(axis, v0);
    projection.y = projection.x;

    float temp = dot(axis, v1);
    if(projection.x > temp){
        projection.x = temp;
    }
    else if(projection.y < temp){
        projection.y = temp;
    }
    temp = dot(axis, v2);
    if (projection.x > temp){
        projection.x = temp;
    }
    else if (projection.y < temp){
        projection.y = temp;
    }
    return projection;
}

float tester(float3 axis, float3 voxel, float3 v0, float3 v1, float3 v2){

    float2 voxelProjection = projectCube(axis, voxel);
    float2 faceProjection = projectTriangle(axis, v0, v1, v2);
    float minProjection = fmin(voxelProjection.x, faceProjection.x);
    float maxProjection = fmax(voxelProjection.y, faceProjection.y);
    float testResult = maxProjection - minProjection - voxelProjection.y + voxelProjection.x
        - faceProjection.y + faceProjection.x;
    return testResult;
}

__kernel void voxelizer(size_t global_size,
                        float h_voxelSize,
                        __global float* h_minBoundsGrid,
                        __global int *h_dimGrid,
                        __global float* coords,
                        __global int* density)
{

    //printf("local size is: %d\n", get_num_groups(0));
    int i = get_global_id(0) * 9;
    if (i <= global_size * 9){

        float voxelSize = h_voxelSize;
        float3 minBoundsGrid;
        minBoundsGrid.x = h_minBoundsGrid[0];
        minBoundsGrid.y = h_minBoundsGrid[1];
        minBoundsGrid.z = h_minBoundsGrid[2];
        int3 dimGrid;
        dimGrid.x = h_dimGrid[0];
        dimGrid.y = h_dimGrid[1];
        dimGrid.z = h_dimGrid[2];

        if ( i %9 == 0){

            /*Triangle vertices*/
            float3 v0;
            v0 = (float3)(coords[i], coords[i + 1], coords[i + 2]);
            float3 v1;
            v1 = (float3)(coords[i + 3], coords[i + 4], coords[i + 5]);
            float3 v2;
            v2 = (float3)(coords[i + 6], coords[i + 7], coords[i + 8]);
            //printf("i = %d. v0: %f, %f, %f\n", i, v0.x, v0.y, v0.z);
            //printf("i = %d. v1: %f, %f, %f\n", i, v1.x, v1.y, v1.z);
            //printf("i = %d. v2: %f, %f, %f\n", i, v2.x, v2.y, v2.z);

            /*Normal vectors of the each voxel*/
            float3 e0;
            e0 = (float3)(0.5f, 0.0f, 0.0f);
            float3 e1;
            e1 = (float3)(0.0f, 0.5f, 0.0f);
            float3 e2;
            e2 = (float3)(0.0f, 0.0f, 0.5f);

            /*Edges of a traingle*/
            float3 f0;
            f0 = v1 - v0;
            float3 f1;
            f1 = v2 - v1;
            float3 f2;
            f2 = v0 - v2;

            float3 minLocalGrid;
            minLocalGrid.x = fmin(v0.x, fmin(v1.x, v2.x));          
            minLocalGrid.y = fmin(v0.y, fmin(v1.y, v2.y));          
            minLocalGrid.z = fmin(v0.z, fmin(v1.z, v2.z));          
            minLocalGrid.x = voxelSize * floor(minLocalGrid.x / voxelSize);
            minLocalGrid.y = voxelSize * floor(minLocalGrid.y / voxelSize);
            minLocalGrid.z = voxelSize * floor(minLocalGrid.z / voxelSize);
            //printf("i = %d. minLocalGrid = %f, %f, %f.\n", i, minLocalGrid.x, minLocalGrid.y, minLocalGrid.z);

            float3 maxLocalGrid;
            maxLocalGrid.x = fmax(v0.x, fmax(v1.x, v2.x));
            maxLocalGrid.y = fmax(v0.y, fmax(v1.y, v2.y));
            maxLocalGrid.z = fmax(v0.z, fmax(v1.z, v2.z));
            maxLocalGrid.x = voxelSize * ceil(maxLocalGrid.x / voxelSize);
            maxLocalGrid.y = voxelSize * ceil(maxLocalGrid.y / voxelSize);
            maxLocalGrid.z = voxelSize * ceil(maxLocalGrid.z / voxelSize);
            if (maxLocalGrid.x == minLocalGrid.x){ maxLocalGrid.x += voxelSize; }
            if (maxLocalGrid.y == minLocalGrid.y){ maxLocalGrid.y += voxelSize; }
            if (maxLocalGrid.z == minLocalGrid.z){ maxLocalGrid.z += voxelSize; }
            //printf("i = %d. maxLocalGrid = %f, %f, %f.\n", i, maxLocalGrid.x, maxLocalGrid.y, maxLocalGrid.z);

            //printf("i = %d\n v0 = %f, %f, %f\n v1 = %f, %f, %f\n v2 = %f, %f, %f\n minLocalGrid = %f, %f, %f\n===============\n",
            //  i, v0.x, v0.y, v0.z, v1.x, v1.y, v1.z, v2.x, v2.y, v2.z, maxLocalGrid.x, maxLocalGrid.y, maxLocalGrid.z);

            float j = minLocalGrid.z;
            while(j < maxLocalGrid.z){
                float k = minLocalGrid.y;
                while(k < maxLocalGrid.y){
                    float l = minLocalGrid.x;
                    while (l < maxLocalGrid.x){

                        float3 firstVertexOfVoxel = (float3)(l, k, j);
                        //printf("l,k,j: %f, %f, %f\n", l, k, j);
                        float3 globalCoordOffset = (firstVertexOfVoxel - minBoundsGrid) / voxelSize;
                        int3 globalDimOffset = convert_int3_rtz(globalCoordOffset);
                        //printf("i = %d. globalCoordOffset: %f, %f, %f\n", i, globalCoordOffset.x, globalCoordOffset.y, globalCoordOffset.z);
                        //printf("i = %d. globalDimOffset: %d, %d, %d\n", i, globalDimOffset.x, globalDimOffset.y, globalDimOffset.z);

                        int voxelIndexGlobalGrid = globalDimOffset.x + dimGrid.x * (globalDimOffset.y +
                            dimGrid.y * globalDimOffset.z);
                        //printf("i = %d. voxelIndexGlobalGrid = %d\n", i, voxelIndexGlobalGrid);

                        if (density[voxelIndexGlobalGrid] != 1){

                            /*The famous 13-axes test*/
                            float3 axis;
                            float testResult = 0;
                            int overlapCount = 0;

                            //1
                            testResult = tester(e0, firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //2
                            testResult = tester(e1, firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //3
                            testResult = tester(e2, firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //4
                            //axis = ;
                            testResult = tester(cross(-f2, f0), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //5
                            /*axis = cross(e0, f0);*/
                            testResult = tester(cross(e0, f0), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //6
                            //axis = cross(e0, f0);
                            testResult = tester(cross(e0, f1), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //7
                            //axis = cross(e0, f0);
                            testResult = tester(cross(e0, f2), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //8
                            //axis = cross(e1, f0);
                            testResult = tester(cross(e1, f0), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //9
                            //axis = cross(e1, f1);
                            testResult = tester(cross(e1, f1), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //10
                            //axis = cross(e1, f2);
                            testResult = tester(cross(e1, f2), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //11
                            //axis = cross(e2, f0);
                            testResult = tester(cross(e2, f0), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //12
                            //axis = cross(e2, f1);
                            testResult = tester(cross(e2, f1), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }

                            //13
                            //axis = cross(e2, f2);
                            testResult = tester(cross(e2, f2), firstVertexOfVoxel, v0, v1, v2);
                            if (testResult <= 0){

                                overlapCount++;
                            }
                            if (overlapCount == 13){ 

                                density[voxelIndexGlobalGrid] = 1;
                            }

                        }
                    l = l + voxelSize;
                    }// while for l
                    k = k + voxelSize;
                }// while for k
                j = j + voxelSize;
            }//while for j

            //printf("Here are the max of the %d-th face: %f, %f, %f\n", i / 9, maxLocalGrid.x, maxLocalGrid.y, maxLocalGrid.z);
            //printf("Here are the coordinates of the %d-th face: %f, %f, %f\n", i / 9, e1.x, e1.y, e1.z);
            //printf("Here are the coordinates of the %d-th face: %f, %f, %f\n", i / 9, e2.x, e2.y, e2.z);

            //printf("\n==================KERNEL COMPUTED==================\n");
            //barrier(CLK_LOCAL_MEM_FENCE);

        }

    }
}

这是c代码:

#define DEVICE_SELECTOR 1 //0 for Intel and 1 for Nvidia in my computer
#define _CRT_SECURE_NO_WARNINGS
#define KERNEL_FILE "..\\voxelizerKernel.cl"
#define WORK_DIM 1

#define VOXEL_SIZE 0.5f
#define HALF_VOXEL_SIZE VOXEL_SIZE/2.0f;


//C header files

    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>
    #include <algorithm>
    //OpenCL header files
    #ifdef MAC
    #include <OpenCL/cl.h>
    #else
    #include <CL/cl.h>
    #endif



    cl_device_id create_device() {

        cl_platform_id *platform;
        cl_device_id dev;
        cl_uint num_platform;
        int err;

        /* Identify a platform */
        err = clGetPlatformIDs(0, NULL, &num_platform);
        if (err < 0) {
            printf("Error code: %d. Couldn't identify a platform\n", err);
            exit(1);
        }
        platform = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platform);
        clGetPlatformIDs(num_platform, platform, NULL);
        /* Access a device */
        err = clGetDeviceIDs(platform[DEVICE_SELECTOR], CL_DEVICE_TYPE_GPU, 1, &dev, NULL);

        if (err < 0) {
            printf("Error code: %d. Couldn't access any devices\n", err);
            exit(1);
        }

        return dev;
    }
    cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {

        cl_program program;
        FILE *program_handle;
        char *program_buffer, *program_log;
        size_t program_size, log_size;
        int err;

        /* Read program file and place content into buffer */
        program_handle = fopen(filename, "r");
        if (program_handle == NULL) {
            printf("Couldn't find the program file\n");
            exit(1);
        }
        fseek(program_handle, 0, SEEK_END);
        program_size = ftell(program_handle);
        rewind(program_handle);
        program_buffer = (char*)malloc(program_size + 1);
        program_buffer[program_size] = '\0';
        fread(program_buffer, sizeof(char), program_size, program_handle);
        fclose(program_handle);

        /* Create program from file */
        program = clCreateProgramWithSource(ctx, 1,
            (const char**)&program_buffer, &program_size, &err);
        if (err < 0) {
            printf("Error code: %d. Couldn't create the program\n", err);
            exit(1);
        }
        free(program_buffer);

        /* Build program */
        err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
        if (err < 0) {

            /* Find size of log and print to std output */
            clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
                0, NULL, &log_size);
            program_log = (char*)malloc(log_size + 1);
            program_log[log_size] = '\0';
            clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
                log_size + 1, program_log, NULL);
            printf("%s\n", program_log);
            free(program_log);
            exit(1);
        }

        return program;
    }
    void print_device_info(cl_device_id dev){

        cl_ulong glob_mem_size, local_mem_size;
        cl_uint clock_freq, num_core, work_item_dim, time_res;
        size_t local_size, work_item_size[3];
        char dev_vendor[40], dev_name[400], driver_version[40], device_version[40];

        clGetDeviceInfo(dev, CL_DEVICE_VENDOR, sizeof(dev_vendor), &dev_vendor, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_NAME, sizeof(dev_name), &dev_name, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(glob_mem_size), &glob_mem_size, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(local_mem_size), &local_mem_size, NULL);
        clGetDeviceInfo(dev, CL_DRIVER_VERSION, sizeof(driver_version), &driver_version, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_VERSION, sizeof(device_version), &device_version, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_freq), &clock_freq, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_core), &num_core, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(local_size), &local_size, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(work_item_size), &work_item_size, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(work_item_dim), &work_item_dim, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(time_res), &time_res, NULL);

        printf("==========================================================\n");
        printf("Device Sepc without consideration of kernels:\n");
        printf("CL_DEVICE_VENDOR:                     %s\n", dev_vendor);
        printf("CL_DEVICE_NAME:                       %s\n", dev_name);
        printf("CL_DEVICE_GLOBAL_MEM_SIZE:            %I64u GB\n", glob_mem_size / 1073741824);
        printf("CL_DEVICE_LOCAL_MEM_SIZE:             %I64u KB\n", local_mem_size / 1024);
        printf("CL_DRIVER_VERSION:                    %s\n", driver_version);
        printf("CL_DEVICE_VERSION:                    %s\n", device_version);
        printf("CL_DEVICE_MAX_CLOCK_FREQUENCY:        %I32u MHz\n", clock_freq);
        printf("CL_DEVICE_MAX_COMPUTE_UNITS:          %I32u\n", num_core);
        printf("CL_DEVICE_MAX_WORK_GROUP_SIZE         %u\n", local_size);
        printf("CL_DEVICE_MAX_WORK_ITEM_SIZES:        {%I32u, %I32u, %I32u}\n", work_item_size[0], work_item_size[1], work_item_size[2]);
        printf("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:   %I32u\n", work_item_dim);
        printf("CL_DEVICE_PROFILING_TIMER_RESOLUTION: %I32u ns\n", time_res);
        printf("==========================================================\n");

    }


    int main()
    {

        /*OpenCL variables*/
        cl_int i, j, err, num_groups;
        size_t local_size, max_local_size, global_size, processed_global_size;

        cl_context context;
        cl_command_queue queue;

        cl_program program;
        cl_device_id device;
        cl_kernel voxelization_kernel, reduction_kernel, reduction_complete_kernel;

        cl_mem coords_buffer, density_buffer, dimGrid_buffer, h_minBoundsGrid_buffer, fullVxelsCount_buffer, group_sums_buffer;
        void *density_mapped_memory;

        cl_event prof_event;
        cl_ulong time_start, time_end, total_time;

        float h_voxelSize = VOXEL_SIZE;

        float fullVxelsCount = 0;

        /*Read mesh data*/
        float coords[54] =
        { 0.300500,
        1.300000,
        0.000500,
        1.200500,
        1.600000,
        0.000500,
        1.600500,
        0.600000,
        0.000500,
        0.300500,
        1.300000,
        0.000500,
        0.500500,
        1.900000,
        0.000500,
        1.200500,
        1.600000,
        0.000500,
        0.300500,
        1.300000,
        0.000500,
        1.600500,
        0.600000,
        0.000500,
        0.100500,
        0.700000,
        0.000500,
        0.100500,
        0.700000,
        0.000500,
        1.600500,
        0.600000,
        0.000500,
        0.000500,
        0.200000,
        0.000500,
        0.000500,
        0.200000,
        0.000500,
        1.600500,
        0.600000,
        0.000500,
        1.600500,
        0.100000,
        0.000500,
        1.200500,
        1.600000,
        0.000500,
        1.600500,
        1.300000,
        0.000500,
        1.600500,
        0.600000,
        0.000500 };


        /*Get the voxel count*/
        float boundsGrid[6] = {0,2,0,2,0,0.5};

        int dimGrid[3] = {
            (boundsGrid[1] - boundsGrid[0]) / VOXEL_SIZE,
            (boundsGrid[3] - boundsGrid[2]) / VOXEL_SIZE,
            (boundsGrid[5] - boundsGrid[4]) / VOXEL_SIZE
        };
        if (dimGrid[0] == 0) dimGrid[0] = 1;
        if (dimGrid[1] == 0) dimGrid[1] = 1;
        if (dimGrid[2] == 0) dimGrid[2] = 1;
        float h_minBoundsGrid[3];
        h_minBoundsGrid[0] = boundsGrid[0];
        h_minBoundsGrid[1] = boundsGrid[2];
        h_minBoundsGrid[2] = boundsGrid[4];
        int voxelCounts = dimGrid[0] * dimGrid[1] * dimGrid[2];


        /*Prepare kernel output : build an array for storing voxles' density info*/
        int *density = (int*)malloc(sizeof(int)*voxelCounts);
        for (int i = 0; i < voxelCounts; i++){
            density[i] = 0;
        }


        /*OpenCL essentials*/
        device = create_device();
        clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, NULL);
        //print_device_info(device);

        context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
        if (err < 0) {
            printf("Error code: %d. Couldn't create a context\n", err);
            exit(1);
        }
        program = build_program(context, device, KERNEL_FILE);
        queue = clCreateCommandQueue(context, device,
            CL_QUEUE_PROFILING_ENABLE, &err);
        if (err < 0) {
            printf("Error code: %d. Couldn't create a command queue\n", err);
            exit(1);
        };
        voxelization_kernel = clCreateKernel(program, "voxelizer", &err);
        if (err < 0) {
            printf("Error code: %d. Couldn't create a kernel\n", err);
            exit(1);
        };

        int numberOfFaces = 6;
        global_size = numberOfFaces;
        local_size = max_local_size;
        if (global_size % local_size != 0){
            processed_global_size = (global_size / local_size + 1) * local_size;
            //int padding = processed_global_size - global_size;
            //int *working_data = (int*)malloc((voxelCounts + padding)*sizeof(int));
            //memcpy(working_data, density, voxelCounts);
            //memset(working_data + voxelCounts, 0.0, padding);
        }
        else{
            processed_global_size = global_size;
        }


        /* Create host-device data exchange interface*/
        dimGrid_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY |
            CL_MEM_COPY_HOST_PTR, sizeof(float)* 3, dimGrid, &err);
        h_minBoundsGrid_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY |
            CL_MEM_COPY_HOST_PTR, sizeof(float)* 3, h_minBoundsGrid, &err);
        coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY |
            CL_MEM_COPY_HOST_PTR, sizeof(float) * 54, coords, &err);
        density_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY |
            CL_MEM_COPY_HOST_PTR, sizeof(int) * voxelCounts, density, &err);
        if (err < 0) {
            printf("Error code: %d. Couldn't create a buffer\n", err);
            exit(1);
        };

        err = clSetKernelArg(voxelization_kernel, 0, sizeof(global_size), &global_size);
        err |= clSetKernelArg(voxelization_kernel, 1, sizeof(h_voxelSize), &h_voxelSize);
        err |= clSetKernelArg(voxelization_kernel, 2, sizeof(cl_mem), &h_minBoundsGrid_buffer);
        err |= clSetKernelArg(voxelization_kernel, 3, sizeof(cl_mem), &dimGrid_buffer);
        err |= clSetKernelArg(voxelization_kernel, 4, sizeof(cl_mem), &coords_buffer);
        err |= clSetKernelArg(voxelization_kernel, 5, sizeof(cl_mem), &density_buffer);
        if (err < 0) {
            printf("Error code: %d. Couldn't create an argument for voxelization_kernel\n", err);
            exit(1);
        }

        /* Do the voxelization magic */
        err = clEnqueueNDRangeKernel(queue, voxelization_kernel, 1, NULL, &processed_global_size,
            &local_size, 0, NULL, &prof_event);
        if (err < 0) {
            printf("Error code: %d. Couldn't enqueue the voxelization_kernel\n", err);
            exit(1);
        }

        /* Read the results */
        density_mapped_memory = clEnqueueMapBuffer(queue, density_buffer, CL_TRUE,
            CL_MAP_READ, 0, sizeof(density), 0, NULL, NULL, &err);
        if (err < 0) {
            printf("Error code : %d. Couldn't map the buffer to host memory\n", err);
            exit(1);
        }
        memcpy(density, density_mapped_memory, sizeof(density)* voxelCounts);
        err = clEnqueueUnmapMemObject(queue, density_buffer, density_mapped_memory,
            0, NULL, NULL);
        if (err < 0) {
            printf("Error code: %d. Couldn't unmap the density_buffer\n", err);
            exit(1);
        }
        for (int i = 0; i < voxelCounts; i++){
            printf("%d\n", density[i]);
        }


        /*Clean up*/
        clReleaseKernel(voxelization_kernel);
        clReleaseMemObject(dimGrid_buffer);
        clReleaseMemObject(h_minBoundsGrid_buffer);
        clReleaseMemObject(coords_buffer);
        clReleaseMemObject(density_buffer);
        clReleaseCommandQueue(queue);
        clReleaseProgram(program);
        clReleaseContext(context);


        return 0;
    }

0 个答案:

没有答案
相关问题