错误:当我使用常量内存

时间:2017-11-05 13:33:00

标签: c++ cuda nvidia

当我使用恒定记忆时,我遇到了问题。它会发生错误:

ERROR: an illegal memory access was encountered

似乎内核函数没有执行。 但如果我不选择常量内存,一切都还可以。所以它让我很困惑。我想了很长时间。但我仍然没有理由。你能帮我解决一下这个问题吗?非常感谢你。

如果变量s未使用常量内存,则一切正常。但如果s使用常量内存,程序将会中断。

使用常量内存的变量定义如下:

#ifdef USE_CONST_MEM
__constant__ Sphere s[SPHERES];
#else
Sphere          *s;
#endif

内核函数定义如下:

#ifdef USE_CONST_MEM
__global__ void kernel(unsigned char *ptr) {
    printf("ok2");
#else
__global__ void kernel(Sphere *s, unsigned char *ptr) {
#endif
    // map from threadIdx/BlockIdx to pixel position
    printf("ok2");
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;
    REAL   ox = (x - DIM / 2);
    REAL   oy = (y - DIM / 2);

    REAL   r = 0, g = 0, b = 0;
    REAL   maxz = -INF;
    __syncthreads();
    for (int i = 0; i<SPHERES; i++) {
        REAL   n;
        REAL   t = s[i].hit(ox, oy, &n);
        if (t > maxz) {
            REAL fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
            printf("r: %.2f g: %.2f, b %.2f\n", r, g, b);
        }
    }
    __syncthreads();
    ptr[offset * 4 + 0] = (int)(r * 255);
    ptr[offset * 4 + 1] = (int)(g * 255);
    ptr[offset * 4 + 2] = (int)(b * 255);
    ptr[offset * 4 + 3] = 255;
}

// globals needed by the update routine
struct DataBlock {
    unsigned char   *dev_bitmap;
    CPUAnimBitmap  *bitmap;
};

有调用内核函数的函数。

void generate_frame(DataBlock *d, int ticks) {
    //START_GPU

    //movin the spheres
    kernelMoving << <128, 32 >> >(s, SPHERES);
    printf("ok0\n");
    // generate a bitmap from our sphere data
    dim3    grids(DIM / 16, DIM / 16);
    dim3    threads(16, 16);

#ifdef USE_CONST_MEM
    Sphere *d_s;
    cudaGetSymbolAddress((void **)&d_s, s);
    printf("ok0-1\n");
    kernel << <grids, threads >> >(s, d->dev_bitmap);
    cudaDeviceSynchronize();
    cudaError_t error = cudaGetLastError();
    if(error!=cudaSuccess)
    {
       fprintf(stderr,"ERROR: %s\n", cudaGetErrorString(error) );
       exit(-1);
    }
    printf("ok0-1-1\n");
#else
    printf("ok0-2\n");
    kernel << <grids, threads >> >(s, d->dev_bitmap);
#endif
    printf("ok1\n");
    //END_GPU

    HANDLE_ERROR(cudaMemcpy(d->bitmap->get_ptr(),
        d->dev_bitmap,
        d->bitmap->image_size(),
        cudaMemcpyDeviceToHost));
}

初始化代码如下:

#ifdef USE_CONST_MEM
#else
    HANDLE_ERROR(cudaMalloc((void**)&s,
        sizeof(Sphere) * SPHERES));
#endif

    // allocate temp memory, initialize it, copy to constant
    // memory on the GPU, then free our temp memory
    Sphere *temp_s = (Sphere*)malloc(sizeof(Sphere) * SPHERES);
    for (int i = 0; i<SPHERES; i++) {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(10.0f) + 5;

        temp_s[i].dx = STEP_SIZE * ((rand() / (float)RAND_MAX) * 2 - 1);
        temp_s[i].dy = STEP_SIZE * ((rand() / (float)RAND_MAX) * 2 - 1);
        temp_s[i].dz = STEP_SIZE * ((rand() / (float)RAND_MAX) * 2 - 1);
    }

#ifdef USE_CONST_MEM
    HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s,
        sizeof(Sphere) * SPHERES));
#else
    HANDLE_ERROR(cudaMemcpy(s, temp_s, sizeof(Sphere)*SPHERES, cudaMemcpyHostToDevice));
#endif

    free(temp_s);

cuda的版本是8.0。系统是ubuntu 16.04。

1 个答案:

答案 0 :(得分:1)

是的,我知道我哪里错了。当我使用常量内存时,我也尝试在尝试修改常量值的函数kernel_moving中更改它的值。所以该计划将打破。现在,我改为这个,它有效。

rand()