Question

我试图编写一个程序，该程序为生成的任意N个数字计算pi。

   __global__ void kernel(int* count_d, float* randomnums, double N){
      int i;
      double x, y, z;
      //Find the overall ID of the thread
      int tid = blockDim.x*blockIdx.x+threadIdx.x;
      i = tid;
      int xidx=0;
      int yidx=0;


      //Start the MonteCarlo
      xidx = i + i;
      yidx = xidx +1;


      //Get the random x,y points 
        x = randomnums[xidx];
        y = randomnums[yidx];
        z = ((x*x) + (y*y));

      if (z<=1)
        count_d[tid] = 1;
      else
        count_d[tid] = 0;
    }



    int main(){

      double N = 100000;
      float *randomnums;
      double pi;


      //Threads per thread block to be launched
      int threads = 1024;
      //Number of thread blocks launched
      int blocks = 100;
      int* count_d;

      unsigned int reducedcount = 0;

      for (int i=threads*blocks; i<=N; i+=threads*blocks){

      //Allocate the array for the random numbers
      cudaMalloc((void**)&randomnums,(i)*sizeof(float));
      //Use CuRand to generate an array of random numbers on the device
      int status;
      curandGenerator_t gen;
      status = curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_MRG32K3A);
      status |= curandSetPseudoRandomGeneratorSeed(gen, 4294967296ULL^time(NULL));
      status |= curandGenerateUniform(gen, randomnums, (i));
      status |= curandDestroyGenerator(gen);

      //Check to see if there was any problem launching the CURAND kernels and generating
      //the random numbers on the device
      if (status != CURAND_STATUS_SUCCESS){
        printf("CuRand Failure\n");
        exit(EXIT_FAILURE);
      }

        int *count = (int*)malloc(blocks*threads*sizeof(int));
        unsigned int reducedcount = 0;
        //Allocate the array to hold a value (1,0) whether the point in is the circle (1) or not (0)
        cudaMalloc((void**)&count_d, (blocks*threads)*sizeof(int));

        kernel <<<blocks, threads>>> (count_d, randomnums, i);

        cudaDeviceSynchronize();
        cudaMemcpy(count, count_d, blocks*threads*sizeof(int),cudaMemcpyDeviceToHost);


        //Reduce array into int
        for(int j = 0; j<N; j++){
           reducedcount += count[j];
        }

        //Free the cudaMalloc()'d arrays
        cudaFree(randomnums);
        cudaFree(count_d);
        free(count);
     }
      //Find the ratio
      pi = ((double)reducedcount/N)*4.0;
      printf("Pi: %f\n", pi);

      return
 0;
}

我想到了一个将N分成块*线程大小的部分，并为每个部分添加内核的方法。原来，当我增加N变量时，我得到了分段错误：Segmentation fault (core dumped)。为什么会这样？我也想问一下我在一个循环中吃午餐的想法是否正确。

Answer 1

您似乎在做很多手动内存管理。这很糟糕，并导致错误和难以阅读的代码。考虑下面的代码，其中使用var id_of_the_product = $('#is-a-gift').data( 'gift_product_id' ) ; var items_in_cart = (cart_item_count) - (gift_wraps_in_cart); $.ajax({ type: 'POST', url: '/cart/update.js', data: JSON.parse('{ "updates": { "'+ id_of_the_product +'" : "'+items_in_cart+'" }, "attributes": { "gift-wrapping": "'+gift_wrapping_type+'" } }'), dataType: 'json', success: function() { } });代替原始指针。

thrust::device_vector

蒙特卡洛在库达

1 个答案: