如何在OpenCL中使用clCreateProgramWithBinary?

时间:2011-09-07 18:27:16

标签: opencl

我正在尝试用clCreateProgramWithBinary让一个基本程序运行起来。这样做只是为了弄清楚它的用法,而不是为了编写“真正的”应用程序。

我看到其中一个参数是二进制文件列表。我究竟该如何创建一个用于测试的二进制文件?我有一些测试代码,它从源代码创建程序、构建程序,并将内核加入队列执行。在这个过程中的某个时刻是否已经生成了二进制文件,可以让我传给clCreateProgramWithBinary?

这是我的一些代码,只是为了了解我的整体流程。为简单起见,我省略了注释和错误检查。

/* Create a program object from the kernel source string. */
program = clCreateProgramWithSource(clctx, 1, &dumbkernelsource, NULL, &errcode);
/* Build the program for the devices listed in env. */
errcode = clBuildProgram(program, env->num_devices, env->device, NULL, NULL, NULL);
/* Look up the kernel named "flops" in the built program. */
mykernel = clCreateKernel(program, "flops", &errcode);
/* Query CL_KERNEL_WORK_GROUP_SIZE for this kernel on the first device; the result is used as the local size. */
errcode = clGetKernelWorkGroupInfo(mykernel, *(env->device), CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
/* Global size = number of work-groups times the local size. */
global = num_workgroups * local;
/* Enqueue the kernel over a 1-dimensional range. */
errcode = clEnqueueNDRangeKernel(commands, mykernel, 1, NULL, &global, &local, 0, NULL, NULL);

3 个答案:

答案 0 :(得分:3)

编译程序后,可以使用clGetProgramInfo获取其二进制代码,然后将其保存到文件中。

示例代码(未尝试编译,但应该是这些内容):

program = clCreateProgramWithSource(clctx, 1, &dumbkernelsource, NULL, &errcode);
errcode = clBuildProgram(program, env->num_devices, env->device, NULL, NULL, NULL);
int number_of_binaries;
char **binary;
int *binary_sizes;
errcode = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, NULL, 0, &number_of_binaries);
binary_sizes = new int[number_of_binaries];
binary = new char*[number_of_binaries];
errcode = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, binary_sizes, number_of_binaries*sizeof(int), &number_of_binaries);
for (int i = 0; i < number_of_binaries; ++i) binary[i] = new char[binary_sizes[i]];
errcode = clGetProgramInfo(program, CL_PROGRAM_BINARIES, binary, number_of_binaries*sizeof(char*), &number_of_binaries);

答案 1 :(得分:1)

最小可运行示例

从CL C源代码编译内嵌的向量自增着色器,将二进制文件保存到a.bin,然后加载该二进制着色器并运行它:

./a.out

程序结束时通过断言检查运行结果。

忽略CL C着色器,从a.bin加载二进制文件并运行它:

./a.out 0

编译并运行:

gcc -ggdb3 -std=c99 -Wall -Wextra a.c -lOpenCL && ./a.out

在Ubuntu 16.10中测试,NVIDIA NVS5400,驱动程序375.39。

GitHub上游:https://github.com/cirosantilli/cpp-cheat/blob/b1e9696cb18a12c4a41e0287695a2a6591b04597/opencl/binary_shader.c

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <CL/cl.h>

/* OpenCL C source of the kernel under test: each work-item increments the
 * element of `out` selected by its global id. */
const char *source =
    "__kernel void kmain(__global int *out) {\n"
    "    out[get_global_id(0)]++;\n"
    "}\n"
;

/* Path of the program-binary file: main() writes it after building from
 * source, and reads it back when asked to skip compilation. */
#define BIN_PATH "a.bin"

/*
 * Read the entire file at `path` into a newly allocated buffer.
 *
 * path:       file to read; opened in binary mode so bytes come back unmodified.
 * length_out: if non-NULL, receives the number of bytes read on success;
 *             left untouched on failure.
 *
 * Returns the buffer, which the caller must free(), or NULL if the file size
 * cannot be determined, the allocation fails, or the read comes up short.
 * The buffer is NOT NUL-terminated. Failure to open the file trips an assert
 * (and returns NULL when asserts are compiled out).
 */
char* common_read_file(const char *path, long *length_out) {
    char *buffer = NULL;
    FILE *f;
    long length;

    /* "rb": the contents are raw bytes; text mode would alter them on
     * platforms that translate line endings. */
    f = fopen(path, "rb");
    assert(NULL != f);
    if (NULL == f) {
        return NULL;
    }
    if (fseek(f, 0, SEEK_END) != 0) {
        goto fail;
    }
    length = ftell(f);
    if (length < 0) {
        goto fail;
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        goto fail;
    }
    /* Allocate at least one byte so an empty file still yields a non-NULL
     * buffer and is distinguishable from an error. */
    buffer = malloc(length > 0 ? (size_t)length : 1);
    if (NULL == buffer) {
        goto fail;
    }
    if (fread(buffer, 1, (size_t)length, f) < (size_t)length) {
        goto fail;
    }
    fclose(f);
    if (NULL != length_out) {
        *length_out = length;
    }
    return buffer;

fail:
    /* Release everything acquired so far; free(NULL) is a no-op. */
    free(buffer);
    fclose(f);
    return NULL;
}

/* Report a failed OpenCL call on stderr. Returns nonzero iff err is CL_SUCCESS. */
static int cl_ok(cl_int err, const char *what) {
    if (err != CL_SUCCESS) {
        fprintf(stderr, "%s failed with OpenCL error %d\n", what, (int)err);
        return 0;
    }
    return 1;
}

/*
 * Demo of clCreateProgramWithBinary: obtain a program binary, create a kernel
 * from it, run it on a two-element buffer and assert on the result.
 *
 * Usage:
 *   ./a.out      build the embedded source, save its binary to BIN_PATH,
 *                then run the kernel created from that binary
 *   ./a.out 0    skip compilation and load the binary from BIN_PATH
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if any file or OpenCL
 * operation fails (a diagnostic is printed to stderr).
 */
int main(int argc, char **argv) {
    int status = EXIT_FAILURE;
    FILE *f;
    char *binary = NULL;
    /* Every handle starts out NULL so the cleanup code can tell what was
     * actually created; `program` and `kernel` exist only on the
     * build-from-source path. */
    cl_command_queue command_queue = NULL;
    cl_context context = NULL;
    cl_device_id device;
    cl_int input[] = {1, 2}, errcode_ret, binary_status;
    cl_kernel kernel = NULL, binary_kernel = NULL;
    cl_mem buffer = NULL;
    cl_platform_id platform;
    cl_program program = NULL, binary_program = NULL;
    const size_t global_work_size = sizeof(input) / sizeof(input[0]);
    int use_cache;
    long length;
    size_t binary_size = 0;

    /* The argument "0" means: do not compile, use the cached binary. */
    use_cache = (argc > 1) && (strcmp(argv[1], "0") == 0);

    /* Get the binary, and create a kernel with it. */
    if (!cl_ok(clGetPlatformIDs(1, &platform, NULL), "clGetPlatformIDs")) {
        goto cleanup;
    }
    if (!cl_ok(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL), "clGetDeviceIDs")) {
        goto cleanup;
    }
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &errcode_ret);
    if (!cl_ok(errcode_ret, "clCreateContext")) {
        goto cleanup;
    }
    command_queue = clCreateCommandQueue(context, device, 0, &errcode_ret);
    if (!cl_ok(errcode_ret, "clCreateCommandQueue")) {
        goto cleanup;
    }
    if (use_cache) {
        binary = common_read_file(BIN_PATH, &length);
        /* `length` is only meaningful when a buffer was returned. */
        if (NULL == binary || length <= 0) {
            fprintf(stderr, "could not read a program binary from %s\n", BIN_PATH);
            goto cleanup;
        }
        binary_size = (size_t)length;
    } else {
        program = clCreateProgramWithSource(context, 1, &source, NULL, &errcode_ret);
        if (!cl_ok(errcode_ret, "clCreateProgramWithSource")) {
            goto cleanup;
        }
        if (!cl_ok(clBuildProgram(program, 1, &device, "", NULL, NULL), "clBuildProgram (source)")) {
            goto cleanup;
        }
        kernel = clCreateKernel(program, "kmain", &errcode_ret);
        if (!cl_ok(errcode_ret, "clCreateKernel (source)")) {
            goto cleanup;
        }
        if (!cl_ok(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(binary_size), &binary_size, NULL),
                   "clGetProgramInfo(CL_PROGRAM_BINARY_SIZES)")) {
            goto cleanup;
        }
        if (0 == binary_size) {
            fprintf(stderr, "no program binary is available for this device\n");
            goto cleanup;
        }
        binary = malloc(binary_size);
        if (NULL == binary) {
            fprintf(stderr, "out of memory allocating %zu bytes\n", binary_size);
            goto cleanup;
        }
        /* CL_PROGRAM_BINARIES fills an array of buffer pointers (one per
         * device), so the size passed is that of the pointer array, not of
         * the binary itself. */
        if (!cl_ok(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, NULL),
                   "clGetProgramInfo(CL_PROGRAM_BINARIES)")) {
            goto cleanup;
        }
        /* "wb": write the binary unmodified on every platform. */
        f = fopen(BIN_PATH, "wb");
        if (NULL == f) {
            perror(BIN_PATH);
            goto cleanup;
        }
        if (fwrite(binary, 1, binary_size, f) != binary_size) {
            fprintf(stderr, "short write to %s\n", BIN_PATH);
            fclose(f);
            goto cleanup;
        }
        if (fclose(f) != 0) {
            perror(BIN_PATH);
            goto cleanup;
        }
    }
    binary_program = clCreateProgramWithBinary(
        context, 1, &device, &binary_size,
        (const unsigned char **)&binary, &binary_status, &errcode_ret
    );
    /* The host copy of the binary is not used again after this call. */
    free(binary);
    binary = NULL;
    if (!cl_ok(errcode_ret, "clCreateProgramWithBinary") ||
        !cl_ok(binary_status, "loading the program binary")) {
        goto cleanup;
    }
    if (!cl_ok(clBuildProgram(binary_program, 1, &device, NULL, NULL, NULL), "clBuildProgram (binary)")) {
        goto cleanup;
    }
    binary_kernel = clCreateKernel(binary_program, "kmain", &errcode_ret);
    if (!cl_ok(errcode_ret, "clCreateKernel (binary)")) {
        goto cleanup;
    }

    /* Run the kernel created from the binary. */
    buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(input), input, &errcode_ret);
    if (!cl_ok(errcode_ret, "clCreateBuffer")) {
        goto cleanup;
    }
    if (!cl_ok(clSetKernelArg(binary_kernel, 0, sizeof(buffer), &buffer), "clSetKernelArg")) {
        goto cleanup;
    }
    if (!cl_ok(clEnqueueNDRangeKernel(command_queue, binary_kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL),
               "clEnqueueNDRangeKernel")) {
        goto cleanup;
    }
    if (!cl_ok(clFlush(command_queue), "clFlush")) {
        goto cleanup;
    }
    if (!cl_ok(clFinish(command_queue), "clFinish")) {
        goto cleanup;
    }
    if (!cl_ok(clEnqueueReadBuffer(command_queue, buffer, CL_TRUE, 0, sizeof(input), input, 0, NULL, NULL),
               "clEnqueueReadBuffer")) {
        goto cleanup;
    }

    /* Assertions. */
    assert(input[0] == 2);
    assert(input[1] == 3);
    status = EXIT_SUCCESS;

cleanup:
    /* Cleanup. Release only what was actually created. */
    free(binary);
    if (NULL != buffer) {
        clReleaseMemObject(buffer);
    }
    if (NULL != kernel) {
        clReleaseKernel(kernel);
    }
    if (NULL != binary_kernel) {
        clReleaseKernel(binary_kernel);
    }
    if (NULL != program) {
        clReleaseProgram(program);
    }
    if (NULL != binary_program) {
        clReleaseProgram(binary_program);
    }
    if (NULL != command_queue) {
        clReleaseCommandQueue(command_queue);
    }
    if (NULL != context) {
        clReleaseContext(context);
    }
    return status;
}

我强烈推荐cat a.bin,其中包含用于此实现的人类可读(和可编辑)PTX程序集。

答案 2 :(得分:0)

官方的《OpenCL编程指南》一书中有一个很好的例子。还有一个Google Code项目opencl-book-samples,其中包含了这本书中的代码。您要查找的示例是here