我正在尝试学习 cuda 并将我当前的项目转换为使用它,但出现此错误:
<块引用>错误 MSB3721 命令 ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin\nvcc.exe" -gencode=arch=compute_52,code="sm_52,compute_52" --use- local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\HostX86\x64" -x cu -I"C:\Program Files \NVIDIA GPU 计算工具包\CUDA\v11.2\include" -I"C:\Program Files\NVIDIA GPU 计算工具包\CUDA\v11.2\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -D_DEBUG -D_CONSOLE -D_UNICODE -DUNICODE -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64\Debug\vc142.pdb /FS /Zi /RTC1 /MDd " -o x64\Debug\cudaMain.cu.obj "C:\Users[my usr name]\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2\cudaMain.cu"" 以代码 255 退出。
我正在使用 .cuh 文件如何声明类它给我一个警告说“属性不适用于实体”。我是否需要标记我还收到一条警告,告诉我“警告 C26812 枚举类型‘cudaError’是无作用域的。更喜欢‘枚举类’而不是‘枚举’(Enum.3)。”
.cuh 文件
#pragma once
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <string>
#include <vector>
#include <algorithm>
#include <ctime>
#pragma warning(disable : 4996)
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
using namespace std;
static unsigned const int maxCircuitSizeG = 200;
static const int inputSizeG = 16;
static const int outputSizeG = 9;
static const short childParentAmountG = 10;
__host__ __device__ class Gate {
public:
char type;
int children[childParentAmountG];
int parents[childParentAmountG];
bool output;
__host__ __device__ Gate();
};
__host__ __device__ class Circuit {
public:
Gate gates[maxCircuitSizeG];
bool inputs[inputSizeG];
bool outputs[outputSizeG];
double score;
unsigned int averageCounter; // up to 4,294,967,295
int size;
__host__ __device__ Circuit();
};
__host__ __device__ unsigned int randumb(void);
unsigned int randumb2(void);
__host__ __device__ bool IsBoolInParents(Gate gate, Gate circuit[], bool boolToFind);
__host__ __device__ bool XORgateOutput(Gate gate, Gate circuit[]);
__host__ __device__ bool IsIntInArr(int arr[], int arrSize, int num);
bool IsShortInArr(short arr[], short arrSize, short num);
bool IsIntInVector(vector<int> vec, int num);
__host__ __device__ bool PushInt(int arr[], int arrSize, int num);
bool PushShort(short arr[], short arrSize, short num);
__host__ __device__ int CountCircuitSize(Gate circuit[]);
int CountCircuitSize2(Gate circuit[]);
__host__ __device__ void RemoveIntAndShiftArr(int arr[], int arrSize, int indexToRemove);
void RemoveShortAndShiftArr(short arr[], short arrSize, short indexToRemove);
int IntPow(int num, int exponent);
int BinaryToDecimal(bool bits[], int byteSize, bool firstIsMostSignificant);
string IntToString(int num);
void DecimalToBinary(int n, bool byte[], int byteSize);
void ShiftBinary(bool byte[], int byteSize, bool shiftLeft, int shiftAmount);
string BinaryToString(bool byte[], int byteSize);
__host__ __device__ void CopyGate(Gate& to, Gate from);
__host__ __device__ void RandomGateType(Gate circuit[], int circuitIndex);
__host__ __device__ void DestroyGate(Gate circuit[], int indexToRemove, int circuitSize);
__host__ __device__ void CleanCircuit(Gate circuit[], int circuitSize);
__host__ __device__ void AddChild(Gate circuit[], int circuitSize, int index);
__host__ __device__ void AddParent(Gate circuit[], int circuitSize, int index);
__host__ __device__ void CreateGate(Circuit& circuit, int indexToAdd, int circuitSize);
__host__ __device__ void CreateGate2(Circuit& circuit, int indexToAdd, int circuitSize);
__host__ __device__ void SafeFixCircuit(Gate circuit[], int circuitSize);
__host__ __device__ bool GateOutput(Gate gate, Gate circuit[]);
__host__ __device__ void Process(Circuit& circuit);
__host__ __device__ void ProcessFromCharArr(Circuit& circuit, char arr[]);
__host__ __device__ void RandomCircuit(Circuit& circuit, int circuitSize, int startingChildParentAmount);
__host__ __device__ void RemoveChild(Gate circuit[], int circuitSize, int index);
__host__ __device__ void Mutate(Circuit& circuit, int growChance, int shrinkChance, int grow, int shrink, int rate, int intensity);
void CreateAdderCircuit(Gate circuit[]);
string CircuitToString(Circuit circuit);
void SaveCircuit(string path, Circuit circuit, int circuitSize);
void FileToCircuit(string path, Gate circuit[]);
__host__ __device__ void InitRndPop(Circuit population[], int popSize, int startCircuitSize, int startChildParentAmount);
void InitPopFromFile(Circuit population[], int popSize, string path);
vector<string> MakeRndSample(int sampleSize, string path);
__host__ __device__ void Score3(Circuit& circuit, char arr[]);
__host__ __device__ void CopyCircuit(Circuit from, Circuit& to);
__host__ __device__ void CopyCircuitToPopulation(Circuit circuit, Circuit population[], unsigned short populationSize);
void CopyCircuit2(Circuit from, Circuit& to);
void CopyCircuitsToPop(vector<int> circuitsIndexes, Circuit population[], unsigned short populationSize);
__host__ __device__ void ScoreAverageFromArray(char arr[], int arrSize, Circuit& circuit);
__host__ __device__ void RandomBruteForceImproveFromArray(Circuit& circuit, char arr[], unsigned int arrSize, unsigned int maxSearch);
void fileToCharArr(char arr[], int size, string path);
带有定义的 .cu 太大而无法包含,但它们都没有主机 设备他们不需要的东西,对吗?
主 .cu 文件
#include "LogicSimCuda.cuh"
#include <stdio.h>
cudaError_t improveCircuitPopWithCuda(Circuit* circuit, char arr[], int arrSize, unsigned int size);
__global__ void addKernel(Circuit *circuit, char arr[], int arrSize, const int maxSearch)
{
int i = threadIdx.x;
RandomBruteForceImproveFromArray(circuit[i], arr, arrSize, maxSearch);
}
int main()
{
const int populationSize = 1024;
Circuit *population = new Circuit[populationSize];
InitPopFromFile(population, populationSize, "C:/Users/voidm/Documents/LogicSimProjectGIT/LogicSim/Circuits/day2/Sun_Dec_20_12_54_59_2020.txt");
unsigned const int fileSize = (inputSizeG + outputSizeG + 1) * 65536;
char* trainingArr = new char[fileSize];
fileToCharArr(trainingArr, fileSize,"C:/Users/voidm/Documents/LogicSimProjectGIT/LogicSim/src/Eight-Bit-Adder-Data.txt" );
cudaError_t cudaStatus = improveCircuitPopWithCuda(population, trainingArr, fileSize, populationSize);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "improveCircuitPopWithCuda failed!");
return 1;
}
std::cout << population[0].score;
return 0;
}
cudaError_t improveCircuitPopWithCuda(Circuit* circuitPop, char arr[], int arrSize, unsigned int size)
{
Circuit *dev_circuit;
cudaError_t cudaStatus;
// Choose which GPU to run on, change this on a multi-GPU system.
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
goto Error;
}
// Allocate GPU buffers .
cudaStatus = cudaMalloc((void**)&dev_circuit, size * sizeof(Circuit));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
// Copy from host memory to GPU buffers.
cudaStatus = cudaMemcpy(dev_circuit, circuitPop, size * sizeof(Circuit), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
// Launch a kernel on the GPU with one thread for each element.
addKernel <<<1, size >>> (dev_circuit, arr, arrSize, 1000000);
// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
goto Error;
}
// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
goto Error;
}
// Copy output vector from GPU buffer to host memory.
cudaStatus = cudaMemcpy(circuitPop, dev_circuit, size * sizeof(Circuit), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
Error:
cudaFree(dev_circuit);
return cudaStatus;
}
```
here is the output
>1>------ Build started: Project: LogicGateMachineLearning_V2, Configuration: Debug x64 ------
1>Compiling CUDA source file cudaMain.cu...
1>Compiling CUDA source file LogicSimCuda.cu...
1>
1>C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin\nvcc.exe" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\HostX86\x64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -D_DEBUG -D_CONSOLE -D_UNICODE -DUNICODE -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64\Debug\vc142.pdb /FS /Zi /RTC1 /MDd " -o x64\Debug\LogicSimCuda.cu.obj "C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2\LogicSimCuda.cu"
1>
1>C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin\nvcc.exe" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\HostX86\x64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -D_DEBUG -D_CONSOLE -D_UNICODE -DUNICODE -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64\Debug\vc142.pdb /FS /Zi /RTC1 /MDd " -o x64\Debug\cudaMain.cu.obj "C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2\cudaMain.cu"
1>ptxas fatal : Unresolved extern function '_Z32RandomBruteForceImproveFromArrayR7CircuitPcjj'
1>cudaMain.cu
1>C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Microsoft\VC\v160\BuildCustomizations\CUDA 11.2.targets(785,9): error MSB3721: The command ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin\nvcc.exe" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\HostX86\x64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -D_DEBUG -D_CONSOLE -D_UNICODE -DUNICODE -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64\Debug\vc142.pdb /FS /Zi /RTC1 /MDd " -o x64\Debug\cudaMain.cu.obj "C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2\cudaMain.cu"" exited with code 255.
1>Done building project "LogicGateMachineLearning_V2.vcxproj" -- FAILED.
1>LogicSimCuda.cu
========== Build: 0 succeeded, 1 failed, 0 up-to-date, 0 skipped ==========
答案 0 :(得分:2)
这个:
__host__ __device__ class Gate {
public:
char type;
int children[childParentAmountG];
int parents[childParentAmountG];
bool output;
__host__ __device__ Gate();
};
是非法的。执行空间说明符(所以 __host__
和 __device__
)适用于函数和变量声明和定义,而不是类型。正确的类声明应该是
class Gate {
public:
char type;
int children[childParentAmountG];
int parents[childParentAmountG];
bool output;
__host__ __device__ Gate();
};
您的代码中很可能还有其他问题,但没有看到实际的编译错误日志,也没有精力去浏览问题中转储的所有代码,这就是您确定的一个编译错误的根源。
>