Question

我有一个基于 ResNeXt 的图像二分类模型。每当运行到测试集时，我总是遇到运行时错误。错误消息是：RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'weight'

我像处理训练集那样，把测试集张量也发送到了 GPU。我已经看了以下内容，并且正在按照其中的建议进行操作。

这是我的模型代码：

# Build the model: a pretrained ResNeXt-50 (32x4d), moved to `device`.
resnext = models.resnext50_32x4d(pretrained=True)
resnext = resnext.to(device)
# Enable gradients on every parameter of the loaded network.
for param in resnext.parameters():
    param.requires_grad = True
# Attach a 2048 -> 1000 -> 2 head under the attribute name `classifier`.
# NOTE(review): this assignment happens after the .to(device) call above, so
# the new layers are not covered by that call -- confirm which device they
# end up on.
# NOTE(review): torchvision's ResNet/ResNeXt forward pass is understood to use
# `self.fc` as its final layer; verify that an attribute named `classifier`
# actually takes part in the forward pass.
# NOTE(review): nn.CrossEntropyLoss is documented as expecting unnormalized
# logits, so the trailing Softmax is likely redundant -- confirm.
resnext.classifier = nn.Sequential(nn.Linear(2048, 1000),
                                 nn.ReLU(),
                                 nn.Dropout(0.4),
                                 nn.Linear(1000, 2),
                                 nn.Softmax(dim = 1))
criterion = nn.CrossEntropyLoss()
# Only the parameters of `resnext.classifier` are handed to the optimizer.
optimizer = torch.optim.Adam(resnext.classifier.parameters(), lr=0.001)
import time
# Wall-clock reference used by the duration report printed by the loop.
start_time = time.time()

# Number of passes of the outermost loop.
epochs = 1

# Batch limits: the train/test loops break once the batch index reaches these.
# max_trn_batch is also used as the length of the tqdm-wrapped loop.
max_trn_batch = 5
max_tst_batch = 156

# Collected during the test pass: raw model outputs and the `policy` item
# yielded by the test loader for each batch.
y_val_list = []
policy_list = []

# History lists, appended to once per tqdm-loop iteration: the most recent
# loss value and the running correct-prediction count for each split.
train_losses = []
test_losses = []
train_correct = []
test_correct = []

# Main loop: for each epoch, repeat (train on a few batches, then evaluate on
# the test loader) and record losses / correct counts.
# NOTE(review): the inner loop reuses the name `i`, shadowing the epoch index;
# the `epoch:` field printed below is therefore the inner counter.
for i in range(epochs):
    for i in tqdm(range(0, max_trn_batch)):
        # Running correct-prediction counters, reset for every inner iteration.
        trn_corr = 0
        tst_corr = 0

        # Run the training batches
        for b, (X_train, y_train, policy) in enumerate(train_loader):
            #print(y_train, policy)
            # Rebind the batch to the tensors returned by .to(device).
            X_train = X_train.to(device)
            y_train = y_train.to(device)
            # Stop after max_trn_batch batches (b is still 0-based here).
            if b == max_trn_batch:
                break
            # Shift to a 1-based batch number for the progress report below;
            # enumerate() overwrites b again on the next iteration.
            b+=1

            # Apply the model
            y_pred = resnext(X_train)
            loss = criterion(y_pred, y_train)

            # Tally the number of correct predictions
            # torch.max(..., 1)[1] is the index of the largest value along dim 1.
            predicted = torch.max(y_pred.data, 1)[1]
            batch_corr = (predicted == y_train).sum()
            trn_corr += batch_corr
            # Update parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print interim results
            # `b%1 == 0` is always true, so this prints after every batch.
            # NOTE(review): `100*b` and `63610` presumably encode a batch size
            # of 100 and the dataset size -- confirm against the loader. The
            # printed "accuracy" is count/(100*b), i.e. a fraction, although
            # it is labelled with '%'.
            if b%1 == 0:
                print(f'epoch: {i:2}  batch: {b:4} [{100*b:6}/63610]  loss: {loss.item():10.8f}  \
    accuracy: {trn_corr.item()/(100*b):7.3f}%')

        # Record the loss of the last training batch (the object returned by
        # criterion, not a Python float) and the running correct count.
        train_losses.append(loss)
        train_correct.append(trn_corr)

        # Run the testing batches
        with torch.no_grad():
            for b, (X_test, y_test, policy) in enumerate(test_loader):
                policy_list.append(policy)
                # NOTE(review): the return values of these two .to(device)
                # calls are discarded, so X_test / y_test are not rebound --
                # unlike the training loop above, which assigns the result.
                X_test.to(device)
                y_test.to(device)
                if b == max_tst_batch:
                    break

                # Apply the model
                y_val = resnext(X_test)
                y_val_list.append(y_val.data)
                # Tally the number of correct predictions
                predicted = torch.max(y_val.data, 1)[1] 
                tst_corr += (predicted == y_test).sum()

        # Test loss is computed once, after the loop, from whatever y_val and
        # y_test currently hold.
        # NOTE(review): if the break above fired, y_test belongs to the batch
        # that triggered the break while y_val is from the previous batch.
        loss = criterion(y_val, y_test)
        test_losses.append(loss)
        test_correct.append(tst_corr)

    # Reported once per outer-loop pass: seconds elapsed since start_time.
    print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed

这是完整的错误回溯（traceback）：

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-84-48bce2e8d4fa> in <module>
     60 
     61                 # Apply the model
---> 62                 y_val = resnext(X_test)
     63                 y_val_list.append(y_val.data)
     64                 # Tally the number of correct predictions

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    545             result = self._slow_forward(*input, **kwargs)
    546         else:
--> 547             result = self.forward(*input, **kwargs)
    548         for hook in self._forward_hooks.values():
    549             hook_result = hook(self, input, result)

C:\ProgramData\Anaconda3\lib\site-packages\torchvision\models\resnet.py in forward(self, x)
    194 
    195     def forward(self, x):
--> 196         x = self.conv1(x)
    197         x = self.bn1(x)
    198         x = self.relu(x)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    545             result = self._slow_forward(*input, **kwargs)
    546         else:
--> 547             result = self.forward(*input, **kwargs)
    548         for hook in self._forward_hooks.values():
    549             hook_result = hook(self, input, result)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
    341 
    342     def forward(self, input):
--> 343         return self.conv2d_forward(input, self.weight)
    344 
    345 class Conv3d(_ConvNd):

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in conv2d_forward(self, input, weight)
    338                             _pair(0), self.dilation, self.groups)
    339         return F.conv2d(input, weight, self.bias, self.stride,
--> 340                         self.padding, self.dilation, self.groups)
    341 
    342     def forward(self, input):

RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'weight'

再强调一次，我的张量和模型都已发送到 GPU，所以我不明白到底是哪里出了问题。有人能看出我的错误吗？

Answer 1

[...]我的张量和模型被发送到GPU [...]

测试（test）张量并没有被发送到 GPU。这是一个很容易犯的小错误：

# The tensors returned by .to(device) are discarded here, so X_test and
# y_test still refer to the original tensors.
X_test.to(device)
y_test.to(device)

应该是

# Tensor.to() returns a tensor rather than modifying its receiver in place,
# so the names must be rebound to the result.
X_test = X_test.to(device)
y_test = y_test.to(device)

为什么我在测试集上遇到 PyTorch 运行时错误

1 个答案: