What if the size of the training set is not an integer multiple of the batch size?

Date: 2021-06-09 02:01:35

Tags: pytorch

I am running the following code on the PV_Elec_Gas3.csv dataset. The network architecture is defined as follows:

class CNN_ForecastNet(nn.Module):
    def __init__(self):
        super(CNN_ForecastNet,self).__init__()
        self.conv1d = nn.Conv1d(3,64,kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(64*2,50)
        self.fc2 = nn.Linear(50,1)
        
    def forward(self,x):
        x = self.conv1d(x)
        x = self.relu(x)
        x = x.view(-1)
        #print('x size',x.size())
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        
        return x

The Train function is defined as follows:

def Train():
    
    running_loss = .0
    
    model.train()
    
    for idx, (inputs,labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        #print('inputs ',inputs)
        preds = model(inputs.float())
        loss = criterion(preds,labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss
        
    train_loss = running_loss/len(train_loader)
    train_losses.append(train_loss.detach().numpy())
    
    print(f'train_loss {train_loss}')

train_loader is defined as train_loader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=False), with batch_size set to 2. When I run the Train function, I get the error message below. The cause is that when the code iterates over train_loader, the last iteration contains only one training point instead of the two required by batch_size. Is there any way to handle this scenario other than changing the batch size?

Here is the error message. I have also included the complete code to reproduce the error.

RuntimeError                              Traceback (most recent call last)
<ipython-input-82-78a49fb8c068> in <module>
     99 for epoch in range(epochs):
    100     print('epochs {}/{}'.format(epoch+1,epochs))
--> 101     Train()
    102     gc.collect()

<ipython-input-82-78a49fb8c068> in Train()
     81         optimizer.zero_grad()
     82         #print('inputs ',inputs)
---> 83         preds = model(inputs.float())
     84         loss = criterion(preds,labels.float())
     85         loss.backward()

~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-82-78a49fb8c068> in forward(self, x)
     57         x = x.view(-1)
     58         #print('x size',x.size())
---> 59         x = self.fc1(x)
     60         x = self.relu(x)
     61         x = self.fc2(x)

~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
     91 
     92     def forward(self, input: Tensor) -> Tensor:
---> 93         return F.linear(input, self.weight, self.bias)
     94 
     95     def extra_repr(self) -> str:

~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
   1690         ret = torch.addmm(bias, input, weight.t())
   1691     else:
-> 1692         output = input.matmul(weight.t())
   1693         if bias is not None:
   1694             output += bias

RuntimeError: mat1 dim 1 must match mat2 dim 0

Here is the code that reproduces the error:

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

from numpy import array
import torch
import gc
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
from torch.utils.data import Dataset,DataLoader

solar_power = pd.read_csv('PV_Elec_Gas3.csv').rename(columns={'date':'timestamp'}).set_index('timestamp')

train_set = solar_power[:'8/10/2016']

def split_sequence(sequence, n_steps):
    x, y = list(), list()
    for i in range(len(sequence)):
        
        end_ix = i + n_steps
        
        if end_ix > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        x.append(seq_x)
        y.append(seq_y)
    return array(x), array(y)


n_steps = 3
train_x,train_y = split_sequence(train_set.loc[:,"kWh electricity/day"].values,n_steps)

class ElecDataset(Dataset):
    def __init__(self,feature,target):
        self.feature = feature
        self.target = target
    
    def __len__(self):
        return len(self.feature)
    
    def __getitem__(self,idx):
        item = self.feature[idx]
        label = self.target[idx]
        
        return item,label

class CNN_ForecastNet(nn.Module):
    def __init__(self):
        super(CNN_ForecastNet,self).__init__()
        self.conv1d = nn.Conv1d(3,64,kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(64*2,50)
        self.fc2 = nn.Linear(50,1)
        
    def forward(self,x):
        x = self.conv1d(x)
        x = self.relu(x)
        x = x.view(-1)
        #print('x size',x.size())
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        
        return x

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN_ForecastNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.MSELoss()

train_losses = []

def Train():
    
    running_loss = .0
    
    model.train()
    
    for idx, (inputs,labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        #print('inputs ',inputs)
        preds = model(inputs.float())
        loss = criterion(preds,labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss
        
    train_loss = running_loss/len(train_loader)
    train_losses.append(train_loss.detach().numpy())
    
    print(f'train_loss {train_loss}')
    

train = ElecDataset(train_x.reshape(train_x.shape[0],train_x.shape[1],1),train_y)
train_loader = torch.utils.data.DataLoader(train,batch_size=2,shuffle=False)

epochs = 1
for epoch in range(epochs):
    print('epochs {}/{}'.format(epoch+1,epochs))
    Train()
    gc.collect()

   

1 Answer:

Answer 0 (score: 2):

No!!!!


In your forward method, you call x.view(-1) before passing the result to the nn.Linear layer. This "flattens" not only the spatial dimensions of x, but also the batch dimension! You are basically mixing together all the samples in the batch, making your model dependent on the batch size and, in general, making the predictions depend on the batch as a whole rather than on the individual data points.

Instead, you should:

  ...
  def forward(self, x):
    x = self.conv1d(x)
    x = self.relu(x)
    x = x.flatten(start_dim=1)  # flatten all BUT batch dimension
    x = self.fc1(x)  # you'll probably have to modify in_features of fc1 now
    x = self.relu(x)
    x = self.fc2(x)
    return x

For more details, see flatten().
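Putting it together: with the question's input shape (batch, 3, 1), Conv1d(3, 64, kernel_size=1) outputs (batch, 64, 1), so flatten(start_dim=1) yields (batch, 64) and fc1 needs in_features=64. This also explains why the error only surfaced on the last batch: with a full batch of 2, x.view(-1) happens to produce exactly 64*2 = 128 values, coincidentally matching fc1's in_features of 64*2, while the final batch of 1 produces only 64. A minimal runnable sketch, assuming the (batch, 3, 1) shape from the question's reshape:

import torch
import torch.nn as nn

class CNN_ForecastNet(nn.Module):
    def __init__(self):
        super(CNN_ForecastNet, self).__init__()
        self.conv1d = nn.Conv1d(3, 64, kernel_size=1)  # (batch, 3, 1) -> (batch, 64, 1)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(64, 50)  # 64, not 64*2: the batch dimension is no longer flattened away
        self.fc2 = nn.Linear(50, 1)

    def forward(self, x):
        x = self.conv1d(x)
        x = self.relu(x)
        x = x.flatten(start_dim=1)  # (batch, 64, 1) -> (batch, 64)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Sanity check: the forward pass now works for any batch size, including a final batch of 1
model = CNN_ForecastNet()
print(model(torch.randn(2, 3, 1)).shape)  # torch.Size([2, 1])
print(model(torch.randn(1, 3, 1)).shape)  # torch.Size([1, 1])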


If, for some reason, you must process only "full batches", you can tell the DataLoader to drop the last (incomplete) batch by changing the argument drop_last from its default False to True:
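For example, reusing the DataLoader call from the question:

train_loader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=False, drop_last=True)  # skips the incomplete final batch

Note that drop_last only hides the symptom here: with x.view(-1) in forward, the input size that fc1 receives is still tied to the batch size, so the flatten fix above is the real solution.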