NaN error when testing the network: what could be the cause?

Date: 2019-05-13 11:12:20

Tags: deep-learning pytorch loss-function

The code below implements recognition of challenging handwritten annotations with a fully convolutional network.

During training the loss becomes NaN. What could be the cause?

I have tried switching the loss function to tanh and changing the hyperparameters, but the problem persists.
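
As a first debugging step (my addition, not part of the original post), PyTorch's anomaly detection can point at the operation where the NaN first appears; the sketch below assumes a plain training loop with hypothetical names model, criterion, optimizer, data and target (torchfcn.Trainer hides this loop internally):

import torch

# Raise an error during backward() at the first operation producing NaN,
# with a traceback into the forward pass (slow; for debugging only).
torch.autograd.set_detect_anomaly(True)

def training_step(model, criterion, optimizer, data, target):
    optimizer.zero_grad()
    score = model(data)
    loss = criterion(score, target)
    if torch.isnan(loss):                     # catch a NaN loss as early as possible
        raise RuntimeError("loss became NaN")
    loss.backward()
    optimizer.step()
    return loss.item()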

import torch

from PIL import Image
from matplotlib import pyplot as plt

import os
import numpy as np
from tqdm import trange, tqdm

import torch.optim as optim
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader

import random
import time

import math
import numbers

import torchfcn

import subprocess

def make_cv_folds(img_root, gt_root, num_folds, random_state):
    samples = []
    folds = [list() for i in range(num_folds)]
    lengths = [0]*num_folds
    img_root = os.path.expanduser(img_root)
    gt_root = os.path.expanduser(gt_root)
    for f in sorted(os.listdir(img_root)):
        if not os.path.isfile(os.path.join(img_root, f)) or not os.path.isfile(os.path.join(gt_root, f.rsplit(".", 1)[0] + ".png")):
            raise Exception("GT fehlt")
        samples.append((os.path.join(img_root, f), os.path.join(gt_root, f.rsplit(".", 1)[0] + ".png")))

    np.random.seed(random_state)
    np.random.shuffle(samples)
    np.random.seed()

    for s in samples:
        idx = np.argmin(lengths)
        folds[idx].append(s)
        lengths[idx] += 1
    return folds
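
For reference (my addition): the loop above greedily appends each shuffled sample to the currently smallest fold, so fold sizes differ by at most one. A minimal usage sketch, assuming hypothetical directories img/ and gt/ with matching file names:

folds = make_cv_folds('img/', 'gt/', num_folds=5, random_state=42)
print([len(f) for f in folds])  # fold sizes differ by at most one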

def load_sample(img_path, gt_path):
    img = Image.open(img_path)
    gt = Image.open(gt_path)
    gt = np.array(gt)[:,:,2]

    # binary format: background 0 -> class "other" via the temporary value 2,
    # foreground 255 -> class 1
    gt[gt == 0] = 2
    gt[gt == 255] = 1

    # HisDB format: collapse the bit-encoded labels into annotation (1) vs.
    # other (0); the order of these assignments matters
    gt[gt == 1] = 1
    gt[(gt%8) == 0] = 1
    gt[(gt%4) == 0] = 1
    gt[(gt%2) == 0] = 0
    gt = Image.fromarray(gt)
    return img, gt
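
Because the remapping above is order-sensitive, a quick sanity check (my addition) can confirm the mask really ends up binary; out-of-range class labels passed to a 2-class loss are a common cause of NaNs:

# Hypothetical paths; assert the ground-truth mask contains only classes {0, 1}.
img, gt = load_sample('img/sample.jpg', 'gt/sample.png')
values = np.unique(np.array(gt))
assert set(values.tolist()) <= {0, 1}, "unexpected label values: %s" % values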


class Annotations(Dataset):
    class_names = np.array(['other', 'annotation'])

    def __init__(self, img_root, gt_root, loader=load_sample, num_folds=5, preprocess=None, random_state=None):
        self.folds = make_cv_folds(img_root, gt_root, num_folds=num_folds, random_state=random_state)
        self.img_root = img_root
        self.num_folds = num_folds
        self.preprocess = preprocess
        self.loader = loader
        self.is_training = True
        self.load_split(num=0)

    def train(self, val):
        if val:
            self.is_training = True
            self.samples = self.train_samples
        else:
            self.is_training = False
            self.samples = self.test_samples

    def load_split(self, num=0):
        if len(self.folds) == 1:
            self.train_samples = self.folds[0]
            self.test_samples = self.folds[0]
        else:
            num = num%len(self.folds)
            train_folds = list(range(0,num)) + list(range(num+1, len(self.folds)))
            test_fold = num
            self.train_samples = []
            for i in train_folds:
                self.train_samples.extend(self.folds[i])
            self.test_samples = self.folds[num]
        if self.is_training:
            self.samples = self.train_samples
        else:
            self.samples = self.test_samples

    def untransform(self, img, gt):
        img = img.numpy()
        img = img.transpose(1, 2, 0)
        img = img.astype(np.uint8)
        img = img[:, :, ::-1]
        gt = gt.numpy()
        return img, gt


    def __getitem__(self, index):
        img_path, gt_path = self.samples[index]
        img, gt = self.loader(img_path, gt_path)
        if self.preprocess is not None:
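            # seed both calls with the same value so the image and its
            # ground-truth mask receive the identical random transform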
            state = time.time()
            img = self.preprocess(img, random_state=state)
            gt = self.preprocess(gt, random_state=state)

        img = np.array(img, dtype=np.uint8)
        img = img[:, :, ::-1]  # RGB -> BGR
        img = img.astype(np.float64)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).float()
        gt = np.array(gt, dtype=np.int32)
        gt = torch.from_numpy(gt).long()
        return img, gt

    def __len__(self):
        return len(self.samples)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        fmt_str += '    Number of training samples: {}\n'.format(len(self.train_samples))
        fmt_str += '    Number of testing samples: {}\n'.format(len(self.test_samples))
        return fmt_str
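
For context (my addition): load_split(num) holds out fold num for testing and concatenates the remaining folds for training, so a full cross-validation run could look like this sketch (hypothetical paths):

dataset = Annotations(img_root='img/', gt_root='gt/', num_folds=5)
for fold in range(dataset.num_folds):
    dataset.load_split(num=fold)
    dataset.train(True)    # expose the training samples of this split
    # ... train on `dataset` ...
    dataset.train(False)   # switch to the held-out fold
    # ... evaluate on `dataset` ...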

class CenterCrop(object):
    """Crops the given PIL Image at the center.
    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
    """

    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, img, random_state=None):
        """
        Args:
            img (PIL Image): Image to be cropped.
        Returns:
            PIL Image: Cropped image.
        """
        return transforms.functional.center_crop(img, self.size)

    def __repr__(self):
        return self.__class__.__name__ + '(size={0})'.format(self.size)


class RandomResizedCrop(object):
    """Crop the given PIL Image to random size and aspect ratio.
    A crop of random size (default: of 0.08 to 1.0) of the original size and a random
    aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
    is finally resized to given size.
    This is popularly used to train the Inception networks.
    Args:
        size: expected output size of each edge
        scale: range of size of the origin size cropped
        ratio: range of aspect ratio of the origin aspect ratio cropped
        interpolation: Default: PIL.Image.NEAREST (matching the constructor; nearest keeps label values intact)
    """

    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.NEAREST):
        self.size = (size, size)
        self.interpolation = interpolation
        self.scale = scale
        self.ratio = ratio

    @staticmethod
    def get_params(img, scale, ratio, random_state=None):
        """Get parameters for ``crop`` for a random sized crop.
        Args:
            img (PIL Image): Image to be cropped.
            scale (tuple): range of size of the origin size cropped
            ratio (tuple): range of aspect ratio of the origin aspect ratio cropped
        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
                sized crop.
        """


        random.seed(random_state)
        for attempt in range(10):
            area = img.size[0] * img.size[1]
            target_area = random.uniform(*scale) * area
            aspect_ratio = random.uniform(*ratio)

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if random.random() < 0.5:
                w, h = h, w

            if w <= img.size[0] and h <= img.size[1]:
                i = random.randint(0, img.size[1] - h)
                j = random.randint(0, img.size[0] - w)
                return i, j, h, w

        # Fallback
        w = min(img.size[0], img.size[1])
        i = (img.size[1] - w) // 2
        j = (img.size[0] - w) // 2
        return i, j, w, w

    def __call__(self, img, random_state=None):
        """
        Args:
            img (PIL Image): Image to be cropped and resized.
        Returns:
            PIL Image: Randomly cropped and resized image.
        """
        i, j, h, w = self.get_params(img, self.scale, self.ratio, random_state=random_state)
        return transforms.functional.resized_crop(img, i, j, h, w, self.size, self.interpolation)

    def __repr__(self):
        format_string = self.__class__.__name__ + '(size={0}'.format(self.size)
        format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale))
        format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio))
        format_string += ', interpolation={0})'.format(self.interpolation)
        return format_string
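
A short check (my addition) that passing the same random_state to two calls yields the same crop window, which is what Annotations.__getitem__ relies on to keep image and mask aligned:

crop = RandomResizedCrop(size=1024)
state = time.time()
a = Image.new('RGB', (2000, 1500))  # dummy stand-ins for image and mask
b = Image.new('L', (2000, 1500))
params_a = crop.get_params(a, crop.scale, crop.ratio, random_state=state)
params_b = crop.get_params(b, crop.scale, crop.ratio, random_state=state)
assert params_a == params_b  # identical seed -> identical crop window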


class RandomCrop(object):
    """Crop the given PIL Image at a random location.
    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
        padding (int or sequence, optional): Optional padding on each border
            of the image. Default is 0, i.e no padding. If a sequence of length
            4 is provided, it is used to pad left, top, right, bottom borders
            respectively.
    """

    def __init__(self, size, padding=0):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size
        self.padding = padding

    @staticmethod
    def get_params(img, output_size, random_state=None):
        """Get parameters for ``crop`` for a random crop.
        Args:
            img (PIL Image): Image to be cropped.
            output_size (tuple): Expected output size of the crop.
        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
        """
        random.seed(random_state)

        w, h = img.size
        th, tw = output_size
        if w == tw and h == th:
            return 0, 0, h, w

        i = random.randint(0, h - th)
        j = random.randint(0, w - tw)
        return i, j, th, tw

    def __call__(self, img, random_state=None):
        """
        Args:
            img (PIL Image): Image to be cropped.
        Returns:
            PIL Image: Cropped image.
        """
        if self.padding > 0:
            img = transforms.functional.pad(img, self.padding)  # pad the PIL image

        i, j, h, w = self.get_params(img, self.size, random_state=random_state)

        return transforms.functional.crop(img, i, j, h, w)

    def __repr__(self):
        return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding)

preprocess_train = RandomResizedCrop(size=1024)
preprocess_test = RandomResizedCrop(size=1024)


trainset = Annotations(img_root='public_shared/input/',
                      gt_root='public_shared/new_bin',
                      preprocess=preprocess_train,
                      num_folds=1)

testset = Annotations(img_root='test/input/',
                      gt_root='test/new_bin/',
                      preprocess=preprocess_test,
                      num_folds=1)

testset.train(False)

train_loader = DataLoader(trainset, batch_size=1, shuffle=True, num_workers=8, drop_last=True)
test_loader = DataLoader(testset, batch_size=1, shuffle=False, num_workers=8, drop_last=True)


dat = trainset[0]
img = dat[0].numpy().transpose(1,2,0)
gt = dat[1].numpy()

print(img.shape)
print(gt.shape)

plt.imshow(img.squeeze().astype(np.uint8))  # cast back to uint8; imshow clips float images to [0, 1]
plt.show()
plt.imshow(gt.squeeze())
plt.show()

log_dir = "log-mg3/"


def get_parameters(model, bias=False):
    modules_skipped = (
        nn.ReLU,
        nn.MaxPool2d,
        nn.Dropout2d,
        nn.Sequential,
        torchfcn.models.FCN8s,
    )
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            if bias:
                yield m.bias
            else:
                yield m.weight
        elif isinstance(m, nn.ConvTranspose2d):
            # weight is frozen because it is just a bilinear upsampling
            if bias:
                assert m.bias is None
        elif isinstance(m, modules_skipped):
            continue
        else:
            raise ValueError('Unexpected module: %s' % str(m))



configurations = {
    # same configuration as original work
    # https://github.com/shelhamer/fcn.berkeleyvision.org
    1: dict(
        max_iteration=200000,
        lr=1.0e-10,
        momentum=0.99,
        weight_decay=0.0005,
        interval_validate=4000,
    )
}

cfg = configurations[1]
out = log_dir

cuda = torch.cuda.is_available()

torch.manual_seed(1337)
if cuda:
    torch.cuda.manual_seed(1337)

resume = ""

model = torchfcn.models.FCN8sAtOnce(n_class=2)
start_epoch = 0
start_iteration = 0
if resume:
    checkpoint = torch.load(resume)
    model.load_state_dict(checkpoint['model_state_dict'])
    start_epoch = checkpoint['epoch']
    start_iteration = checkpoint['iteration']
else:
    vgg16 = torchfcn.models.VGG16(pretrained=True)
    model.copy_params_from_vgg16(vgg16)
if cuda:
    model = model.cuda()

optimizer = torch.optim.SGD(
    [
        {'params': get_parameters(model, bias=False)},
        {'params': get_parameters(model, bias=True),
         'lr': cfg['lr'] * 2, 'weight_decay': 0},
    ],
    lr=cfg['lr'],
    momentum=cfg['momentum'],
    weight_decay=cfg['weight_decay'])
if resume:
    optimizer.load_state_dict(checkpoint['optim_state_dict'])

trainer = torchfcn.Trainer(
    cuda=cuda,
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=test_loader,
    out=out,
    max_iter=cfg['max_iteration'],
    interval_validate=cfg.get('interval_validate', len(train_loader)),
)
trainer.epoch = start_epoch
trainer.iteration = start_iteration
# trainer.train()

def evaluate_model(model, data_loader):
    model.eval()

    processes = []
    mius = []

    for index in tqdm(range(len(data_loader.dataset))):
        _, gt_path = data_loader.dataset.samples[index]
        image, _ = data_loader.dataset[index]
        image = image.numpy()
        image.shape = (1, image.shape[0], image.shape[1], image.shape[2])
        prediction = np.zeros((image.shape[2], image.shape[3], 3), dtype=np.uint8)
        div_arr = np.zeros((image.shape[2], image.shape[3]), dtype=np.uint8)

        offsets_vertical = list(range(0, image.shape[2], 256))
        offsets_horizontal = list(range(0, image.shape[3], 256))

        # slide a 1024x1024 window over the page in 256-pixel steps;
        # overlapping tile predictions are accumulated and averaged below
        for v in offsets_vertical:
            for h in offsets_horizontal:
                data = image[:, :, v:v+1024, h:h+1024]
                data = torch.from_numpy(data)
                data = data.cuda()
                with torch.no_grad():  # volatile=True was removed from PyTorch; no_grad() replaces it
                    score = model(data)
                lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
                lbl_pred[lbl_pred == 0] = 2
                prediction[v:v+1024, h:h+1024, 2] += lbl_pred.astype(np.uint8).squeeze()
                div_arr[v:v+1024, h:h+1024] += 1

        prediction[:,:,2] = np.round(prediction[:,:,2]/div_arr)

        im = Image.fromarray(prediction)
        prediction_path = os.path.join(log_dir, "prediction-private")
        if not os.path.isdir(prediction_path):
            os.makedirs(prediction_path)
        prediction_filename = os.path.join(prediction_path, os.path.basename(gt_path))
        im.save(prediction_filename)

        processes.append(subprocess.Popen(["java", "-jar", "DIVA_Layout_Analysis_Evaluator/out/artifacts/LayoutAnalysisEvaluator.jar", "-p", prediction_filename, "-gt", gt_path], stdout=subprocess.PIPE))

    for p in processes:
        miu = float(p.communicate()[0].splitlines()[0].split()[-1])
        mius.append(miu)

    print(mius)
    print("average:", np.mean(mius))
    return np.mean(mius)
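
Since np.mean([]) silently returns nan with exactly the "Mean of empty slice" warning seen in the log below, a defensive version of the aggregation step (my sketch, not the author's code) would skip failed evaluator runs and fail loudly when no score was collected:

def collect_scores(processes):
    # Sketch of a more defensive replacement for the aggregation loop above:
    # skip evaluator runs that failed or printed nothing instead of silently
    # averaging an empty list.
    mius = []
    for p in processes:
        stdout, _ = p.communicate()
        if p.returncode != 0 or not stdout.strip():
            continue  # evaluator failed for this prediction; no score to parse
        mius.append(float(stdout.splitlines()[0].split()[-1]))
    if not mius:
        raise RuntimeError("no scores collected; check the evaluator jar path and arguments")
    return mius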


testset = Annotations(img_root='test/input/',
                      gt_root='test/new_gt/',
                      preprocess=None,
                      num_folds=1)

testset.train(False)

test_loader = DataLoader(testset, batch_size=1, shuffle=False, num_workers=8, drop_last=True)

evaluate_model(model, test_loader)

Error:

0%|          | 0/10 [00:00<?, ?it/s]/home/harsh/anaconda3/envs/vdlproject/lib/python3.6/site-packages/ipykernel_launcher.py:23: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
100%|██████████| 10/10 [00:13<00:00,  1.28s/it]
[]
average: nan

/home/harsh/anaconda3/envs/vdlproject/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
/home/harsh/anaconda3/envs/vdlproject/lib/python3.6/site-packages/numpy/core/_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

Output: average: nan (note that mius prints as an empty list, and the warnings show np.mean was taken over an empty slice, which yields nan).

Can anyone explain the cause of this error?

0 Answers:

No answers yet.