Caffe loss starts at 0 and stays at 0

Date: 2016-06-30 13:32:36

Tags: neural-network artificial-intelligence caffe conv-neural-network

I am working with a small CNN, defined here: http://pastebin.com/QHSxwrsT

When I start the training process, it reports a loss of 0, and the loss stays at 0 for the entire run.

Is there a way to fix this?

Here is my solver:

net: "Convnet/modele_CNN.prototxt"
test_iter: 100
test_interval: 100
base_lr: 0.000000001
lr_policy: "step"
gamma: 0.1
stepsize: 10
display: 1
max_iter: 450000
momentum: 0.9
weight_decay: 0.0005
snapshot: 100
snapshot_prefix: "Convnet/Feature_detect_train"
solver_mode: GPU
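
For reference, with lr_policy: "step" Caffe computes the learning rate as lr = base_lr * gamma^floor(iter / stepsize), so this solver starts at 1e-9 and divides the rate by 10 every 10 iterations. A minimal Python sketch of that schedule (just an illustration of the formula, not part of the training code):

# Caffe's "step" policy: lr = base_lr * gamma^floor(iter / stepsize)
base_lr, gamma, stepsize = 1e-9, 0.1, 10

for it in (0, 10, 20, 50):
    lr = base_lr * gamma ** (it // stepsize)
    print("iter %5d: lr = %.1e" % (it, lr))
# iter 0: 1.0e-09 ... iter 50: 1.0e-14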

... and the training prototxt:

# CNN to create a probability map for the features
# Author: A. AYDIN (with very heavy contributions from C. Wolf)
# Date: 21/06/2016 

name: "Feature_detection"

# These are for training
# This layer extracts the images
layer 
{
    name: "ImageData"
    type: "Data"
    top: "data"
    top: "redundant1"
    include
    {
        phase: TRAIN
    }
    data_param
    {
        source: "Convnet/lmdbImageTrain"
        batch_size: 1
        backend: LMDB
    }
}

# This one extracts the labels (which are images)
layer
{
    name: "ImageGT"
    type: "Data"
    top: "label"
    top: "redundant2"
    include
    {
        phase: TRAIN
    }
    data_param
    {
        source: "Convnet/lmdbLabelTrain"
        batch_size: 1
        backend: LMDB
    }
}

# These are for validation
# This layer extracts the images
layer 
{
    name: "ImageData"
    type: "Data"
    top: "data"
    top: "redundant1"    
    include
    {
        phase: TEST
    }
    data_param
    {
        source: "Convnet/lmdbImageTest"
        batch_size: 1
        backend: LMDB
    }
}

# This one extracts the labels (which are images)
layer
{
    name: "ImageGT"
    type: "Data"
    top: "label"
    top: "redundant2"
    include
    {
        phase: TEST
    }
    data_param
    {
        source: "Convnet/lmdbLabelTest"
        batch_size: 1
        backend: LMDB
    }
}
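
# (Note: each Data layer's two tops are the image blob and the scalar Datum
# label; the scalar labels are unused here, hence the "redundant" names.)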

# We are going to have 2 Conv+Norm+MaxPool layers 
layer
{
    name:"Conv5x5x32"
    type:"Convolution"
    bottom: "data"
    top: "conv1"
    convolution_param
    {
        kernel_size: 5 
        num_output: 32
        # The filters are 5x5x32
        pad: 2  
        # So the output is HxWx32 
    }
}

layer
{
    name: "Norm1"
    type: "LRN"
    bottom: "conv1"
    top: "norm1"
    lrn_param
    {
        # For now, I am using the default variables of:
        # local_size: 5
        # alpha: 1
        # beta: 0.75
        norm_region: WITHIN_CHANNEL 
    }
}

layer
{
    name: "Pool1"
    type: "Pooling"
    bottom: "norm1"
    top: "pool1"
    pooling_param
    {
        pool: MAX
        kernel_size: 2
        stride: 2
    }   
}
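
# (Size check: in Caffe a conv output dimension is (in + 2*pad - kernel)/stride + 1,
# so the 5x5 kernel with pad 2 preserves HxW; the 2x2 max pool with stride 2
# then halves both dimensions, giving H/2 x W/2 x 32.)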
# The first one is done, now onto the second

layer
{
    name:"Conv3x3x64_1"
    type:"Convolution"
    bottom: "pool1"
    top: "conv2"
    convolution_param
    {
        kernel_size: 3
        num_output: 64
        # The filters are 3x3x64
        pad: 1  
        # So the output is H/2xW/2x64 
    }
}

layer
{
    name: "Norm2"
    type: "LRN"
    bottom: "conv2"
    top: "norm2"
    lrn_param
    {
        # For now, I am using the default variables of:
        # local_size: 5
        # alpha: 1
        # beta: 0.75
        norm_region: WITHIN_CHANNEL 
    }
}

layer
{
    name: "Pool2"
    type: "Pooling"
    bottom: "norm2"
    top: "pool2"
    pooling_param
    {
        pool: MAX
        kernel_size: 2
        stride: 2
    }   
}

# Now that we are done with the Conv+Norm+Max, we will have 3 layers of conv3x3x64

layer
{
    name:"Conv3x3x64_2"
    type:"Convolution"
    bottom: "pool2"
    top: "conv3"
    convolution_param
    {
        kernel_size: 3
        num_output: 64
        # The filters are 3x3x64
        pad: 1  
        # So the output is H/4xW/4x64 
    }
}

layer
{
    name:"Conv3x3x64_3"
    type:"Convolution"
    bottom: "conv3"
    top: "conv4"
    convolution_param
    {
        kernel_size: 3
        num_output: 64
        # The filters are 3x3x64
        pad: 1  
        # So the output is H/4xW/4x64
    }
}

layer
{
    name:"Conv3x3x64_4"
    type:"Convolution"
    bottom: "conv4"
    top: "conv5"
    convolution_param
    {
        kernel_size: 3
        num_output: 64
        # The filters are 3x3x64
        pad: 1  
        # So the output is H/4xW/4x64
    }
}

# Followed by a layer of conv3x3x256
layer
{
    name:"Conv3x3x256"
    type:"Convolution"
    bottom: "conv5"
    top: "conv6"
    convolution_param
    {
        kernel_size: 3
        num_output: 256
        # The filters are 3x3x256
        pad: 1  
        # So the output is H/4xW/4x256 
    }
}
# And lastly, a conv3x3x1
layer
{
    name:"Conv3x3x1"
    type:"Convolution"
    bottom: "conv6"
    top: "conv7"
    convolution_param
    {
        kernel_size: 3
        num_output: 1
        # The filters are 3x3x1
        pad: 1  
        # So the output is H/4xW/4x1 
    }
}
# We transform the last feature map into a probability map
layer
{
    name: "Prob"
    type: "Softmax"
    bottom: "conv7"
    top: "prob"
}
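
# (Note: Caffe's Softmax normalizes over the channel axis, axis 1, by default.)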


# Lastly we calculate the loss
layer
{
    name:"Loss"
    type: "SoftmaxWithLoss"
    bottom: "prob"
    bottom: "label"
    top: "loss"
}
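
As a sanity check, the label LMDB can be inspected to confirm the ground-truth maps are not constant. A minimal sketch, assuming the databases hold standard, unencoded uint8 Datum records (the path is the one from the prototxt above):

import lmdb
import numpy as np
from caffe.proto import caffe_pb2

# Open the label database read-only and decode the first record
env = lmdb.open("Convnet/lmdbLabelTrain", readonly=True)
with env.begin() as txn:
    key, value = next(txn.cursor().iternext())
    datum = caffe_pb2.Datum()
    datum.ParseFromString(value)
    # A Datum stores raw pixel bytes plus channels/height/width
    label_map = np.frombuffer(datum.data, dtype=np.uint8).reshape(
        datum.channels, datum.height, datum.width)
    print(key, label_map.shape, label_map.min(), label_map.max())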

0 Answers:

No answers yet.