AlexNet TensorFlow implementation gets very low accuracy

Date: 2018-07-23 08:13:35

Tags: python-3.x tensorflow deep-learning

I tried to implement AlexNet myself using the basic TensorFlow Python API, but during training I get some unexpected output and the test accuracy is very low.

Training dataset: oxford17

Here is my code:

import tensorflow as tf
import tflearn.datasets.oxflower17 as oxflower17
from IPython import embed

# network training hyperparameters
learning_rate = 0.001
batch_size = 64
log_dir = './tflog/alexnet/'

# define input size
image_width = 224
image_height = 224
image_depth = 3
num_labels = 17

# initialize datasets
print('-----Dataset initialize start-----')
ox17_dataset, ox17_labels = oxflower17.load_data(one_hot=True)
train_dataset = ox17_dataset[:1024, :, :, :]
train_labels = ox17_labels[:1024, :]

test_dataset = ox17_dataset[1024:, :, :, :]
test_labels = ox17_labels[1024:, :]
print('Training dataset size is {}'.format(train_dataset.shape))
print('Test dataset size is {}'.format(test_dataset.shape))
print('-----Dataset initialize complete-----')


# initialize all weights and variables
def get_alexnet_variables(output_class_num):
    with tf.name_scope('conv1'):
        w1 = tf.Variable(tf.truncated_normal(shape=[11, 11, 3, 96], stddev=0.1), name='w1')
        b1 = tf.Variable(tf.zeros([96]), name='b1')
    with tf.name_scope('conv2'):
        w2 = tf.Variable(tf.truncated_normal(shape=[5, 5, 96, 256], stddev=0.1), name='w2')
        b2 = tf.Variable(tf.constant(1.0, shape=[256]), name='b2')
    with tf.name_scope('conv3'):
        w3 = tf.Variable(tf.truncated_normal(shape=[3, 3, 256, 384], stddev=0.1), name='w3')
        b3 = tf.Variable(tf.zeros([384]), name='b3')
    with tf.name_scope('conv4'):
        w4 = tf.Variable(tf.truncated_normal(shape=[3, 3, 384, 384], stddev=0.1), name='w4')
        b4 = tf.Variable(tf.constant(1.0, shape=[384]), name='b4')
    with tf.name_scope('conv5'):
        w5 = tf.Variable(tf.truncated_normal(shape=[3, 3, 384, 256], stddev=0.1), name='w5')
        b5 = tf.Variable(tf.zeros([256]), name='b5')
    with tf.name_scope('fc6'):
        w6 = tf.Variable(tf.truncated_normal(shape=[(224 // 2 ** 5) * (224 // 2 ** 5) * 256, 4096], stddev=0.1),
                        name='w6')
        b6 = tf.Variable(tf.constant(1.0, shape=[4096]), name='b6')
    with tf.name_scope('fc7'):
        w7 = tf.Variable(tf.truncated_normal(shape=[4096, 4096], stddev=0.1), name='w7')
        b7 = tf.Variable(tf.constant(1.0, shape=[4096]), name='b7')
    with tf.name_scope('fc8'):
        w8 = tf.Variable(tf.truncated_normal(shape=[4096, output_class_num], stddev=0.1), name='w8')
        b8 = tf.Variable(tf.constant(1.0, shape=[output_class_num]), name='b8')

    variables = dict(w1=w1, w2=w2, w3=w3, w4=w4, w5=w5, w6=w6, w7=w7, w8=w8, b1=b1, b2=b2, b3=b3, b4=b4, b5=b5, b6=b6,
                    b7=b7, b8=b8)

    return variables


# build alexnet
def alexnet(variables, input_data, keep_prob_for_net):
    # layer one: convolutional layer
    with tf.name_scope('conv1'):
        conv1 = tf.nn.conv2d(input_data, variables['w1'], strides=[1, 4, 4, 1], padding='SAME')
        conv1 = tf.nn.bias_add(conv1, variables['b1'])
        conv1 = tf.nn.relu(conv1)
        conv1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv1 = tf.nn.local_response_normalization(conv1)

    # layer two: convolutional layer
    with tf.name_scope('conv2'):
        conv2 = tf.nn.conv2d(conv1, variables['w2'], strides=[1, 1, 1, 1], padding='SAME')
        conv2 = tf.nn.bias_add(conv2, variables['b2'])
        conv2 = tf.nn.relu(conv2)
        conv2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv2 = tf.nn.local_response_normalization(conv2)

    # layer three: convolutional layer
    with tf.name_scope('conv3'):
        conv3 = tf.nn.conv2d(conv2, variables['w3'], strides=[1, 1, 1, 1], padding='SAME')
        conv3 = tf.nn.bias_add(conv3, variables['b3'])
        conv3 = tf.nn.relu(conv3)

    # layer four: convolutional layer
    with tf.name_scope('conv4'):
        conv4 = tf.nn.conv2d(conv3, variables['w4'], strides=[1, 1, 1, 1], padding='SAME')
        conv4 = tf.nn.bias_add(conv4, variables['b4'])
        conv4 = tf.nn.relu(conv4)

    # layer five: convolutional layer
    with tf.name_scope('conv5'):
        conv5 = tf.nn.conv2d(conv4, variables['w5'], strides=[1, 1, 1, 1], padding='SAME')
        conv5 = tf.nn.bias_add(conv5, variables['b5'])
        conv5 = tf.nn.relu(conv5)
        conv5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv5 = tf.nn.local_response_normalization(conv5)

    # flat data
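    # the stride-4 conv1 plus three stride-2 max pools shrink the 224x224 input by 4*2*2*2 = 32 (= 2**5),
    # leaving a 7x7x256 feature map, hence the flattened size below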
    with tf.name_scope('flatten'):
        flatten = tf.reshape(conv5, shape=[-1, (224 // 2 ** 5) * (224 // 2 ** 5) * 256])

    # layer six: fully connected layer
    with tf.name_scope('fc6'):
        fc6 = tf.matmul(flatten, variables['w6']) + variables['b6']
        fc6 = tf.nn.tanh(fc6)
        fc6 = tf.nn.dropout(fc6, keep_prob=keep_prob_for_net)

    # layer seven: fully connected layer
    with tf.name_scope('fc7'):
        fc7 = tf.matmul(fc6, variables['w7']) + variables['b7']
        fc7 = tf.nn.tanh(fc7)
        fc7 = tf.nn.dropout(fc7, keep_prob=keep_prob_for_net)

    # layer eight: fully connected layer
    with tf.name_scope('fc8'):
        logits_output = tf.matmul(fc7, variables['w8']) + variables['b8']

    return logits_output


# define placeholder, loss and accuracy
network_input = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
true_labels = tf.placeholder(tf.float32, shape=[None, 17])
keep_prob = tf.placeholder(tf.float32)

# network output
y_ = alexnet(get_alexnet_variables(17), network_input, keep_prob)

# cross entropy loss
cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=true_labels, logits=y_))
tf.summary.scalar('cross entropy', cross_entropy_loss)

# training step
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy_loss)

# calculate accuracy
correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(true_labels, 1))

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('accuracy', accuracy)

# run training process
with tf.Session() as sess:
    print('-----Training Start-----')
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(log_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(log_dir + '/test')
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
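    # i cycles 64-image mini-batches through the 1024 training images; j counts optimization steps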
    i = 0
    for j in range(1000):
        i += 1
        if i * batch_size > 1024:
            i = 1
        print('Train step {} running {}/1024 to {}/1024'.format(j, (i - 1) * batch_size, i * batch_size))
        batch_image = train_dataset[(i - 1) * batch_size:i * batch_size, :, :, :]
        batch_label = train_labels[(i - 1) * batch_size:i * batch_size, :]
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        # train_step.run(feed_dict={network_input: batch_image, true_labels: batch_label, keep_prob: 0.5})
        summary, _ = sess.run([merged, train_step],
                              feed_dict={network_input: batch_image, true_labels: batch_label, keep_prob: 0.5},
                              run_metadata=run_metadata, options=run_options)
        train_writer.add_summary(summary, j)
        if j % 10 == 0:
            print('Train Step {}, Current Test Accuracy is {}'.format(
                j, accuracy.eval(feed_dict={network_input: batch_image,
                                            true_labels: batch_label,
                                            keep_prob: 1.0})))

    print('-----Training Complete-----')
    print('-----Test Start-----')
    print('Test Accuracy is {}'.format(
        accuracy.eval(feed_dict={network_input: test_dataset, true_labels: test_labels, keep_prob: 1.0})))
    print('-----Test Complete-----')

During training, TensorBoard shows the accuracy and the cross-entropy. The results are as follows:

Accuracy result: [TensorBoard screenshot]

Cross-entropy result: [TensorBoard screenshot]

As you can see, the accuracy bounces up and down around 1/17, which equals random-choice accuracy. Meanwhile, the cross-entropy loss stays above 10.
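For reference, both baselines are easy to check (a quick side calculation, not part of the training script above):

import math

num_labels = 17
print('random-guess accuracy:', 1.0 / num_labels)      # ~0.0588, the value the accuracy curve hovers around
print('uniform-softmax loss:', math.log(num_labels))   # ~2.83; a loss above 10 means confidently wrong predictions rather than random ones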

When I compare against the results of the code from tflearn/alexnet, its loss drops from 1.7 to 0.9 and its accuracy climbs from 0.1 to 0.9, so I must be doing something wrong here.

After 1000 iterations, the test accuracy is 0.0476, which is below random-choice accuracy.

I have checked the model definition, the variable declarations, and the loss, but I cannot find why I get these unexpected results.

I also tried changing the learning rate to 0.1, or the batch size to 10/64/128, but nothing changed.

Thanks in advance for any help.

1 Answer:

Answer 0 (score: 0):

As @ThomasPinetz pointed out, a learning rate that was too high caused this problem.

After changing the learning rate from 0.001 to 1e-5 and extending the number of training iterations from 1000 to 15000, I got the following results from TensorBoard:
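In code, only two lines of the script in the question change (a sketch; everything else stays as posted):

learning_rate = 1e-5        # was 0.001; with Adam, the smaller step size lets the loss actually decrease

for j in range(15000):      # was range(1000); more iterations compensate for the smaller steps
    ...                     # training-loop body unchanged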

Accuracy: [TensorBoard screenshot]

Cross-entropy loss: [TensorBoard screenshot]

After training, the test accuracy is 0.7410, which is acceptable without any fine-tuning.