我用两种方式在 TensorFlow 中定义了同一个分类模型,我认为两者应该是等价的,但实际上得到了不同的结果。第一种方式是通过函数来定义模型:
def network(x, mode_name):
    """Assemble three conv + max-pool stages followed by a linear classifier.

    Args:
        x: Input batch tensor. The first filter shape ([13, 13, 1, 32])
            expects one input channel, and the static batch dimension must
            be known because it is used in the flattening reshape below.
        mode_name: String prepended to every variable name created here.

    Returns:
        The output of the final fully connected layer (no softmax applied).
        Its width is the module-level ``label_number``.
    """
    unit_stride = [1, 1, 1, 1]

    # Stage 1: 13x13 convolution to 32 maps, then 4x4 max-pooling.
    net = conv_layer(x, conv_size=[13, 13, 1, 32], stride_size=unit_stride,
                     name=mode_name + "conv1")
    net = pooling_layer(net, ksize=[1, 4, 4, 1], stride_size=[1, 4, 4, 1])

    # Stage 2: 7x7 convolution to 64 maps, then 2x2 max-pooling.
    net = conv_layer(net, conv_size=[7, 7, 32, 64], stride_size=unit_stride,
                     name=mode_name + "conv2")
    net = pooling_layer(net, ksize=[1, 2, 2, 1], stride_size=[1, 2, 2, 1])

    # Stage 3: 3x3 convolution to 256 maps, then 2x2 max-pooling.
    net = conv_layer(net, conv_size=[3, 3, 64, 256], stride_size=unit_stride,
                     name=mode_name + "conv3")
    net = pooling_layer(net, ksize=[1, 2, 2, 1], stride_size=[1, 2, 2, 1])

    # Collapse everything except the batch axis into one feature axis.
    batch, height, width, channels = net.get_shape().as_list()
    flat = tf.reshape(net, [batch, height * width * channels])

    return fc_layer(flat, label_number, name=mode_name + "fc")
def fc_layer(prev_layer, n_weight, name):
    """Apply an affine (fully connected) layer without an activation.

    Args:
        prev_layer: 2-D input tensor; its second static dimension is used as
            the number of rows of the weight matrix.
        n_weight: Number of output units.
        name: Prefix for the variable names; 'W' and 'b' are appended.

    Returns:
        ``matmul(prev_layer, W)`` with the bias ``b`` added.
    """
    fan_in = prev_layer.get_shape()[1]

    weights = tf.get_variable(
        name + 'W',
        dtype=tf.float32,
        shape=[fan_in, n_weight],
        initializer=tf.truncated_normal_initializer(stddev=0.0001),
    )
    # The bias takes its shape from the constant initial value.
    bias = tf.get_variable(
        name + "b",
        dtype=tf.float32,
        initializer=tf.constant(0.0001, shape=[n_weight], dtype=tf.float32),
    )

    projected = tf.matmul(prev_layer, weights)
    return tf.nn.bias_add(projected, bias)
def conv_layer(prev_layer, conv_size, stride_size, name):
    """Apply a VALID-padded 2-D convolution, add a bias, and apply ReLU.

    Args:
        prev_layer: Input tensor passed to ``tf.nn.conv2d``.
        conv_size: Filter shape; its last entry is also used as the length
            of the bias vector.
        stride_size: Strides passed to ``tf.nn.conv2d``.
        name: Prefix for the variable names; 'W' and 'b' are appended.

    Returns:
        The rectified, biased convolution output.
    """
    kernel = tf.get_variable(
        name + 'W',
        dtype=tf.float32,
        shape=conv_size,
        initializer=tf.truncated_normal_initializer(stddev=0.0001),
    )
    # One bias value per output channel; shape comes from the initial value.
    bias = tf.get_variable(
        name + 'b',
        dtype=tf.float32,
        initializer=tf.constant(0.0001, shape=[conv_size[3]], dtype=tf.float32),
    )

    pre_activation = tf.nn.conv2d(prev_layer, kernel, stride_size, padding='VALID') + bias
    return tf.nn.relu(pre_activation)
def pooling_layer(prev_layer, ksize, stride_size):
    """Max-pool ``prev_layer`` with VALID padding.

    Args:
        prev_layer: Input tensor passed to ``tf.nn.max_pool``.
        ksize: Pooling window size.
        stride_size: Pooling window strides.

    Returns:
        The pooled tensor.
    """
    pooled = tf.nn.max_pool(
        prev_layer,
        ksize=ksize,
        strides=stride_size,
        padding='VALID',
    )
    return pooled
然后在 main 中使用这些函数:
# Build the model in its own graph instead of the process-wide default graph.
graph = tf.Graph()
with graph.as_default():
    # Training batches are fed at run time through placeholders; the test
    # set is embedded in the graph as a constant.
    tf_train_dataset = tf.placeholder(
        tf.float32,
        shape=(batch_size, image_size, image_size, image_channel),
    )
    tf_train_labels = tf.placeholder(
        tf.float32,
        shape=(batch_size, label_number),
    )
    tf_test_dataset = tf.constant(test_dataset)

    # Training tower: the first pass through network() inside the scope.
    with tf.variable_scope("simple_cnn"):
        logits = network(tf_train_dataset, "simple_cnn")

    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=tf_train_labels)
    loss = tf.reduce_mean(per_example_loss)
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    train_prediction = tf.nn.softmax(logits=logits)

    # Test tower: re-enter the same scope and mark it as reusing, so the
    # second network() call requests existing variables instead of new ones.
    with tf.variable_scope("simple_cnn") as scope:
        scope.reuse_variables()
        test_logits = network(tf_test_dataset, "simple_cnn")
        test_prediction = tf.nn.softmax(test_logits)
另一种方式是在 main 中先创建所有变量,然后直接定义模型:
# Build the model in its own graph, creating every variable up front and
# sharing them between the training and test passes through a closure.
graph = tf.Graph()
with graph.as_default():
    # Training batches are fed at run time through placeholders; the test
    # set is embedded in the graph as a constant.
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size, image_size, image_channel))
    tf_train_labels = tf.placeholder(tf.float32,
                                     shape=(batch_size, label_number))
    tf_test_dataset = tf.constant(test_dataset)

    # All weight tensors share one truncated-normal initializer.
    initer = tf.truncated_normal_initializer(stddev=0.01)

    conv_w1 = tf.get_variable(name="conv_w1", dtype=tf.float32, shape=[13, 13, 1, 32], initializer=initer)
    conv_b1 = tf.get_variable(name="conv_b1", dtype=tf.float32,
                              initializer=tf.constant(0.01, shape=[32, ], dtype=tf.float32))
    conv_w2 = tf.get_variable(name="conv_w2", dtype=tf.float32, shape=[7, 7, 32, 64], initializer=initer)
    conv_b2 = tf.get_variable(name="conv_b2", dtype=tf.float32,
                              initializer=tf.constant(0.01, shape=[64, ], dtype=tf.float32))
    conv_w3 = tf.get_variable(name="conv_w3", dtype=tf.float32, shape=[3, 3, 64, 256], initializer=initer)
    conv_b3 = tf.get_variable(name="conv_b3", dtype=tf.float32,
                              initializer=tf.constant(0.01, shape=[256, ], dtype=tf.float32))
    # NOTE(review): 2304 is the flattened size of the last pooling output and
    # therefore depends on image_size -- confirm if the input size changes.
    fc_w = tf.get_variable(name='fc_w', dtype=tf.float32, shape=[2304, label_number], initializer=initer)
    fc_b = tf.get_variable(name="fc_b", dtype=tf.float32,
                           initializer=tf.constant(0.0001, shape=[label_number, ], dtype=tf.float32))

    def model(x):
        """Run ``x`` through the three conv/pool stages and the linear head.

        Uses the variables created above, so every call shares the same
        weights.

        Args:
            x: Input batch tensor with a statically known batch dimension
                (it is used in the flattening reshape).

        Returns:
            The output of the final fully connected layer (no softmax).
        """
        conv1 = tf.nn.conv2d(x, conv_w1, strides=[1, 1, 1, 1], padding='VALID') + conv_b1
        relu1 = tf.nn.relu(conv1)
        maxp1 = tf.nn.max_pool(relu1, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')

        conv2 = tf.nn.conv2d(maxp1, conv_w2, strides=[1, 1, 1, 1], padding="VALID") + conv_b2
        relu2 = tf.nn.relu(conv2)
        maxp2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

        # Fix: conv_b3 was created but never applied; add it here so the
        # third stage matches the first two.
        conv3 = tf.nn.conv2d(maxp2, conv_w3, strides=[1, 1, 1, 1], padding='VALID') + conv_b3
        relu3 = tf.nn.relu(conv3)
        maxp3 = tf.nn.max_pool(relu3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

        # Collapse everything except the batch axis into one feature axis.
        shape = maxp3.get_shape().as_list()
        reshape = tf.reshape(maxp3, [shape[0], shape[1] * shape[2] * shape[3]])
        fc = tf.nn.bias_add(tf.matmul(reshape, fc_w), fc_b)
        return fc

    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
    optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
    train_prediction = tf.nn.softmax(logits=logits)
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
我认为这两种方式定义的实际上是同一个模型。但是在训练时,第一种方式定义的模型损失(cost)始终降不下来,而第二种方式定义的模型可以正常训练。请不必关注模型本身的好坏,我只想知道造成这种差异的原因是什么?两者的输入数据完全相同。
答案 0 :(得分:0)
看看你的变量。
# Test-time pass from the question: re-enter the "simple_cnn" scope, mark it
# as reusing, and build the network again on the test data.
with tf.variable_scope("simple_cnn") as scope:
    scope.reuse_variables()
    test_logits = network(tf_test_dataset, "simple_cnn")
    test_prediction = tf.nn.softmax(test_logits)
在第一个模型中,测试时使用的变量名会多出一个 simple_cnn/ 前缀,所以测试时用到的并不是训练中学到的变量。
请参考 Sharing Variables(共享变量)示例,了解如何共享变量。就你的情况而言,应把所有内容放在同一个作用域内(即训练模型之前也应加上 with tf.variable_scope("simple_cnn") ...)。