Question

我用两种方式在 TensorFlow 中定义了一个分类模型，我认为它们的效果应该相同，但实际上得到了不同的结果。第一种方式是通过函数来定义模型：

def network(x, mode_name):
    """Build the three-stage conv/max-pool classifier and return its logits.

    Each stage applies `conv_layer` followed by `pooling_layer`. The final
    feature map is flattened and passed through `fc_layer`, whose output
    width is the module-level `label_number`.

    Args:
        x: Input tensor for the first convolution. The first kernel is
            [13, 13, 1, 32], so a single input channel is expected.
        mode_name: Prefix prepended to the name of every variable created
            by the layers built here.

    Returns:
        The output of the fully connected layer (un-normalised logits).
    """
    unit_stride = [1, 1, 1, 1]
    # (variable-name suffix, conv kernel shape, pooling window == pooling stride)
    stages = (
        ("conv1", [13, 13, 1, 32], [1, 4, 4, 1]),
        ("conv2", [7, 7, 32, 64], [1, 2, 2, 1]),
        ("conv3", [3, 3, 64, 256], [1, 2, 2, 1]),
    )

    features = x
    for suffix, kernel_shape, window in stages:
        features = conv_layer(features, conv_size=kernel_shape,
                              stride_size=unit_stride, name=mode_name + suffix)
        features = pooling_layer(features, ksize=window, stride_size=window)

    # The flatten uses the static shape, so every dimension (including the
    # batch dimension) has to be known at graph-construction time.
    batch, height, width, channels = features.get_shape().as_list()
    flat = tf.reshape(features, [batch, height * width * channels])
    return fc_layer(flat, label_number, name=mode_name + "fc")

def fc_layer(prev_layer, n_weight, name):
    """Apply a fully connected (affine) layer without an activation.

    Args:
        prev_layer: 2-D input tensor; its second static dimension is used as
            the number of input units.
        n_weight: Number of output units.
        name: Prefix for the variable names (`name + 'W'` and `name + "b"`).

    Returns:
        `matmul(prev_layer, W)` with the bias `b` added.
    """
    fan_in = prev_layer.get_shape()[1]
    weights = tf.get_variable(
        name + 'W',
        dtype=tf.float32,
        shape=[fan_in, n_weight],
        initializer=tf.truncated_normal_initializer(stddev=0.0001),
    )
    # The bias takes its shape from the constant used as its initial value.
    bias_init = tf.constant(0.0001, shape=[n_weight], dtype=tf.float32)
    biases = tf.get_variable(name + "b", dtype=tf.float32, initializer=bias_init)
    projected = tf.matmul(prev_layer, weights)
    return tf.nn.bias_add(projected, biases)


def conv_layer(prev_layer, conv_size, stride_size, name):
    """Apply a VALID-padded 2-D convolution, add a bias, then ReLU.

    Args:
        prev_layer: Input tensor passed to `tf.nn.conv2d`.
        conv_size: Kernel shape; the entry at index 3 is also used as the
            length of the bias vector.
        stride_size: Strides passed to `tf.nn.conv2d`.
        name: Prefix for the variable names (`name + 'W'` and `name + 'b'`).

    Returns:
        `relu(conv2d(prev_layer, W) + b)`.
    """
    kernel = tf.get_variable(
        name + 'W',
        dtype=tf.float32,
        shape=conv_size,
        initializer=tf.truncated_normal_initializer(stddev=0.0001),
    )
    # The bias takes its shape from the constant used as its initial value.
    bias_init = tf.constant(0.0001, shape=[conv_size[3]], dtype=tf.float32)
    bias = tf.get_variable(name + 'b', dtype=tf.float32, initializer=bias_init)

    convolved = tf.nn.conv2d(prev_layer, kernel, stride_size, padding='VALID')
    pre_activation = convolved + bias
    return tf.nn.relu(pre_activation)

def pooling_layer(prev_layer, ksize, stride_size):
    """Max-pool `prev_layer` with VALID padding.

    Args:
        prev_layer: Input tensor passed to `tf.nn.max_pool`.
        ksize: Pooling window size.
        stride_size: Pooling strides.

    Returns:
        The pooled tensor.
    """
    pooled = tf.nn.max_pool(
        prev_layer,
        ksize=ksize,
        strides=stride_size,
        padding='VALID',
    )
    return pooled

然后在 main 中使用这些函数：

# Build the function-based model in its own graph.
graph = tf.Graph()

with graph.as_default():
    # Input data: training batches are fed at run time through fixed-size
    # placeholders; the test set is embedded in the graph as a constant.
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size, image_size, image_channel))
    tf_train_labels = tf.placeholder(tf.float32,
                                     shape=(batch_size, label_number))
    tf_test_dataset = tf.constant(test_dataset)

    # Variables are created inside the "simple_cnn" variable scope.
    # network() additionally prefixes every variable name with its second
    # argument, so "simple_cnn" appears in each name twice.
    with tf.variable_scope("simple_cnn") as scope:
        logits = network(tf_train_dataset, "simple_cnn")

    # Softmax cross-entropy against the labels, averaged over the batch.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

    # Plain gradient descent with a learning rate of 0.05.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    train_prediction = tf.nn.softmax(logits=logits)

    # Re-enter the same variable scope and switch it to reuse mode before
    # building the test network, so that tf.get_variable looks up existing
    # variables instead of creating new ones.
    with tf.variable_scope("simple_cnn") as scope:
        scope.reuse_variables()
        test_prediction = tf.nn.softmax(network(tf_test_dataset, "simple_cnn"))

另一种方式是在 main 中直接声明所有变量并定义模型：

# Build the inline model (variables declared explicitly) in its own graph.
graph = tf.Graph()

with graph.as_default():
    # Input data: training batches are fed at run time through fixed-size
    # placeholders; the test set is embedded in the graph as a constant.
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size, image_size, image_channel))
    tf_train_labels = tf.placeholder(tf.float32,
                                     shape=(batch_size, label_number))
    tf_test_dataset = tf.constant(test_dataset)

    # All weight tensors share one truncated-normal initializer; each bias
    # takes its shape and initial value from a constant tensor.
    initer = tf.truncated_normal_initializer(stddev=0.01)
    conv_w1 = tf.get_variable(name="conv_w1", dtype=tf.float32, shape=[13, 13, 1, 32], initializer=initer)
    conv_b1 = tf.get_variable(name="conv_b1", dtype=tf.float32,
                              initializer=tf.constant(0.01, shape=[32, ], dtype=tf.float32))
    conv_w2 = tf.get_variable(name="conv_w2", dtype=tf.float32, shape=[7, 7, 32, 64], initializer=initer)
    conv_b2 = tf.get_variable(name="conv_b2", dtype=tf.float32,
                              initializer=tf.constant(0.01, shape=[64, ], dtype=tf.float32))
    conv_w3 = tf.get_variable(name="conv_w3", dtype=tf.float32, shape=[3, 3, 64, 256], initializer=initer)
    conv_b3 = tf.get_variable(name="conv_b3", dtype=tf.float32,
                              initializer=tf.constant(0.01, shape=[256, ], dtype=tf.float32))
    # 2304 = 3 * 3 * 256: the flattened size of the last pooling output must
    # match this, which depends on image_size.
    fc_w = tf.get_variable(name='fc_w', dtype=tf.float32, shape=[2304, label_number], initializer=initer)
    fc_b = tf.get_variable(name="fc_b", dtype=tf.float32,
                           initializer=tf.constant(0.0001, shape=[label_number, ], dtype=tf.float32))


    def model(x):
        """Return the logits for the input batch `x`.

        Three conv -> bias -> ReLU -> max-pool stages followed by a fully
        connected layer. The function closes over the variables declared
        above, so every call shares the same weights.
        """
        conv1 = tf.nn.conv2d(x, conv_w1, strides=[1, 1, 1, 1], padding='VALID') + conv_b1
        relu1 = tf.nn.relu(conv1)
        maxp1 = tf.nn.max_pool(relu1, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')

        conv2 = tf.nn.conv2d(maxp1, conv_w2, strides=[1, 1, 1, 1], padding="VALID") + conv_b2
        relu2 = tf.nn.relu(conv2)
        maxp2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

        # The third stage adds its bias just like the first two stages.
        conv3 = tf.nn.conv2d(maxp2, conv_w3, strides=[1, 1, 1, 1], padding='VALID') + conv_b3
        relu3 = tf.nn.relu(conv3)
        maxp3 = tf.nn.max_pool(relu3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

        # The flatten uses the static shape, so every dimension (including
        # the batch dimension) has to be known at graph-construction time.
        shape = maxp3.get_shape().as_list()

        reshape = tf.reshape(maxp3, [shape[0], shape[1] * shape[2] * shape[3]])
        fc = tf.nn.bias_add(tf.matmul(reshape, fc_w), fc_b)
        return fc

    logits = model(tf_train_dataset)
    # Softmax cross-entropy against the labels, averaged over the batch.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

    # Plain gradient descent with a learning rate of 0.1.
    optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    train_prediction = tf.nn.softmax(logits=logits)

    # The second call to model() builds new ops on the same variables.
    test_prediction = tf.nn.softmax(model(tf_test_dataset))

我认为这两种方式定义的实际上是同一个模型。但是在训练时，第一种方式的损失（cost）始终没有下降，而第二种方式定义的模型可以成功训练。请不要关注模型本身，我只想知道造成这种差异的原因是什么。两者的输入数据完全相同。

Answer 1

看看你的变量。

# Excerpt from the first model: the "simple_cnn" scope is entered a second
# time and set to reuse mode before the test network is built.
with tf.variable_scope("simple_cnn") as scope:
    scope.reuse_variables()
    test_prediction = tf.nn.softmax(network(tf_test_dataset, "simple_cnn"))

在第一个模型中，测试时所用的变量名称应该会多出一个 simple_cnn/ 前缀。所以你在测试时使用的并不是训练得到的变量。

请查看 TensorFlow 的 Sharing Variables（共享变量）示例，了解如何共享变量。就你的情况而言，应将所有内容放在同一个变量作用域内（即 with tf.variable_scope("simple_cnn") ... 也应该放在训练模型之前）。

如何在 TensorFlow 中定义模型？

1 个答案: