Stable initialization in TensorFlow

Asked: 2017-01-17 17:36:10

Tags: python tensorflow neural-network deep-learning

I have a strange problem with TensorFlow initialization. I have a computation graph that produces some results, and if I add an op that is not actually connected to the input/output flow I am testing, I get different results even though the random_seed is the same. Any hints? Here is the code and how to reproduce the problem:

import collections
import logging
import sys

import tensorflow as tf

BATCH_SIZE = None
SENTENCE_LENGTH = None
FORMULA_LENGTH = None
VOCABULARY_SIZE = None
SWITCH_SIZE = None
EMISSION_SIZE = None
POINTING_SIZE = None
EMBEDDING_SIZE = None
HIDDEN_SIZE = None

class Model(object):
    """Base model."""

    def __init__(self, trainable=True):
        """Initialize a model instance."""
        self._trainable = trainable
        input_shape = [BATCH_SIZE, None]
        output_shape = [BATCH_SIZE, None]
        self._words = tf.placeholder(dtype=tf.int32, shape=input_shape)
        self._switch = tf.placeholder(dtype=tf.int32, shape=output_shape)
        self._emission = tf.placeholder(dtype=tf.int32, shape=output_shape)
        self._mask = tf.placeholder(dtype=tf.float32, shape=output_shape)
        self._sentence_length = tf.placeholder(
            dtype=tf.int32, shape=[BATCH_SIZE])
        self._formula_length = tf.placeholder(
            dtype=tf.int32, shape=[BATCH_SIZE])

        with tf.device('CPU:0'):
            self._gs = tf.get_variable(
                'global_step', dtype=tf.int32, initializer=0)

        with tf.variable_scope('Embedding') as scope:
            with tf.device('CPU:0'):
                embeddings_shape = [VOCABULARY_SIZE, EMBEDDING_SIZE]
                embeddings = tf.get_variable('E', embeddings_shape)
                inputs = tf.nn.embedding_lookup(embeddings, self._words)

        with tf.variable_scope('Encoder') as scope:
            enc_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE)
            enc_state = enc_cell.zero_state(BATCH_SIZE, tf.float32)
            enc_output, _ = tf.nn.dynamic_rnn(
                enc_cell, inputs,
                sequence_length=self._sentence_length,
                initial_state=enc_state, scope=scope)
            slices = []
            for index, length in enumerate(tf.unpack(self._sentence_length)):
                slice_ = tf.slice(enc_output,
                                  begin=[index, length - 1, 0],
                                  size=[1, 1, HIDDEN_SIZE])
                slices.append(slice_)
            last_enc_out = tf.concat(0, slices)

        with tf.variable_scope('Decoder') as scope:
            self._ll = tf.shape(self._switch)[1]
            dec_inputs = tf.tile(last_enc_out, [1, self._ll, 1])  # <- HERE!
            dec_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE)
            dec_state = dec_cell.zero_state(BATCH_SIZE, tf.float32)
            dec_output, _ = tf.nn.dynamic_rnn(
                dec_cell, dec_inputs,
                sequence_length=self._formula_length,
                initial_state=dec_state, scope=scope)

        with tf.variable_scope('Switch') as scope:
            switch_input = tf.reshape(dec_output, [-1, HIDDEN_SIZE])
            switch_w = tf.get_variable('W', [HIDDEN_SIZE, SWITCH_SIZE])
            switch_b = tf.get_variable('b', [SWITCH_SIZE])
            switch_logits = tf.matmul(switch_input, switch_w) + switch_b
            switch_logits = tf.reshape(
                switch_logits, [BATCH_SIZE, -1, SWITCH_SIZE])
            self._switch_probs = tf.nn.softmax(switch_logits, dim=-1)
            self._switch_predict = tf.argmax(self._switch_probs, axis=2)
            switch_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                switch_logits, self._switch)
            # to have a different initialization in Emission/W
            # and Emission/b, UNCOMMENT THE FOLLOWING LINE
            # switch_actual_one = tf.equal(self._switch, 1)
            switch_losses = self._mask * switch_losses
            self._switch_loss = tf.reduce_sum(switch_losses)

        with tf.variable_scope('Emission') as scope:
            emission_input = tf.reshape(dec_output, [-1, HIDDEN_SIZE])
            emission_w = tf.get_variable('W', [HIDDEN_SIZE, EMISSION_SIZE])
            emission_b = tf.get_variable('b', [EMISSION_SIZE])
            emission_logits = tf.matmul(emission_input, emission_w) + emission_b
            emission_logits = tf.reshape(emission_logits, [BATCH_SIZE, -1, EMISSION_SIZE])
            self._emission_probs = tf.nn.softmax(emission_logits, dim=-1)
            self._emission_predict = tf.argmax(self._emission_probs, axis=2)
            emission_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                emission_logits, self._emission)
            # mask and sum the losses
            emission_losses = self._mask * emission_losses
            self._emission_loss = tf.reduce_sum(emission_losses)

        self._loss = self._switch_loss + self._emission_loss
        if trainable:
            with tf.variable_scope('Backprop') as scope:
                optimizer = tf.train.AdadeltaOptimizer(
                    learning_rate=0.005, rho=0.95, epsilon=1e-6)
                self._train_op = optimizer.minimize(
                    self._loss, self._gs, colocate_gradients_with_ops=True)
        self._variables = tf.global_variables()
        for var in self._variables:
            logging.debug(var.name + ' -- ' + var.device)

if __name__ == '__main__':
    BATCH_SIZE = 1
    SENTENCE_LENGTH = 8
    FORMULA_LENGTH = 5
    VOCABULARY_SIZE = 10
    SWITCH_SIZE = 2
    EMISSION_SIZE = 5
    POINTING_SIZE = 8
    EMBEDDING_SIZE = 5
    HIDDEN_SIZE = 10

    tf.reset_default_graph()
    tf.set_random_seed(23)

    INSTANCE = Model(len(sys.argv) > 1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for v in INSTANCE._variables:
            print v.name + ': ' + str(v.get_shape())
            data = sess.run(v)
            print data

Put the code in a module model.py and run:

:~$ python model.py > ref.txt

Now uncomment the line indicated above in the code and run:

:~$ python model.py > mod.txt
:~$ diff ref.txt mod.txt

You should see:

272,281c272,281
< [[ 0.15272647 -0.43774983 -0.43797731 -0.24265891  0.32974124]
<  [ 0.00733912 -0.07849193 -0.43016246 -0.48970658  0.19110513]
<  [ 0.02099371 -0.38966972 -0.49712265  0.46021473 -0.04306412]
<  [ 0.40600681 -0.53819847  0.23261058  0.19120842 -0.16637936]
<  [-0.30048463  0.23391896  0.28050232  0.46709561 -0.50859016]
<  [-0.01186764 -0.4364894  -0.45374861 -0.18917233 -0.29747942]
<  [ 0.37092978  0.28236824 -0.32479095  0.19327551  0.13849157]
<  [ 0.16678107 -0.4084509   0.05273259  0.0044086   0.18909204]
<  [ 0.45275509  0.11965656 -0.21034014  0.26717472 -0.1948047 ]
<  [-0.21880585 -0.24638626  0.49764216  0.21117538 -0.14970052]]
---
> [[ 0.02334166 -0.53902954  0.49798179 -0.09884384 -0.21991399]
>  [ 0.16418862 -0.43838659  0.02976686  0.32583421 -0.43007952]
>  [ 0.03031725 -0.10049745 -0.14239612 -0.40359342 -0.23511672]
>  [ 0.10826033  0.1075694   0.0359624   0.38421327  0.22342587]
>  [-0.19174138  0.27111518 -0.31547174  0.1219362   0.43017918]
>  [ 0.37019521  0.40931159 -0.23115548 -0.46872306 -0.23347196]
>  [-0.36434412 -0.33802703  0.33611691  0.42583901  0.17518502]
>  [-0.01159394 -0.31617939 -0.29969472 -0.01011997  0.53662634]
>  [ 0.53476465 -0.02670521  0.46675134  0.10126555  0.20955974]
>  [-0.29040447  0.16516399  0.52025235 -0.5020656  -0.40593004]]
283c283
< [-0.16011143  0.4890095   1.21327794 -0.48182666  0.34403455]
---
> [-0.23730683 -0.43528473 -1.56616378  0.87664163 -0.72844386]
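
For reference, the same kind of sensitivity can be shown on a much smaller graph. The sketch below is only an illustration, assuming a TF 1.x-era API and that only the graph-level seed is set; the variable name W and the ADD_UNRELATED_OP toggle are made up for the example and are not part of the model above:

import tensorflow as tf

ADD_UNRELATED_OP = False  # flip to True, rerun, and compare the printed values

tf.reset_default_graph()
tf.set_random_seed(23)

if ADD_UNRELATED_OP:
    # an op that nothing downstream ever uses, analogous to switch_actual_one
    _unused = tf.equal(tf.constant(1), 1)

# a variable whose default initializer draws random values
w = tf.get_variable('W', [3, 3])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(w))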

0 Answers:

There are no answers yet.