训练seq2seq模型时出现InvalidArgumentError

时间:2019-11-26 08:52:19

标签: seq2seq

我正在尝试训练一个单词级的序列到序列(seq2seq)模型,但是在训练过程中,我不断收到InvalidArgumentError(无效参数)错误。我不知道自己哪里做错了。请帮帮我。

下面是一个示例代码,可重现我得到的错误。

我正在使用 tensorflow 2.0.0、cudatoolkit 10.0.130、cudnn 7.6.4。

import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, LSTM, Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model

# Toy parallel corpus: four source sentences (x) and their target sentences
# (y); every target is wrapped in <sos> ... <eos> markers.
x = ['this is really good',
 'i am feeling better',
 'yesterday was a bad day',
 'today is better']

y = ['<sos> Ceci est vraiment bon <eos>',
     '<sos> je me sens mieux <eos>',
     '<sos> hier était une mauvaise journée <eos>',
     "<sos> aujourd'hui c`est mieux <eos>"]

# Map every distinct word to an integer id starting at 1. zip() stops at the
# shorter of its inputs, so the range upper bounds only need to be at least
# as large as the number of distinct words. Id 0 is never assigned to a word.
x_dict = dict(zip(np.unique(np.hstack([i.split() for i in x])), range(1, 16)))
y_dict = dict(zip(np.unique(np.hstack([i.split() for i in y])), range(1, 18)))

# Fixed widths of the id matrices built below (tokens per row).
MAX_LEN_X = 5
MAX_LEN_Y = 7

# One row per source sentence; positions past the end of a sentence stay 0.
encoder_input = np.zeros((4, MAX_LEN_X), dtype='float32')

for i, j in enumerate(x):
    for k, l in enumerate(j.split()):
        encoder_input[i, k] = x_dict[l]

# decoder_input holds the target word ids; decoder_output holds one-hot
# vectors over len(y_dict) + 1 classes (ids 0..len(y_dict)).
decoder_input = np.zeros((4, MAX_LEN_Y), dtype='float32')
decoder_output = np.zeros((4, MAX_LEN_Y, len(y_dict)+1), dtype='float32')

for i, j in enumerate(y):
    for k, l in enumerate(j.split()):
        decoder_input[i, k] = y_dict[l]
        if k > 0:
            # The one-hot target for step k - 1 is the token at step k, i.e.
            # the output sequence is the input sequence shifted left by one.
            decoder_output[i, k - 1, y_dict[l]] = 1.

latent_dim = 30
# NOTE(review): the ids in x_dict run from 1 to len(x_dict) inclusive, but an
# Embedding built with input_dim=len(x_dict) only has rows 0..len(x_dict)-1,
# so the word with the largest id has no row. TARGET_VOCAB below includes the
# extra +1; INPUT_VOCAB does not.
INPUT_VOCAB = len(x_dict)
TARGET_VOCAB = len(y_dict) + 1

# Encoder: embed the source ids and run an LSTM, keeping only its final
# hidden and cell states.
encoder_inputs = Input(shape=(None,))
encoder_emb =  Embedding(INPUT_VOCAB, latent_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)
encoder_states = [state_h, state_c]

# Decoder: embed the target ids, run an LSTM initialised with the encoder
# states, and project every time step onto TARGET_VOCAB softmax classes.
decoder_inputs  = Input(shape=(None,))
dec_emb_layer = Embedding(TARGET_VOCAB, latent_dim, mask_zero=True)
decoder_emb = dec_emb_layer(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_emb, initial_state=encoder_states)
decoder_dense = Dense(TARGET_VOCAB, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: takes [encoder ids, decoder ids] and outputs per-step
# class probabilities, trained with categorical cross-entropy.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()

这是我得到的错误的完整回溯:

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-2-f704c3b2a0b8> in <module>
      2            epochs=10,
      3            verbose=1,
----> 4            shuffle=True)

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    726         max_queue_size=max_queue_size,
    727         workers=workers,
--> 728         use_multiprocessing=use_multiprocessing)
    729 
    730   def evaluate(self,

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    322                 mode=ModeKeys.TRAIN,
    323                 training_context=training_context,
--> 324                 total_epochs=epochs)
    325             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    326 

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    121         step=step, mode=mode, size=current_batch_size) as batch_logs:
    122       try:
--> 123         batch_outs = execution_function(iterator)
    124       except (StopIteration, errors.OutOfRangeError):
    125         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
     84     # `numpy` translates Tensors to values in Eager mode.
     85     return nest.map_structure(_non_none_constant_value,
---> 86                               distributed_function(input_fn))
     87 
     88   return execution_function

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
    455 
    456     tracing_count = self._get_tracing_count()
--> 457     result = self._call(*args, **kwds)
    458     if tracing_count == self._get_tracing_count():
    459       self._call_counter.called_without_tracing()

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
    518         # Lifting succeeded, so variables are initialized and we can run the
    519         # stateless function.
--> 520         return self._stateless_fn(*args, **kwds)
    521     else:
    522       canon_args, canon_kwds = \

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
   1821     """Calls a graph function specialized to the inputs."""
   1822     graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1823     return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
   1824 
   1825   @property

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
   1139          if isinstance(t, (ops.Tensor,
   1140                            resource_variable_ops.BaseResourceVariable))),
-> 1141         self.captured_inputs)
   1142 
   1143   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1222     if executing_eagerly:
   1223       flat_outputs = forward_function.call(
-> 1224           ctx, args, cancellation_manager=cancellation_manager)
   1225     else:
   1226       gradient_name = self._delayed_rewrite_functions.register()

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
    509               inputs=args,
    510               attrs=("executor_type", executor_type, "config_proto", config),
--> 511               ctx=ctx)
    512         else:
    513           outputs = execute.execute_with_cancellation(

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     65     else:
     66       message = e.message
---> 67     six.raise_from(core._status_to_exception(e.code, message), None)
     68   except TypeError as e:
     69     keras_symbolic_tensors = [

~/miniconda3/lib/python3.7/site-packages/six.py in raise_from(value, from_value)

InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument:  indices[3,0] = 14 is not in [0, 14)
     [[node model/embedding/embedding_lookup (defined at /home/user83/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]]
     [[loss/dense_loss/weighted_loss/broadcast_weights/assert_broadcastable/AssertGuard/else/_13/Assert/data_2/_92]]
  (1) Invalid argument:  indices[3,0] = 14 is not in [0, 14)
     [[node model/embedding/embedding_lookup (defined at /home/user83/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_distributed_function_16237]

Function call stack:
distributed_function -> distributed_function

1 个答案:

答案 0 :(得分:2)

您的代码中有几处问题。

词汇量(编码器和解码器)

如果您执行print(x_dict),会发现词汇表中的索引从1开始,一直到某个值(例如n)。而您把INPUT_VOCAB设置为len(x_dict),也就是n,此时Embedding层只接受[0, n)范围内的索引,缺少词汇表中最后一个单词(索引n)对应的行。因此,只要模型遇到最后一个单词,就会出现embedding_lookup相关的错误。所以您需要设置INPUT_VOCAB = len(x_dict) + 1。

输出形状中有两个None

我个人总是尽量避免这种情况。批次维度保留为None没有问题,但输出形状中出现多个None是有风险的。例如,TensorFlow / Keras的某些层在内部会对张量进行reshape;如果形状中有多个None,就可能无法恢复张量的原始形状(甚至根本不允许reshape)。无论如何,这都不是最佳实践。因此,我在您的Input形状中指定了序列长度。

因此,更改后,您的代码将如下所示。

import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, LSTM, Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model

# Toy parallel corpus: four source sentences (x) and their target sentences
# (y); every target is wrapped in <sos> ... <eos> markers.
x = ['this is really good',
     'i am feeling better',
     'yesterday was a bad day',
     'today is better']

y = ['<sos> Ceci est vraiment bon <eos>',
     '<sos> je me sens mieux <eos>',
     '<sos> hier était une mauvaise journée <eos>',
     "<sos> aujourd'hui c`est mieux <eos>"]

# Map every distinct word to an integer id starting at 1, leaving id 0 free
# for padding (both Embedding layers below use mask_zero=True). The ids are
# derived from the vocabulary itself, so no word can be dropped by a
# too-small hard-coded range.
x_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in x])), start=1)
}
y_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in y])), start=1)
}

# Longest sentence (in tokens) on each side; used as the fixed sequence
# length of the model inputs and of the id matrices.
MAX_LEN_X = max(len(sentence.split()) for sentence in x)
MAX_LEN_Y = max(len(sentence.split()) for sentence in y)

# One row per source sentence; positions past the end of a sentence stay 0.
encoder_input = np.zeros((len(x), MAX_LEN_X), dtype='float32')

for row, sentence in enumerate(x):
    for pos, word in enumerate(sentence.split()):
        encoder_input[row, pos] = x_dict[word]

# decoder_input holds the target word ids; decoder_output holds one-hot
# vectors over len(y_dict) + 1 classes (ids 0..len(y_dict)).
decoder_input = np.zeros((len(y), MAX_LEN_Y), dtype='float32')
decoder_output = np.zeros((len(y), MAX_LEN_Y, len(y_dict) + 1),
                          dtype='float32')

for row, sentence in enumerate(y):
    for pos, word in enumerate(sentence.split()):
        decoder_input[row, pos] = y_dict[word]
        if pos > 0:
            # The one-hot target for step pos - 1 is the token at step pos,
            # i.e. the output sequence is the input shifted left by one.
            decoder_output[row, pos - 1, y_dict[word]] = 1.

latent_dim = 30
# Word ids run from 1 to len(dict) inclusive and 0 is the padding id, so each
# Embedding needs len(dict) + 1 rows; without the +1 the largest id is out of
# range and the embedding lookup fails.
INPUT_VOCAB = len(x_dict) + 1
TARGET_VOCAB = len(y_dict) + 1

print(MAX_LEN_X, MAX_LEN_Y)
# Encoder: embed the source ids and run an LSTM, keeping only its final
# hidden and cell states. The sequence length is fixed so that the batch
# dimension is the only None in the model's shapes.
encoder_inputs = Input(shape=(MAX_LEN_X,))
encoder_emb = Embedding(INPUT_VOCAB, latent_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)
encoder_states = [state_h, state_c]

# Decoder: embed the target ids, run an LSTM initialised with the encoder
# states, and project every time step onto TARGET_VOCAB softmax classes.
decoder_inputs = Input(shape=(MAX_LEN_Y,))
dec_emb_layer = Embedding(TARGET_VOCAB, latent_dim, mask_zero=True)
decoder_emb = dec_emb_layer(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_emb, initial_state=encoder_states)
decoder_dense = Dense(TARGET_VOCAB, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: takes [encoder ids, decoder ids] and outputs per-step
# class probabilities, trained with categorical cross-entropy.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()