How to change the shape of logits to match the shape of labels

Asked: 2019-02-04 14:40:41

Tags: python tensorflow

I have a custom dataset of 300 images (a quick shape check is sketched right after this list):

  • Each image has shape 192x192x3
  • It is a classification problem with 3 possible labels
  • The model is trained by feeding it batches of 20 images
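
As a quick sanity check on that setup, the raw arrays returned by my data-loading helper (di, used in the full code further down) should look like this:

import numpy as np

x = np.asarray(di.load_images())
y = np.asarray(di.load_labels())
print(x.shape)  # should print (300, 192, 192, 3): 300 RGB images of 192x192
print(y.shape)  # should print (300,): one integer class label in {0, 1, 2} per image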

I have built a graph in TensorFlow, but when I try to train it I get InvalidArgumentError: logits and labels must be broadcastable: logits_size=[720,3] labels_size=[20,3]

I understand that the problem is the mismatch between the shape of my logits, [720,3], and the shape of my labels, [20,3]. What I cannot figure out is why and where my logits end up with the shape [720,3], and how to change that. Changing the number of layers, the loss function, and the batch size did not solve the problem.
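
To trace where the extra rows come from, the static shape of every tensor can be printed while the graph is built. Below is a minimal sketch that rebuilds just the conv/pool/reshape stack from build_graph() (shown in full further down), using a fixed batch of 20 so TensorFlow can infer every static shape:

import tensorflow as tf

# Same layer stack as in build_graph(), but with a fixed batch size of 20
x = tf.placeholder(tf.float32, shape=[20, 192, 192, 3])
conv1 = tf.layers.conv2d(x, 64, [5, 5], padding="same", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(conv1, [2, 2], 2)
conv2 = tf.layers.conv2d(pool1, 64, [5, 5], padding="same", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(conv2, [2, 2], 2)
conv3 = tf.layers.conv2d(pool2, 32, [5, 5], padding="same", activation=tf.nn.relu)
pool3 = tf.layers.max_pooling2d(conv3, [2, 2], 2)
print(pool3.get_shape())   # each 2x2, stride-2 pool halves height and width
flat = tf.reshape(pool3, [-1, 4 * 4 * 32])
print(flat.get_shape())    # note what the reshape does to the first dimension
logits = tf.layers.dense(flat, 3)
print(logits.get_shape())  # this is the shape that reaches the loss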

Any help is appreciated, thanks!

I have a Train class with build_graph() and train() methods. Here is the full code:

import os

import numpy as np
import tensorflow as tf
# di is my data-loading module; its import is omitted here


class Train:
    __x_ = []
    __y_ = []
    __logits = []
    __loss = []
    __train_step = []
    __merged_summary_op = []
    __saver = []
    __session = []
    __writer = []
    __is_training = []
    __loss_val = []
    __train_summary = []
    __val_summary = []

    def __init__(self):
        pass

    def build_graph(self):
        self.__x_ = tf.placeholder("float", shape=[None, 192, 192, 3], name='X')
        self.__y_ = tf.placeholder("int32", shape=[None, 3], name='Y')
        self.__is_training = tf.placeholder(tf.bool)


        with tf.name_scope("model") as scope:
            conv1 = tf.layers.conv2d(inputs=self.__x_, filters=64,
                                     kernel_size=[5, 5],
                                     padding="same", activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

            conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5], padding="same",
                                     activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

            conv3 = tf.layers.conv2d(inputs=pool2, filters=32, kernel_size=[5, 5], padding="same",
                                     activation=tf.nn.relu)
            pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)

            # Flatten before the fully connected layers
            pool3_flat = tf.reshape(pool3, [-1, 4 * 4 * 32])

            # FC layers
            FC1 = tf.layers.dense(inputs=pool3_flat, units=128, activation=tf.nn.relu)
            FC2 = tf.layers.dense(inputs=FC1, units=64, activation=tf.nn.relu)
            self.__logits = tf.layers.dense(inputs=FC2, units=3)

            # Create ops for the training loss __loss and the validation loss __loss_val
            with tf.name_scope("loss_func") as scope:
                self.__loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(logits=self.__logits, labels=self.__y_))
                self.__loss_val = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(logits=self.__logits, labels=self.__y_))

                # Add losses to tensorboard
                self.__train_summary = tf.summary.scalar("loss_train", self.__loss)
                self.__val_summary = tf.summary.scalar("loss_val", self.__loss_val)

            with tf.name_scope("optimizer") as scope:
                global_step = tf.Variable(0, trainable=False)
                starter_learning_rate = 1e-3
                # decay every 1000 steps with a base of 0.9
                learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 1000, 0.9,
                                                           staircase=True)
                self.__train_step = tf.train.AdamOptimizer(learning_rate).minimize(self.__loss,
                                                                                   global_step=global_step)
                tf.summary.scalar("learning_rate", learning_rate)
                tf.summary.scalar("global_step", global_step)

            # Merge op for tensorboard
            self.__merged_summary_op = tf.summary.merge_all()
            # Op that initializes all variables
            init = tf.global_variables_initializer()
            # Saver for checkpoints
            self.__saver = tf.train.Saver(max_to_keep=None)

            # Configure summaries to be written to the given directory
            self.__session = tf.Session()
            self.__writer = tf.summary.FileWriter("./logs/flight_path", self.__session.graph)
            self.__session.run(init)

    def train(self, save_dir='./save', batch_size=20):

        # Load dataset and labels
        x = np.asarray(di.load_images())
        y = np.asarray(di.load_labels())

        # Shuffle dataset
        np.random.seed(0)
        shuffled_indices = np.arange(len(y))
        np.random.shuffle(shuffled_indices)
        shuffled_x = x[shuffled_indices].tolist()
        shuffled_y = y[shuffled_indices].tolist()
        shuffled_y = tf.keras.utils.to_categorical(shuffled_y, 3)

        dataset = (shuffled_x, shuffled_y)
        dataset = tf.data.Dataset.from_tensor_slices(dataset)
        # dataset = dataset.shuffle(buffer_size=300)

        # Use the TensorFlow Data API to handle batches
        dataset_train = dataset.take(200)
        dataset_train = dataset_train.repeat()
        dataset_train = dataset_train.batch(batch_size)

        dataset_test = dataset.skip(200)
        dataset_test = dataset_test.repeat()
        dataset_test = dataset_test.batch(batch_size)

        # Create the iterators
        iter_train = dataset_train.make_one_shot_iterator()
        iter_train_op = iter_train.get_next()
        iter_test = dataset_test.make_one_shot_iterator()
        iter_test_op = iter_test.get_next()

        # Build model graph
        self.build_graph()

        # Train loop
        for i in range(10000):
            batch_train = self.__session.run([iter_train_op])
            batch_x_train, batch_y_train = batch_train[0]
            # Print the losses from time to time
            if i % 100 == 0:
                batch_test = self.__session.run([iter_test_op])
                batch_x_test, batch_y_test = batch_test[0]
                loss_train, summary_1 = self.__session.run(
                    [self.__loss, self.__merged_summary_op],
                    feed_dict={self.__x_: batch_x_train,
                               self.__y_: batch_y_train,
                               self.__is_training: True})
                loss_val, summary_2 = self.__session.run(
                    [self.__loss_val, self.__val_summary],
                    feed_dict={self.__x_: batch_x_test,
                               self.__y_: batch_y_test,
                               self.__is_training: False})
                print("Loss Train: {0} Loss Val: {1}".format(loss_train, loss_val))
                # Write to tensorboard summary
                self.__writer.add_summary(summary_1, i)
                self.__writer.add_summary(summary_2, i)

            # Execute train op
            self.__train_step.run(session=self.__session, feed_dict={
                self.__x_: batch_x_train,
                self.__y_: batch_y_train,
                self.__is_training: True})

        # Save model
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        checkpoint_path = os.path.join(save_dir, "model")
        filename = self.__saver.save(self.__session, checkpoint_path)
        print("Model saved in file: %s" % filename)


if __name__ == '__main__':
    cnn = Train()
    cnn.train()

0 Answers:

No answers yet