How to load a custom dataset in nervana neon

Date: 2016-05-08 16:27:38

Tags: python-2.7 machine-learning deep-learning nervana-neon

If anyone is familiar with nervana neon, could you please show how to load a custom dataset into this neon example?

1 Answer:

Answer 0: (score: 0)

Here is an example data loader. You can also check the neon documentation. You will see a reference to "DataSet" in __iter__ below; that is just a helper class with a few functions that produce one minibatch at a time. The key is to make sure you create contiguous X, y pairs, set them on the backend tensors, and yield them. Hope that helps.
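Before the iterator itself, a note on that DataSet: it comes from the asker's own data module, not from neon. A minimal hypothetical stand-in is sketched below, assuming in_data is a structured numpy array with an 'image' field and a one-hot 'label' field (so that in_data.shape[0] is the sample count, as the iterator expects); the real helper only needs to provide start(), stop(), and a batch() method returning (inputs, targets, extra):

import numpy as np


class DataSet(object):
    # Hypothetical stand-in for the asker's own data.DataSet helper.
    # Assumes `data` is a structured numpy array, e.g. with
    #   dtype = np.dtype([('image', np.float32, (3, 32, 32)),
    #                     ('label', np.float32, (10,))])
    # so rows['image'] is (batch_size, C, H, W) and rows['label'] is one-hot.
    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        self.idx = 0

    def start(self):
        # e.g. reset cursors, open files, or spawn prefetch workers
        self.idx = 0

    def stop(self):
        # e.g. join workers or close file handles
        pass

    def batch(self):
        # return one minibatch as (inputs, targets, extra)
        rows = self.data[self.idx:self.idx + self.batch_size]
        self.idx += self.batch_size
        return rows['image'], rows['label'], None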

import numpy as np
from data import DataSet  # the asker's own helper module (a hypothetical stand-in is sketched above)
from operator import mul
from neon.data import NervanaDataIterator


class CustomLoader(NervanaDataIterator):
    def __init__(self, in_data, img_shape, n_classes):
        # Store the input shape. Pixel values are divided by 255 in
        # __iter__ to normalize them to the range [0, 1].
        self.shape = img_shape  # shape of the input data (e.g. for images, (C, H, W))

        # 1. assign some required and useful attributes
        self.start = 0  # start at zero
        self.ndata = in_data.shape[0]  # number of images in X (hint: use X.shape)
        self.nfeatures = reduce(mul, img_shape, 1)  # features per example: the product of the img_shape dims (e.g. C*H*W)

        # number of minibatches per epoch
        # to calculate this, use the batchsize, which is stored in self.be.bsz
        self.nbatches = self.ndata // self.be.bsz  # integer division; any partial final batch is dropped

        # 2. allocate memory on the GPU for a minibatch's worth of data.
        # (e.g. use `self.be` to access the backend.). See the backend documentation.
        # to get the minibatch size, use self.be.bsz
        # hint: X should have shape (# features, mini-batch size)
        # hint: use some of the attributes previously defined above
        self.dev_X = self.be.zeros((self.nfeatures, self.be.bsz))
        self.dev_Y = self.be.zeros((n_classes, self.be.bsz))
        self.data_loader = DataSet(in_data, self.be.bsz)
        self.data_loader.start()

    def reset(self):
        self.data_loader.stop()
        self.start = 0
        self.data_loader.start()

    def __iter__(self):
        # 3. loop through minibatches in the dataset
        for index in xrange(self.nbatches):
            # 3a. grab the right slice from the numpy arrays
            inputs, targets, _ = self.data_loader.batch()

            # flatten each image to a feature vector: (batch_size, num_features)
            inputs = inputs.reshape(self.be.bsz, -1)

            # The X and Y arrays arrive with shape (batch_size, num_features),
            # but the iterator needs to return data with shape (num_features, batch_size).
            # here we transpose the data, and then store it as a contiguous array.
            # numpy arrays need to be contiguous before being loaded onto the GPU.
            inputs = np.ascontiguousarray(inputs.T / 255.0)
            targets = np.ascontiguousarray(targets.T)

            # here we test your implementation
            # your slice has to have the same shape as the GPU tensors you allocated
            assert inputs.shape == self.dev_X.shape, \
                "inputs has shape {}, but dev_X is {}".format(inputs.shape, self.dev_X.shape)
            assert targets.shape == self.dev_Y.shape, \
                "targets has shape {}, but dev_Y is {}".format(targets.shape, self.dev_Y.shape)

            # 3b. transfer from numpy arrays to device
            # - use the GPU memory buffers allocated previously,
            #    and call the myTensorBuffer.set() function.
            self.dev_X.set(inputs)
            self.dev_Y.set(targets)

            # 3c. yield a tuple of the device tensors.
            # X should be of shape (num_features, batch_size)
            # Y should be of shape (n_classes, batch_size)
            yield (self.dev_X, self.dev_Y)
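A rough sketch of how this loader might be wired into training follows, assuming the structured-array layout from the stub above and an illustrative two-layer MLP; the layer choices and hyperparameters are placeholders, not part of the original answer. Note that the backend must be generated before the loader is constructed, since NervanaDataIterator reads the batch size from self.be:

import numpy as np
from neon.backends import gen_backend
from neon.callbacks.callbacks import Callbacks
from neon.initializers import Gaussian
from neon.layers import Affine, GeneralizedCost
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import CrossEntropyMulti, Rectlin, Softmax

be = gen_backend(backend='cpu', batch_size=128)  # run this before building the loader

# placeholder data in the assumed structured layout
dtype = np.dtype([('image', np.float32, (3, 32, 32)),
                  ('label', np.float32, (10,))])
in_data = np.zeros(1280, dtype=dtype)

train_set = CustomLoader(in_data, img_shape=(3, 32, 32), n_classes=10)

# illustrative network: two affine layers with ReLU and softmax
init = Gaussian(scale=0.01)
layers = [Affine(nout=100, init=init, activation=Rectlin()),
          Affine(nout=10, init=init, activation=Softmax())]
model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
opt = GradientDescentMomentum(0.1, momentum_coef=0.9)
callbacks = Callbacks(model)

model.fit(train_set, optimizer=opt, num_epochs=10, cost=cost, callbacks=callbacks)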