在小端(Little Endian)机器上读取MNIST文件:数值超出范围

时间:2014-04-23 19:27:08

标签: c++ mnist

我正在尝试读取 ubyte 格式的 MNIST 文件。我已确认自己使用的是小端机器,并把读到的字节转换成了小端序。但问题是,转换之后我在输出文件中看到的数字大于 255。

有什么想法吗?我的代码取自问题《How to read MNIST data in C++?》,并做了一些修改,下面是我的代码:

// Fragment from the question: dumps every image's pixel values followed by its
// label as one comma-separated line on `fout`, and logs the header fields to `log`.
// `fin` is presumably the image file and `fin2` the label file (judging by the
// header fields read from each) -- confirm against the enclosing function.
if (fin.is_open())
{
    // Header of the first stream: four int-sized fields, each passed through
    // flipBytes() to reverse its byte order.
    int magic_number=0;
    int number_of_images=0;
    int n_rows=0;
    int n_cols=0;
    fin.read((char*)&magic_number,sizeof(magic_number));
    magic_number= flipBytes(magic_number);
    fin.read((char*)&number_of_images,sizeof(number_of_images));
    number_of_images= flipBytes(number_of_images);
    fin.read((char*)&n_rows,sizeof(n_rows));
    n_rows= flipBytes(n_rows);
    fin.read((char*)&n_cols,sizeof(n_cols));
    n_cols= flipBytes(n_cols);

    if(fin2.is_open())
    {
        // Header of the second stream: two int-sized fields, byte-swapped the same way.
        int magic_number_label = 0;
        int number_of_items = 0;

        fin2.read((char*)&magic_number_label, sizeof(magic_number_label));
        magic_number_label = flipBytes(magic_number_label);
        fin2.read((char*)&number_of_items, sizeof(number_of_items));
        number_of_items = flipBytes(number_of_items);

        // Record all decoded header values. None of them is validated, and no
        // read result is checked anywhere in this fragment.
        log << "Magic Number: " << magic_number << "\n";
        log << "Number of Images: " << number_of_images << "\n";
        log << "Number of Rows: " << n_rows << "\n";
        log << "Number of Cols: " << n_cols << "\n";
        log << "------------------------------\n";
        log << "Magic Number Label: " << magic_number_label << "\n";
        log << "Number of Items: " << number_of_items << "\n";

        // One output line per image: n_rows * n_cols values, then the label.
        for(int i=0;i<number_of_images;++i)
        {
            for(int r=0;r<n_rows;++r)
            {
                for(int c=0;c<n_cols;++c)
                {
                    // NOTE(review): this consumes sizeof(unsigned int) bytes per
                    // pixel and byte-swaps them as one integer. The working reader
                    // later in this document reads a single unsigned char per
                    // pixel, which is why values above 255 show up here.
                    unsigned int temp=0;
                    fin.read((char*)&temp,sizeof(temp));
                    temp = flipBytes(temp);

                    fout << temp << ", ";
                }
            }

            // Append this image's label as the last column of the line.
            // NOTE(review): this also reads sizeof(unsigned int) bytes and leaves
            // `temp` uninitialized if the read fails; presumably a label is a
            // single byte as well -- confirm against the IDX label file format.
            unsigned int temp;
            fin2.read((char*)&temp, sizeof(temp));
            temp = flipBytes(temp);

            fout << temp << '\n';
        }
    }
}


/**
 * Reverses the order of the four low-order bytes of an integer
 * (big-endian <-> little-endian conversion of a 32-bit value).
 *
 * The swap is done on the unsigned representation: left-shifting a signed
 * value into the sign bit and right-shifting a negative value are not
 * portable before C++20, whereas unsigned shifts are always well defined.
 *
 * @param i  value whose bytes are to be swapped (assumes a 32-bit int).
 * @return   the value with bytes 0..3 in reverse order.
 */
int UbyteExtractor::flipBytes(int i)
{
    const unsigned int bits = static_cast<unsigned int>(i);

    const unsigned int swapped = ((bits & 0x000000FFu) << 24)
                               | ((bits & 0x0000FF00u) << 8)
                               | ((bits & 0x00FF0000u) >> 8)
                               | ((bits & 0xFF000000u) >> 24);

    return static_cast<int>(swapped);
}

示例输出片段

0   0   0   0   0   0   303174147   447711358   2146959270  0   0   0   2589860894  4261281194  2900491773  1086583549  0   0   822083584   4261281262  4261281277  1381891069  2570322

1 个答案:

答案 0 :(得分:1)

@mrgloom 的原始答案每次只读取 1 个字节,而您每次从文件中读取 4 个字节。这是一个巨大的差异。

mrgloom:

                // Reads exactly one byte (sizeof(unsigned char)) from the stream into temp.
                unsigned char temp=0;
                file.read((char*)&temp,sizeof(temp));

您:

                // Reads sizeof(unsigned int) bytes in a single call, then reverses
                // their order with flipBytes().
                unsigned int temp=0;
                fin.read((char*)&temp,sizeof(temp));
                temp = flipBytes(temp);

                // Writes the combined multi-byte value followed by ", ".
                fout << temp << ", ";

也许您想将像素的数值打印到控制台?然后在打印之前将字节值转换为(unsigned)int:

                // Read a single byte per value; no byte swapping is applied.
                unsigned char temp=0;
                fin.read((char*)&temp,sizeof(temp));

                // Convert to unsigned before streaming so the byte is written as a
                // number rather than as a character.
                fout << unsigned(temp) << ", ";

顺便说一下,仅供参考,已有用于字节顺序交换的现成库函数,例如下面代码中使用的 ntohl()(声明于 <arpa/inet.h>)。

更新

这是一个可以正常工作的解决方案(基本上与 @mrgloom 的代码相同)

#include <iostream>
#include <fstream>

using namespace std;

#include <arpa/inet.h>

/*
 * ERROR(): report an I/O failure on the stream named `file` in the calling
 * scope, then return from the enclosing (void) function.
 *
 * The stream's error state is cleared before the position is queried,
 * because tellg() returns -1 whenever failbit is set -- without the clear()
 * the reported offset would always be -1 after a failed read. The stream is
 * not used again after this macro, since control leaves the function.
 * A stream that could not be opened at all still reports -1.
 */
#define ERROR() do {                                            \
    file.clear();                                               \
    std::cout << "I/O error at " << __func__ << ": " <<         \
        __LINE__ << " (offset " << file.tellg() << ")" <<       \
        std::endl;                                              \
    return;                                                     \
} while (0)

// Reads one 32-bit big-endian field from `in` and stores it in `value`,
// converted to host byte order with ntohl(). Returns false, leaving `value`
// untouched, when the four bytes could not be read.
static bool read_be32(std::ifstream &in, int &value)
{
    uint32_t raw = 0;
    if ( ! in.read(reinterpret_cast<char*>(&raw), sizeof(raw)))
        return false;
    value = static_cast<int>(ntohl(raw));
    return true;
}

// Dumps the contents of the image file at `full_path` to standard output:
// first the four header fields (magic number, image count, rows, columns),
// then every image as a bracketed block with one line of pixel values per row.
//
// Any open or read failure is reported through the ERROR() macro defined
// earlier in this file, which also returns from this function; pixels that
// were already printed stay printed.
static void read_mnist(const std::string &full_path)
{
    // ERROR() refers to this stream by name, so it must be called `file`.
    std::ifstream file (full_path.c_str(), std::ios::binary);
    if ( ! file)
        ERROR();

    int magic_number=0;
    int number_of_images=0;
    int n_rows=0;
    int n_cols=0;
    if ( ! read_be32(file, magic_number))
        ERROR();
    if ( ! read_be32(file, number_of_images))
        ERROR();
    if ( ! read_be32(file, n_rows))
        ERROR();
    if ( ! read_be32(file, n_cols))
        ERROR();

    // '\n' instead of std::endl throughout: the dump emits a line per pixel
    // row, and flushing on each of them is needless work.
    std::cout << "Mgc==" << magic_number << "; NImg==" << number_of_images << "; "
        "NRow==" << n_rows << "; NCol==" << n_cols << '\n';
    for(int i=0;i<number_of_images;++i)
    {
        std::cout << '[' << '\n';
        for(int r=0;r<n_rows;++r)
        {
            std::cout << '\t' << '[' ;
            for(int c=0;c<n_cols;++c)
            {
                // Each pixel is read as exactly one byte; widen it before
                // printing so it appears as a number, not a character.
                unsigned char pixel=0;
                if ( ! file.read(reinterpret_cast<char*>(&pixel),sizeof(pixel)))
                    ERROR();
                std::cout << unsigned(pixel) << ' ' ;
            }
            std::cout << ']' << '\n';
        }
        std::cout << ']' << '\n';
    }

    // Single flush so callers see the complete dump once we return.
    std::cout.flush();
}

int main(int argc, char *argv[]) {
    cout << "Process [" << argv[1] << "]" << endl;
    read_mnist(argv[1]);
}

运行记录:

$ wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
(snip)
$ gunzip t10k-images-idx3-ubyte.gz
$ ./mrgloom t10k-images-idx3-ubyte
Process [t10k-images-idx3-ubyte]
Mgc==2051; NImg==10000; NRow==28; NCol==28
[
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 84 185 159 151 60 36 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 222 254 254 254 254 241 198 198 198 198 198 198 198 198 170 52 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 67 114 72 114 163 227 254 225 254 254 254 250 229 254 254 140 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 17 66 14 67 67 67 59 21 236 254 106 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 83 253 209 18 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 233 255 83 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 129 254 238 44 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 59 249 254 62 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 133 254 187 5 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 205 248 58 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 126 254 182 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 75 251 240 57 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 19 221 254 166 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 3 203 254 219 35 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 38 254 254 77 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 31 224 254 115 1 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 133 254 254 52 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 61 242 254 254 52 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 121 254 254 219 40 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 121 254 207 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
        [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
]
[
(snip)