如何使用wfstream将UTF-16文件读入wstring

时间:2014-04-13 15:00:03

标签: c++ windows unicode utf-8

我尝试使用wfstream将UTF-16文件读入wstring。但在转储内存之后,我发现结果不是我想要的。例如,UTF-16文件中的“Chào”字符串是“FF FE 43 00 68 00 E0 00 6F 00”(使用十六进制编辑器查看)。读入后的wstring内容为:

[0]FF  (BOM)
[1]FE  (BOM)
[2]43
[3]00
[4]68
[5]00
[6]E0
[7]00
[8]6F
[9]00

因此,使用fstream::open时,它只是逐字节读取,并把每个字节存储为一个wchar_t。但我真正想要的是一个按UTF-16解码的wstring,所以wstring应该是:

[0]43
[1]68
[2]E0
[3]6F

那么,如何使用wfstream以正确的编码读取UTF-16文件?感谢阅读 :D

2 个答案:

答案 0 :(得分:-1)

也许您应该在阅读之前尝试更改编码:

// Locale whose codecvt facet decodes UTF-16 on input. consume_header makes the
// facet swallow the byte order mark and use it to choose the byte order, so the
// BOM does not show up as characters in the wstring.
// NOTE: std::codecvt_utf16 is deprecated since C++17 but still available.
const std::locale AvailLocale
  = std::locale(std::locale("Russian"), new std::codecvt_utf16<wchar_t, 0x10ffff, std::consume_header>());

std::wfstream myfile;
// A locale has no effect until it is imbued into the stream; do this before
// opening so the very first read already goes through the UTF-16 facet.
myfile.imbue(AvailLocale);
// Open with std::ios::binary so the runtime does not apply text-mode
// translation to the bytes before the facet sees them.
myfile.open(...);

将语言“俄语”更改为您的计算机默认语言,它应该可以使用!

答案 1 :(得分:-1)

这是因为BOM必须以二进制形式写入/读取,而文本只是在文本模式下完成。

你可以使用类似下面的代码来关闭/重新打开文件,或者手动完成这一步。否则,你可能不得不使用C++11或WinAPI。思路是以二进制模式读/写BOM,然后以文本模式读/写文件的其余内容。这样是可行的,我测试过。否则,您将不得不自己进行转换。

#include <iostream>
#include <vector>
#include <fstream>

// Thin wrapper around std::basic_fstream that remembers the file path so the
// same file can be closed and reopened with a different open mode.
//
// T      - character type of the underlying stream
// Traits - character traits forwarded to std::basic_fstream
template<typename T, typename Traits = std::char_traits<T>>
class ModFStream
{
    private:
        std::string filepath;
        std::basic_fstream<T, Traits> stream;
        std::ios_base::openmode mode;

    public:
        // Creates a wrapper with no file attached.
        ModFStream() : stream(), mode() {}

        // Opens FilePath with the given mode and remembers both.
        ModFStream(const std::string &FilePath, std::ios_base::openmode mode) : filepath(FilePath), stream(FilePath, mode), mode(mode) {}
        ~ModFStream() {}

        // Direct access to the wrapped stream.
        inline std::basic_fstream<T, Traits>& get() {return stream;}

        // Mode the file was most recently opened with.
        inline std::ios_base::openmode getmode() const {return mode;}

        // Closes the file and reopens the same path with a new mode.
        void setmode(std::ios::openmode mode)
        {
            stream.close();
            // Drop eof/fail flags left over from earlier I/O so the reopened
            // stream starts from a clean state.
            stream.clear();
            stream.open(filepath, mode);
            // The parameter shadows the member, so assign through this->;
            // without this the stored mode would keep its old value.
            this->mode = mode;
        }

        // Forwards formatted output to the wrapped stream; returns *this for chaining.
        template<typename U>
        ModFStream& operator << (const U& other)
        {
            stream << other;
            return *this;
        }

        // Forwards formatted input to the wrapped stream; returns *this for chaining.
        template<typename U>
        ModFStream& operator >> (U& other)
        {
            stream >> other;
            return *this;
        }
};

// Demo: write a BOM followed by a wide string, then reopen the file and read
// both back.
int main()
{
    wchar_t bom[] = L"\xFF\xFE";
    std::wstring str = L"Chào";

    ModFStream<wchar_t> stream("C:/Users/Brandon/Desktop/UTF16Test.txt", std::ios::out | std::ios::binary);
    stream << bom;
    // Reopen in append mode: reopening with plain std::ios::out truncates the
    // file, which would throw away the BOM that was just written.
    stream.setmode(std::ios::out | std::ios::app | std::ios::binary);
    stream << str;

    str.clear();
    // Read the two BOM characters back in binary mode first...
    stream.setmode(std::ios::in | std::ios::binary);
    stream >> bom[0] >> bom[1];

    // ...then reopen in text mode and read the text from the start of the file.
    stream.setmode(std::ios::in);
    stream >> str;

    std::wcout<<str;
}

我猜你可以写一个WinAPI fstream模拟器..

#include <cstdlib>
#include <iostream>
#include <locale>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>
#include <windows.h>

namespace win
{
    // Type trait: ::value is true only when T is exactly wchar_t.
    template<typename T>
    struct is_wide_char : std::is_same<T, wchar_t> {};

    // File open flags. Each enumerator occupies its own bit so values can be
    // combined and tested with bitwise operators.
    enum class open_mode
    {
        app   = 0x01,
        ate   = 0x02,
        bin   = 0x04,
        in    = 0x08,
        out   = 0x10,
        trunc = 0x20
    };

    // Origin a seek offset is measured from; each enumerator is a distinct bit.
    enum class seek_dir
    {
        beg = 0x01,
        cur = 0x02,
        end = 0x04
    };

    // Bitwise operators so open_mode values can be combined and tested as flags.
    // Each one operates on the enumerators' integer values.
    inline constexpr open_mode operator & (open_mode a, open_mode b) {return static_cast<open_mode>(static_cast<int>(a) & static_cast<int>(b));}
    inline constexpr open_mode operator | (open_mode a, open_mode b) {return static_cast<open_mode>(static_cast<int>(a) | static_cast<int>(b));}
    inline constexpr open_mode operator ^ (open_mode a, open_mode b) {return static_cast<open_mode>(static_cast<int>(a) ^ static_cast<int>(b));}
    inline constexpr open_mode operator~(open_mode a) {return static_cast<open_mode>(~static_cast<int>(a));}

    // Compound assignments return a mutable reference to the left operand, as
    // the built-in operators do, so that (a |= b) |= c compiles and chains.
    inline open_mode& operator |= (open_mode& a, open_mode b) {a = a | b; return a;}
    inline open_mode& operator &= (open_mode& a, open_mode b) {a = a & b; return a;}
    inline open_mode& operator ^= (open_mode& a, open_mode b) {a = a ^ b; return a;}

    // Wide input needs no conversion. Being a non-template overload, this is
    // preferred by overload resolution whenever the argument is const wchar_t*.
    inline std::wstring to_wide_string_impl(const wchar_t* str)
    {
        return std::wstring(str);
    }

    // Any other character type is reinterpreted as a multibyte string and
    // converted with std::mbstowcs (which uses the current C locale).
    template<typename T>
    std::wstring to_wide_string_impl(const T* str)
    {
        const char* narrow = reinterpret_cast<const char*>(str);

        // First pass with a null destination only measures the result.
        const std::size_t length = std::mbstowcs(nullptr, narrow, 0);
        if (length == static_cast<std::size_t>(-1))
            throw std::runtime_error("to_wide_string: invalid multibyte sequence");

        std::wstring utf16(length, L'\0');
        if (length != 0)
            std::mbstowcs(&utf16[0], narrow, length);
        return utf16;
    }

    // Converts a null-terminated string of character type T to std::wstring.
    //
    // The work is dispatched to overloads rather than a runtime `if`, so the
    // wide-only and narrow-only code paths are never both instantiated for the
    // same T (std::wstring cannot be constructed from a const char*).
    //
    // Throws std::runtime_error when the input is not a valid multibyte
    // sequence.
    template<typename T>
    std::wstring to_wide_string(const T* str)
    {
        return to_wide_string_impl(str);
    }

    template<typename T>
    class WinFStream
    {
        private:
            open_mode mode;
            HANDLE hFile;
            bool binary_mode = false;

        public:
            WinFStream(const T* FilePath, open_mode mode = open_mode::in | open_mode::out) : mode(mode), hFile(nullptr), binary_mode(false)
            {
                unsigned int open_flags = 0;

                if (static_cast<int>(mode & open_mode::bin))
                {
                    binary_mode = true;
                }

                if (static_cast<int>(mode & open_mode::in))
                {
                    open_flags |= GENERIC_READ;
                }
                else if (static_cast<int>(mode & open_mode::app))
                {
                    open_flags |= FILE_APPEND_DATA;
                }

                if (static_cast<int>(mode & open_mode::out))
                {
                    open_flags |= GENERIC_WRITE;
                }

                std::wstring path = to_wide_string(FilePath);
                hFile = CreateFileW(path.c_str(), open_flags, 0, nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr);

                if (static_cast<int>(mode & open_mode::ate))
                {
                    SetFilePointer(hFile, 0, nullptr, FILE_END);
                }
            }

            ~WinFStream() {CloseHandle(hFile); hFile = nullptr;}

            inline std::size_t seekg(std::size_t pos, seek_dir from)
            {
                return SetFilePointer(hFile, pos, nullptr, static_cast<int>(from) - 1);
            }

            inline std::size_t tellg()
            {
                return GetFileSize(hFile, nullptr);
            }

            void close()
            {
                CloseHandle(hFile);
                hFile = nullptr;
            }

            template<typename U>
            inline std::size_t write(const U* str, std::size_t size)
            {
                long unsigned int bytes_written = 0;
                WriteFile(hFile, &str[0], size * sizeof(U), &bytes_written, nullptr);
                return bytes_written;
            }

            template<typename U>
            inline std::size_t read(U* str, std::size_t size)
            {
                long unsigned int bytes_read = 0;
                ReadFile(hFile, &str[0], size * sizeof(U), &bytes_read, nullptr);
                return bytes_read;
            }

            template<typename U>
            WinFStream& operator << (const U &other)
            {
                this->write(&other, 1);
                return *this;
            }

            template<typename U, std::size_t size>
            WinFStream& operator << (U (&str)[size])
            {
                this->write(&str[0], size);
                return *this;
            }

            template<typename U, typename Traits = std::char_traits<U>>
            WinFStream& operator << (const std::basic_string<U, Traits>& str)
            {
                this->write(str.c_str(), str.size());
                return *this;
            }

            template<typename U>
            WinFStream& operator >> (U &other)
            {
                this->read(&other, 1);
                return *this;
            }

            template<typename U, std::size_t size>
            WinFStream& operator >> (U (&str)[size])
            {
                this->read(&str[0], size);
                return *this;
            }

            template<typename U, typename Traits = std::char_traits<U>>
            WinFStream& operator >> (std::basic_string<U, Traits>& str)
            {
                unsigned int i = 0;
                std::vector<U> buffer(512, 0);

                while(true)
                {
                    long unsigned int bytes_read = 0;
                    bool result = ReadFile(hFile, &buffer[i], sizeof(U), &bytes_read, nullptr);

                    if (std::isspace(buffer[i]) || buffer[i] == '\r' || buffer[i] == '\n')
                        break;

                    ++i;

                    if (bytes_read != sizeof(U) || !result)
                        break;
                }

                str.append(buffer.begin(), buffer.begin() + i);
                return *this;
            }
    };

    // Convenience aliases for wide-character and narrow-character paths.
    using WinFStreamW = WinFStream<wchar_t>;
    using WinFStreamA = WinFStream<char>;

}


using namespace win;

int main()
{
    unsigned char bom[2] = {0XFF, 0xFE};
    std::wstring str = L"Chào";

    WinFStreamW File(L"C:/Users/Brandon/Desktop/UTF16Test.txt");
    File << bom;
    File << str;


    File.seekg(0, win::seek_dir::beg);

    std::wstring str2;
    File>>bom;
    File>>str2;

    std::wcout<<str2;
}

我知道,它很粗糙,而且与fstream的工作方式并不完全相同,但花时间“尝试”去模拟它还是值得的。

但是,我的运算符 << 和 >> 并不“等同于”std::fstream 的对应运算符。

在以二进制模式写入bom之后,您可能最好只使用CreateFileW, ReadFile, WriteFile或以文本模式重新打开文件。

相关问题