使用 boost::spirit 解析类似 C 结构体(struct)的声明

时间:2015-02-04 06:20:24

标签: c++ boost boost-spirit boost-spirit-qi

我想解析一个类似 C 结构体(struct)的声明,它的成员是一些标量或数组。之后就可以根据这个结构体定义生成用于 HDF5 序列化的 C++ 头文件。但是当我尝试用 boost::spirit 同时处理标量和数组时,遇到了一些困难。

#include <iostream>
#include <fstream>
#include <string>
#include <vector>

#include <boost/foreach.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
namespace fusion = boost::fusion;

// One member of a parsed declaration: its type, its name and its array
// dimension.
struct struct_field
{
    // Start from a known dimension; without this constructor a
    // default-initialized struct_field would carry an indeterminate `dim`.
    struct_field() : dim(0) {}

    std::string type;   // type name as written in the declaration
    std::string name;   // member name
    int dim;            // array dimension; 0 until a value is assigned
};

// A whole parsed declaration: the declared name plus its member fields in
// the order they were parsed.
struct struct_body
{
    std::string name;                   // text between "declare(" and ")"
    std::vector<struct_field> fields;   // one entry per parsed member
};

// Register struct_field with Boost.Fusion so its members can be addressed by
// index (type = 0, name = 1, dim = 2), as the grammar does through at_c<N>.
BOOST_FUSION_ADAPT_STRUCT(
    struct_field,
    (std::string, type)
    (std::string, name)
    (int, dim)
)

// Register struct_body with Boost.Fusion (name = 0, fields = 1).
BOOST_FUSION_ADAPT_STRUCT(
    struct_body,
    (std::string, name)
    (std::vector<struct_field>, fields)
)

// Qi grammar for a block of the form
//
//     declare(NAME)
//     (
//         TYPE NAME[DIM];
//         ...
//     )
//
// The start rule is `body`; the grammar's attribute is a struct_body.
// Every rule fills its attribute through explicit semantic actions.
template <typename Iterator, typename Skipper>
struct preprocessor :
    qi::grammar<Iterator, struct_body(), Skipper>
{
    preprocessor() :
        preprocessor::base_type(body)
    {
        using namespace qi::labels;
        using qi::eol;
        using qi::lit;
        using qi::lexeme;
        using qi::int_;
        using ascii::char_;
        using phoenix::at_c;
        using phoenix::push_back;

        // Type name: any run of characters other than ' ', appended to the
        // rule's string attribute one character at a time.
        vartype =
            *lit(' ') >> lexeme[+(char_ - ' ') [_val += _1]];
        // Member name: either everything up to '[', or everything up to ';'
        // followed by the ';' itself.
        // NOTE(review): the first alternative only stops at '[', so for a
        // declaration without '[' it does not stop at ';' -- this is the
        // scalar-parsing problem the surrounding discussion is about.
        varname =
            (*lit(' ') >> lexeme[+(char_ - '[') [_val += _1]]) |
            (*lit(' ') >> lexeme[+(char_ - ';') [_val += _1]] >> ';');
        // Array dimension: "[" INT "];" -- the terminating ';' is part of
        // this rule.
        vardim = '[' >> int_ [_val += _1] >> "];";

        // Header line: "declare(" NAME ")" followed by an end-of-line.
        strucname =
            "declare(" >>
            lexeme[+(char_ - ')')[_val += _1]] >>
            ')' >>
            eol;

        // One member line: type, name, optional dimension, end-of-line.
        // The three parts are stored into struct_field members 0, 1 and 2.
        field =
            vartype [at_c<0>(_val) = _1] >>
            varname [at_c<1>(_val) = _1] >>
            -vardim [at_c<2>(_val) = _1] >>
            eol;

        // Whole declaration: header, "(" line, zero or more member lines,
        // then ")" with an optional trailing end-of-line. Each parsed field
        // is appended to struct_body::fields.
        body =
            strucname [at_c<0>(_val) = _1] >>
            '(' >> eol >>
            *(field [push_back(at_c<1>(_val), _1)]) >>
            ')' >> -eol;
    }

    // All rules are declared with the grammar's Skipper.
    qi::rule<Iterator, struct_body(), Skipper> body;
    qi::rule<Iterator, struct_field(), Skipper> field;
    qi::rule<Iterator, std::string(), Skipper> strucname;
    qi::rule<Iterator, std::string(), Skipper> vartype, varname;
    qi::rule<Iterator, int(), Skipper> vardim;
};

template<typename Iterator, typename Skipper>
bool parse(Iterator &first, Iterator end, Skipper const &skipper, struct_body &mystruct)
{
    preprocessor<Iterator, Skipper> g;
    return qi::phrase_parse(first, end, g, skipper, mystruct);
}

// Parses a hard-coded sample declaration and prints the fields that were
// found. Returns 0 on a complete parse and 1 otherwise.
int main(int argc, char **argv)
{
    std::string storage = "declare(grid_point)\r\n(\r\n    int id[1];\r\n    int cp[1];\r\n    double pos[3];\r\n)";
    std::string::const_iterator iter = storage.begin();
    std::string::const_iterator end = storage.end();

    struct_body mystruct;
    bool result = parse(iter, end, qi::blank, mystruct);

    // A parse only counts as successful when the grammar matched AND the
    // whole input was consumed. Report anything else instead of exiting
    // silently with status 0.
    if (!result || iter != end)
    {
        std::cerr << "Parse failed. Unparsed remainder: '" << std::string(iter, end) << "'" << std::endl;
        return 1;
    }

    std::cout << mystruct.fields.size() << " fields are parsed." << std::endl;
    BOOST_FOREACH(struct_field const& field, mystruct.fields)
    {
        std::cout << field.type << " : " << field.name << " [ " << field.dim << " ] ;" << std::endl;
    }
    return 0;
}

我们可以看到,所有成员都被声明为数组。否则,无法正确解析标量。

declare(grid_point)
(
    int         id;
    int         cp;
    double      pos[3];
)

上述声明无法被解析。似乎 boost::spirit 总是对 [dim] 进行贪婪匹配,而实际上只有数组才需要 [dim],标量并不需要。那么该如何解决这个问题?

1 个答案:

答案 0 :(得分:2)

  • 首先,所有您的语义操作都是多余的,因为它们只是复制了标准的属性传播规则。 (Boost Spirit: "Semantic actions are evil"?)。以下内容完全等效:http://paste.ubuntu.com/10049892/

  • 你似乎对跳过器(skipper)的作用有些困惑。下面这种写法起不到任何作用:

    *lit(' ')
    

    因为空白字符已经被跳过器(skipper)跳过了

  • varname规则

    varname =
        (*lit(' ') >> lexeme[+(char_ - '[') ]) |
        (*lit(' ') >> lexeme[+(char_ - ';') ] >> ';');
    
    如果你没有[,这就会吃到行尾。这甚至包括;。修复它,例如像

    varname = lexeme[+(char_ - "[;") ];
    
  • 鉴于上面发现的对跳过器(skipper)的混淆,我建议简化为:

    vartype = +graph;
    varname = +(graph - char_("[;"));
    vardim = '[' >> int_  >> "]";
    

    我没有使用 lexeme[],而是直接从 vartype 和 varname 规则的声明中去掉了 Skipper(另见 Boost spirit skipper issues)

  • 请注意,我还从 varname 和 vardim 规则中删除了 ';'。说真的,';' 本来就不属于这两条规则!

  • 相反,只需把 ';' 放进 field 规则,并让 vardim 成为可选项:

    field =
        vartype >>
        varname >>
        -vardim >>
        ';' >> 
        eol;
    
  • 使用 BOOST_SPIRIT_DEBUG 来调试您的规则!

    #define BOOST_SPIRIT_DEBUG
    BOOST_SPIRIT_DEBUG_NODES((body)(field)(strucname)(varname)(vartype)(varname)(vardim))
    
  • 一般性的观察:这个语法看起来本应对空白不敏感,因此使用 qi::blank 作为跳过器(skipper)是一种反模式。(例如,我就遇到过一次意外:我用了一个原始字符串字面量,结果解析失败,因为它以换行符开头。)如何修复这个问题就留给读者作为练习 :)

总而言之,这里有修改后的样本:

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <fstream>
#include <string>
#include <vector>

#include <boost/foreach.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

namespace qi    = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

// One member of a parsed declaration: its type, its name and its array
// dimension.
struct struct_field
{
    // Start from a known dimension; without this constructor a
    // default-initialized struct_field would carry an indeterminate `dim`.
    struct_field() : dim(0) {}

    std::string type;   // type name as written in the declaration
    std::string name;   // member name
    int dim;            // array dimension; stays 0 when none is assigned
};

// A whole parsed declaration: the declared name plus its member fields in
// the order they were parsed.
struct struct_body
{
    std::string name;                   // text between "declare(" and ")"
    std::vector<struct_field> fields;   // one entry per parsed member
};

// Register struct_field with Boost.Fusion as the sequence
// (type, name, dim). The grammar below has no semantic actions, so the
// member order listed here is what pairs rule attributes with members.
BOOST_FUSION_ADAPT_STRUCT(
    struct_field,
    (std::string, type)
    (std::string, name)
    (int, dim)
)

// Register struct_body with Boost.Fusion as the sequence (name, fields).
BOOST_FUSION_ADAPT_STRUCT(
    struct_body,
    (std::string, name)
    (std::vector<struct_field>, fields)
)

// Qi grammar for a block of the form
//
//     declare(NAME)
//     (
//         TYPE NAME;
//         TYPE NAME[DIM];
//         ...
//     )
//
// The start rule is `body`; the grammar's attribute is a struct_body. No
// semantic actions are used: each rule's attribute is filled by attribute
// propagation into the Fusion-adapted structs.
template <typename Iterator, typename Skipper>
struct preprocessor :
    qi::grammar<Iterator, struct_body(), Skipper>
{
    preprocessor() :
        preprocessor::base_type(body)
    {
        using qi::eol;
        using qi::graph;
        using qi::lit;
        using qi::int_;
        using ascii::char_;

        // vartype/varname are declared without a skipper (see the members
        // below), so these character runs are matched without skipping.
        vartype = +graph;
        // The name stops at '[' or ';' so neither is swallowed into it.
        varname = +(graph - char_("[;"));
        // Array dimension only; the terminating ';' belongs to `field`.
        vardim  = '[' >> int_  >> "]";

        // Header line: "declare" "(" NAME ")" followed by an end-of-line.
        strucname =
            "declare" >> lit('(') >> +~char_(')') >> ')' >>
            eol;

        // One member line: type, name, optional dimension, ';', end-of-line.
        field =
            vartype >>
            varname >>
            -vardim >>
            ';' >> 
            eol;

        // Whole declaration: header, "(" line, zero or more member lines,
        // then ")" with an optional trailing end-of-line.
        body =
            strucname  >>
            '(' >> eol >>
            *field >>
            ')' >> -eol;

        // Each rule is listed exactly once.
        BOOST_SPIRIT_DEBUG_NODES((body)(field)(strucname)(vartype)(varname)(vardim))
    }

    qi::rule<Iterator, struct_body(),  Skipper> body;
    qi::rule<Iterator, struct_field(), Skipper> field;
    qi::rule<Iterator, std::string(),  Skipper> strucname;
    qi::rule<Iterator, int(),          Skipper> vardim;
    // lexemes: declared without a Skipper on purpose
    qi::rule<Iterator, std::string()> vartype, varname;
};

template<typename Iterator, typename Skipper>
bool parse(Iterator &first, Iterator end, Skipper const &skipper, struct_body &mystruct)
{
    preprocessor<Iterator, Skipper> g;
    return qi::phrase_parse(first, end, g, skipper, mystruct);
}

// Parses a hard-coded sample declaration (two scalars and one array) and
// prints the fields that were found. Returns 0 on a complete parse and 1
// otherwise.
int main()
{
    std::string const storage = "declare(grid_point)\r\n(\r\n    int    id;\r\n    int    cp;\r\n    double pos[3];\r\n)";
    std::string::const_iterator iter = storage.begin();
    std::string::const_iterator end = storage.end();

    struct_body mystruct;
    bool result = parse(iter, end, qi::blank, mystruct);

    // A parse only counts as successful when the grammar matched AND the
    // whole input was consumed. Report anything else instead of exiting
    // silently with status 0.
    if (!result || iter != end)
    {
        std::cerr << "Parse failed. Unparsed remainder: '" << std::string(iter, end) << "'" << std::endl;
        return 1;
    }

    std::cout << mystruct.fields.size() << " fields are parsed." << std::endl;
    BOOST_FOREACH(struct_field const& field, mystruct.fields)
    {
        std::cout << field.type << " : " << field.name << " [ " << field.dim << " ] ;" << std::endl;
    }
    return 0;
}

打印

3 fields are parsed.
int : id [ 0 ] ;
int : cp [ 0 ] ;
double : pos [ 3 ] ;

要拥有默认值,请将其设为

    vardim = '[' >> int_  >> "]" | qi::attr(1);
    field  = vartype >> varname >> vardim >> ';' >> eol;

在这种情况下,输出变为

3 fields are parsed.
int : id [ 1 ] ;
int : cp [ 1 ] ;
double : pos [ 3 ] ;
相关问题