使用 Spirit qi::on_success 回调在规则匹配后设置字段

问题描述

我正在尝试使用 qi::on_success 回调 (here) 在规则匹配时设置字段。下面的代码稍微改编自此 code,不过在我对规则/AST 类做了细微改动之后,它无法再识别 _rule_name。我的意图已在下面代码的注释中说明:如果 _literal 规则匹配,我想把字段 term_type 设置为 TermType::literal;如果 _rule_name 规则匹配,则设置为 TermType::rule_name。

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>

namespace qi = boost::spirit::qi;

// AST produced by the BNF parser: a syntax is a list of rules, and each rule
// has a left-hand-side name and a right-hand-side expression.
namespace Ast {
    // Discriminator telling which kind of term a Term holds.
    enum class TermType {
        literal,rule_name
    };


    // One element of an alternative: its text plus the kind of term it is.
    struct Term {
        std::string data;
        TermType term_type;
    };

    // A sequence of terms.
    using List = std::list<Term>;
    // A list of term sequences (the grammar separates them with '|').
    using Expression = std::list<List>;

    struct Rule {
        Term name; // left-hand side
        Expression rhs;
    };

    using Syntax = std::list<Rule>;
}
// Adapt the AST structs as Fusion sequences.
// NOTE(review): only `data` is listed for Ast::Term, so `term_type` is not
// part of the adapted sequence.
BOOST_FUSION_ADAPT_STRUCT(Ast::Term,data)
BOOST_FUSION_ADAPT_STRUCT(Ast::Rule,name,rhs)

namespace Parser {
    // Qi grammar for a BNF-like notation; the start rule synthesizes an Ast::Syntax.
    template<typename Iterator>
    struct BNF : qi::grammar<Iterator,Ast::Syntax()> {
        BNF() : BNF::base_type(start) {
            using namespace qi;
            _blank = blank;
            // Skip blanks, and also an end-of-line that is NOT followed by a
            // rule (negative lookahead), so a rule may continue on the next line.
            _skipper = blank | (eol >> !skip(_blank.alias())[_rule]);
            // The input is a list of rules separated by one or more line ends.
            start = skip(_skipper.alias())[_rule % +eol];

            _rule = _rule_name >> "::=" >> _expression;
            _expression = _list % '|';
            _list = +(_literal | _rule_name);
            // A literal is delimited by either double or single quotes.
            _literal = '"' >> *(_character - '"') >> '"'
                    | "'" >> *(_character - "'") >> "'";
            _character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
            // A rule name: a letter followed by letters, digits or '-', inside <...>.
            _rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

            BOOST_SPIRIT_DEBUG_NODES(
                    (_rule)(_expression)(_list)(_literal)
                            (_character)
                            (_rule_name))
        }

        /* Intended behaviour (pseudo-code, not compiled):

        qi::on_success(_term,setTermTypeHandler());

        setTermTypeHandler(){
             if term is literal
                term.term_type = TermType::literal
            else
                term.term_type = TermType::rule_name
        }
        */

    private:
        using Skipper = qi::rule<Iterator>;
        Skipper _skipper,_blank;

        qi::rule<Iterator,Ast::Syntax()> start;
        qi::rule<Iterator,Ast::Rule(),Skipper> _rule;
        qi::rule<Iterator,Ast::Expression(),Skipper> _expression;
        qi::rule<Iterator,Ast::List(),Skipper> _list;
        // Lexemes: declared without a skipper.
        qi::rule<Iterator,Ast::Term()> _literal;
        qi::rule<Iterator,Ast::Term()> _rule_name;
        // Earlier declaration, kept for reference:
        //  qi::rule<Iterator,std::string()>     _literal;
        qi::rule<Iterator,char()> _character;
    };
}

int main() {
    Parser::BNF<std::string::const_iterator> const parser;

    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
           | "f" | "g" | "h" | "i"
<digit>  ::= "0" | "1" | "2" | "3" |
             "4"
    )";

    auto it = input.begin(),itEnd = input.end();

    Ast::Syntax Syntax;
    if (parse(it,itEnd,parser,Syntax)) {
        for (auto &rule : Syntax) {
            std::cout << rule.name.data << " ::= ";
            std::string sep;
            for (auto &list : rule.rhs) {
                std::cout << sep;
                for (auto &term: list) { std::cout << term.data; }
                sep = " | ";
            };
            std::cout << "\n";
        }
    } else {
        std::cout << "Failed\n";
    }

    if (it != itEnd)
        std::cout << "Remaining: " << std::quoted(std::string(it,itEnd)) << "\n";
}

解决方法

由于您的结构 Term 已经成为用元组 (std::string,TermType) 模拟的 Name/Literal 可区分联合(discriminated union),因此我会让 _literal 和 _rule_name 都只生成一个字符串,然后用 qi::attr 附加 TermType。

所以,

struct Term {
    std::string data;
    TermType term_type;
};

对两个成员都进行适配(adapt):

BOOST_FUSION_ADAPT_STRUCT(Ast::Term,data,term_type)

声明相关规则:

qi::rule<Iterator,Ast::Term()>   _term;
qi::rule<Iterator,std::string()> _literal;
qi::rule<Iterator,std::string()> _rule_name;

被初始化为

_list       = +_term;
_term       = _literal >> attr(Ast::TermType::literal)
            | _rule_name >> attr(Ast::TermType::rule_name);
_literal    = '"' >> *(_character - '"') >> '"'
            | "'" >> *(_character - "'") >> "'";

_character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
_rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

这符合我的信条,即您应该尽量避免语义操作 (Boost Spirit: "Semantic actions are evil"?) 并将复杂性保持在最低限度。

on_success

我认为在这里使用 on_success 的想法是不明智的,因为它适用于非上下文相关的操作(例如将源位置绑定到每个 AST 节点,而不管类型如何)。

在这种情况下,您明确想要添加不同的信息(变体鉴别器),因此最好将其注入解析器表达式中对应的那个分支。

旁注?

你似乎把 Rule::name 的类型泛化成了 Term(而不是 std::string——它原先的类型是 Name),从而把事情弄复杂了。

规则的名称只能是一个名字,不可能是字面量,所以我建议:

  1. 要么将其还原为 std::string(同时丢掉 Name 原本携带的额外类型信息)

    struct Rule {
        std::string name; // lhs 
        Expression rhs;
    };
    
  2. 让 _rule_name 直接合成为 Term(把 TermType 也包含进该规则中):https://godbolt.org/z/Kbb9dP

  3. 两全其美:保留 Name,并给 Term 添加一个接受 Name 的构造函数,使 Term 可以由 Name 转换而来:

    explicit Term(Name other)
        : data(std::move(other)),term_type(TermType::rule_name)
    { }
    

使用 ADT 进行文学编程

请注意,丢失 Name 与字面量之间的类型区分并非没有代价,因为输出会变得非常错误。我建议采用最后一种方法(上面的第 3 条),并为您自己的变体模拟类型添加自定义的 operator<<:

// Streams a Term according to its discriminator: a rule name is wrapped in a
// Name before streaming, a literal is written through std::quoted, and any
// other value is written as "?".
friend std::ostream& operator<<(std::ostream& os,Term const& term) {
    switch(term.term_type) {
        case TermType::rule_name: return os << Name(term.data);
        case TermType::literal:   return os << std::quoted(term.data);
        default:                  return os << "?";
    }
}

现在您可以享受自己的变体类型并再次正确输出:

Live On Compiler Explorer

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>

namespace qi = boost::spirit::qi;

// AST for the BNF parser: a Syntax is a list of Rules; each Rule has a Name on
// the left-hand side and an Expression (list of term sequences) on the right.
namespace Ast {
    // A rule name: a std::string with its own type so that it streams as `<name>`.
    struct Name : std::string {
        using std::string::string;
        using std::string::operator=;
        explicit Name(std::string s) : std::string(std::move(s)) {}

        // Writes the name wrapped in angle brackets.
        friend std::ostream& operator<<(std::ostream& os,Name const& n) {
            // c_str() selects the character-string inserter, so this operator
            // does not end up calling itself.
            return os << '<' << n.c_str() << '>';
        }
    };

    // Discriminator telling which kind of term a Term holds.
    enum class TermType { literal,rule_name };

    // Emulated variant: the text of a term plus a tag saying what it is.
    struct Term {
        std::string data;
        // Initialized so that a default-constructed Term never exposes an
        // indeterminate discriminator to operator<<.
        TermType term_type = TermType::literal;

        Term() = default;
        // Builds a rule-name term from a Name.
        explicit Term(Name other)
            : data(std::move(other)),term_type(TermType::rule_name)
        { }

        // Streams rule names as `<name>`, literals quoted, anything else as "?".
        friend std::ostream& operator<<(std::ostream& os,Term const& term) {
            switch(term.term_type) {
                case TermType::rule_name: return os << Name(term.data);
                case TermType::literal:   return os << std::quoted(term.data);
                default:                  return os << "?";
            }
        }
    };

    // A sequence of terms.
    using List = std::list<Term>;
    // A list of term sequences (the grammar separates them with '|').
    using Expression = std::list<List>;

    struct Rule {
        Name name; // left-hand side
        Expression rhs;
    };

    using Syntax = std::list<Rule>;
}
// Adapt the AST structs as Fusion sequences. Both Term members are listed, in
// declaration order, so that the (std::string, TermType) attribute of
// `_literal >> attr(...)` maps onto (data, term_type).
BOOST_FUSION_ADAPT_STRUCT(Ast::Term,data,term_type)
BOOST_FUSION_ADAPT_STRUCT(Ast::Rule,name,rhs)

namespace Parser {
    // Qi grammar for a BNF-like notation; the start rule synthesizes an Ast::Syntax.
    template <typename Iterator>
    struct BNF : qi::grammar<Iterator,Ast::Syntax()> {
        BNF()
            : BNF::base_type(start)
        {
            using namespace qi;
            // clang-format off
            _blank      = blank;
            // Skip blanks, and also an end-of-line that is NOT followed by a
            // rule (negative lookahead), so a rule may continue on the next line.
            _skipper    = blank | (eol >> !skip(_blank.alias()) [ _rule ]);
            // The input is a list of rules separated by one or more line ends.
            start       = skip(_skipper.alias()) [ _rule % +eol ];

            _rule       = _rule_name >> "::=" >> _expression;
            _expression = _list % '|';
            _list       = +_term;
            // The literal branch appends its discriminator with attr(); the
            // rule-name branch yields a Name (presumably converted through
            // Term's explicit Name constructor -- confirm).
            _term       = _literal >> attr(Ast::TermType::literal)
                        | _rule_name;
            // A literal is delimited by either double or single quotes.
            _literal    = '"' >> *(_character - '"') >> '"'
                        | "'" >> *(_character - "'") >> "'";

            _character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
            // A rule name: a letter followed by letters, digits or '-', inside
            // <...>; raw[] exposes the matched source range as the attribute.
            _rule_name = '<' >> qi::raw[ (alpha >> *(alnum | char_('-'))) ] >> '>';

            // clang-format on
            BOOST_SPIRIT_DEBUG_NODES(
                (_rule)(_expression)(_list)(_literal)(_character)(_rule_name))
        }

      private:
        using Skipper = qi::rule<Iterator>;
        Skipper _skipper,_blank;

        qi::rule<Iterator,Ast::Syntax()>     start;
        qi::rule<Iterator,Ast::Rule(),Skipper> _rule;
        qi::rule<Iterator,Ast::Expression(),Skipper> _expression;
        qi::rule<Iterator,Ast::List(),Skipper> _list;
        // Lexemes: declared without a skipper.
        qi::rule<Iterator,Ast::Term()>   _term;
        qi::rule<Iterator,std::string()> _literal;
        qi::rule<Iterator,Ast::Name()>   _rule_name;
        qi::rule<Iterator,char()>        _character;
    };
}

int main() {
    Parser::BNF<std::string::const_iterator> const parser;

    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
           | "f" | "g" | "h" | "i"
<digit>  ::= "0" | "1" | "2" | "3" |
             "4"
    )";

    auto it = input.begin(),itEnd = input.end();

    Ast::Syntax syntax;
    if (parse(it,itEnd,parser,syntax)) {
        for (auto &rule : syntax) {
            std::cout << rule.name << " ::= ";
            std::string sep;
            for (auto &list : rule.rhs) {
                std::cout << std::exchange(sep," | ");
                for (auto &term: list) { std::cout << term; }
            };
            std::cout << "\n";
        }
    } else {
        std::cout << "Failed\n";
    }

    if (it != itEnd)
        std::cout << "Remaining: " << std::quoted(std::string(it,itEnd)) << "\n";
}

输出:

<code> ::= <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i"
<digit> ::= "0" | "1" | "2" | "3" | "4"

相关问答

Selenium Web驱动程序和Java。元素在(x,y)点处不可单击。其...
Python-如何使用点“。” 访问字典成员?
Java 字符串是不可变的。到底是什么意思?
Java中的“ final”关键字如何工作?(我仍然可以修改对象。...
“loop:”在Java代码中。这是什么,为什么要编译?
java.lang.ClassNotFoundException:sun.jdbc.odbc.JdbcOdbc...