用 boost::spirit 解析 Python 语法 - 问题

问题描述

我正在尝试使用 boost::spirit 库编写一个 Python 解析器。代码如下:
// Boost.Spirit Qi grammar for Python source in which layout tokens appear in
// the input as the literal markers "<NEWLINE>", "<INDENT>", "<DEDENT>" and
// "<EOT>" (see the lit(...) rules at the top of the constructor). The grammar
// declares no attribute signature, so it only accepts or rejects input.
//
// Every rule, including the character-level ones (identifier, digits, string
// items), is declared with the space_type skipper.
//
// NOTE(review): the backslash-escaped quotes (\" and \') throughout look like
// an artifact of how this snippet was captured -- confirm against the
// original source before compiling.
template <typename Iterator>
class Parser : public qi::grammar<Iterator,space_type>
{
public:
    // Defines every rule of the grammar. The start rule handed to the base
    // class is small_stmt, so a parse begins at a single small statement
    // rather than at file_input / single_input / eval_input.
    Parser() : Parser::base_type(small_stmt)
    {
        // Layout and end-of-input markers, matched as literal text.
        NEWLINE = lit(\"<NEWLINE>\");
        INDENT = lit(\"<INDENT>\");
        DEDENT = lit(\"<DEDENT>\");
        ENDMARKER = lit(\"<EOT>\");
        NAME = identifier.alias();
        // NOTE(review): Qi's `|` is ordered choice (first matching alternative
        // wins). `integer` is listed first, so on input such as "10L" or "1.5"
        // it succeeds on the leading digits and the later alternatives are
        // never tried.
        NUMBER = integer|longinteger|floatnumber|imagnumber;
        STRING = stringliteral.alias();

        // NOTE(review): there is no keyword exclusion here, so words such as
        // "pass" or "print" also satisfy this rule. Because the rule carries a
        // skipper, whitespace is also skipped between the individual
        // characters; qi::lexeme[] is the usual way to prevent that.
        identifier = (alpha | \'_\') >> *(alpha | digit | \'_\');

        // String literals: optional prefix followed by a short or long string.
        stringliteral = -stringprefix  >> (shortstring | longstring);
        stringprefix = lit(\"r\") | lit(\"u\") | lit(\"ur\") | lit(\"R\") | lit(\"U\") | lit(\"UR\") | lit(\"Ur\") | lit(\"uR\") | lit(\"b\") | lit(\"B\") | lit(\"br\") | lit(\"Br\") | lit(\"bR\") | lit(\"BR\");
        shortstring =  \"\'\" >> *(shortstringitem - \"\'\") >> \"\'\" | \"\\\"\" >> *(shortstringitem - \"\\\"\") >> \"\\\"\";
        longstring = \"\'\'\'\" >> *longstringitem >> \"\'\'\'\" | \"\\\"\\\"\\\"\" >> *longstringitem >> \"\\\"\\\"\\\"\";
        shortstringitem = shortstringchar | escapeseq;
        longstringitem = longstringchar | escapeseq;
        shortstringchar = char_ - \"\\\\\" - \"\\n\";
        longstringchar = char_ - \"\\\\\";
        escapeseq = \'\\\\\' >> char_;

        // Integer literals (decimal, octal, hexadecimal, binary, long suffix).
        longinteger = integer >> (lit(\"l\") | lit(\"L\"));
        // NOTE(review): decimalinteger is tried first and accepts a lone "0",
        // so the "0o…", "0x…" and "0b…" alternatives are shadowed for inputs
        // that start with "0".
        integer = decimalinteger | octinteger | hexinteger | bininteger;
        decimalinteger = nonzerodigit >> *digit | lit(\"0\");
        octinteger = lit(\"0\") >> (lit(\"o\") | lit(\"O\")) >> +octdigit | lit(\"0\") >> +octdigit;
        hexinteger = lit(\"0\") >> (lit(\"x\") | lit(\"X\")) >> +hexdigit;
        bininteger = lit(\"0\") >> (lit(\"b\") | lit(\"B\")) >> +bindigit;
        nonzerodigit = char_(\'1\',\'9\');
        octdigit = char_(\'0\',\'7\');
        bindigit = lit(\"0\") | lit(\"1\");
        hexdigit = digit | char_(\'a\',\'f\') | char_(\'A\',\'F\');

        // Floating-point literals.
        floatnumber = pointfloat | exponentfloat;
        pointfloat = -intpart >> fraction | intpart >> \".\";
        exponentfloat = (intpart | pointfloat) >> exponent;
        intpart = +digit;
        fraction = \".\" >> +digit;
        exponent = (lit(\"e\") | lit(\"E\")) >> -(lit(\"+\") | lit(\"-\")) >> +digit;

        // Imaginary literals: a float or integer part followed by j/J.
        imagnumber = (floatnumber | intpart) >> (lit(\"j\") | lit(\"J\"));

        // Statement- and expression-level grammar. Keywords are matched with
        // plain lit(...), which does not check for a word boundary after the
        // keyword.
        single_input = NEWLINE|simple_stmt|compound_stmt >> NEWLINE;
        file_input = *(NEWLINE|stmt) >> ENDMARKER;
        eval_input = testlist >> *NEWLINE >> ENDMARKER;
        decorator = lit(\"@\") >> dotted_name >> -( lit(\"(\") >> -(arglist) >> lit(\")\") ) >> NEWLINE;
        decorators = +decorator;
        decorated = decorators >> (classdef|funcdef);
        funcdef = lit(\"def\") >> NAME >> parameters >> lit(\":\") >> suite;
        parameters = lit(\"(\") >> -(varargslist) >> lit(\")\");
        varargslist = (*(fpdef >> -(lit(\"=\") >> test) >> lit(\",\")) >> (lit(\"*\") >> NAME >> -(lit(\",\") >> lit(\"**\") >> NAME)|lit(\"**\") >> NAME)|fpdef >> -(lit(\"=\") >> test) >> *(lit(\",\") >> fpdef >> -(lit(\"=\") >> test)) >> -(lit(\",\")));
        fpdef = NAME|lit(\"(\") >> fplist >> lit(\")\");
        fplist = fpdef >> *(lit(\",\") >> fpdef) >> -(lit(\",\"));
        stmt = simple_stmt|compound_stmt;
        simple_stmt = small_stmt >> *(lit(\";\") >> small_stmt) >> -(lit(\";\")) >> NEWLINE;
        // NOTE(review): expr_stmt is tried first. Since NAME accepts keywords,
        // input such as "pass" is consumed by expr_stmt (as a name) before
        // pass_stmt and the other keyword statements are ever attempted.
        small_stmt = (expr_stmt|print_stmt|del_stmt|pass_stmt|flow_stmt|import_stmt|global_stmt|exec_stmt|assert_stmt);
        expr_stmt = testlist >> (augassign >> (yield_expr|testlist)|*(lit(\"=\") >> (yield_expr|testlist)));
        augassign = (lit(\"+=\")|lit(\"-=\")|lit(\"*=\")|lit(\"/=\")|lit(\"%=\")|lit(\"&=\")|lit(\"|=\")|lit(\"^=\")|lit(\"<<=\")|lit(\">>=\")|lit(\"**=\")|lit(\"//=\"));
        print_stmt = lit(\"print\") >> ( -( test >> *(lit(\",\") >> test) >> -(lit(\",\")) )|lit(\">>\") >> test >> -( +(lit(\",\")) ) );
        del_stmt = lit(\"del\") >> exprlist;
        pass_stmt = lit(\"pass\");
        flow_stmt = break_stmt|continue_stmt|return_stmt|raise_stmt|yield_stmt;
        break_stmt = lit(\"break\");
        continue_stmt = lit(\"continue\");
        return_stmt = lit(\"return\") >> -(testlist);
        yield_stmt = yield_expr.alias();
        raise_stmt = lit(\"raise\") >> -(test >> -(lit(\",\") >> test >> -(lit(\",\") >> test)));
        import_stmt = import_name|import_from;
        import_name = lit(\"import\") >> dotted_as_names;
        import_from = (lit(\"from\") >> (*lit(\".\") >> dotted_name|+lit(\".\")) >> lit(\"import\") >> (lit(\"*\")|lit(\"(\") >> import_as_names >> lit(\")\")|import_as_names));
        import_as_name = NAME >> -(lit(\"as\") >> NAME);
        dotted_as_name = dotted_name >> -(lit(\"as\") >> NAME);
        import_as_names = import_as_name >> *(lit(\",\") >> import_as_name) >> -(lit(\",\"));
        dotted_as_names = dotted_as_name >> *(lit(\",\") >> dotted_as_name);
        dotted_name = NAME >> *(lit(\".\") >> NAME);
        global_stmt = lit(\"global\") >> NAME >> *(lit(\",\") >> NAME);
        exec_stmt = lit(\"exec\") >> expr >> -(lit(\"in\") >> test >> -(lit(\",\") >> test));
        assert_stmt = lit(\"assert\") >> test >> -(lit(\",\") >> test);
        compound_stmt = if_stmt|while_stmt|for_stmt|try_stmt|with_stmt|funcdef|classdef|decorated;
        if_stmt = lit(\"if\") >> test >> lit(\":\") >> suite >> *(lit(\"elif\") >> test >> lit(\":\") >> suite) >> -(lit(\"else\") >> lit(\":\") >> suite);
        while_stmt = lit(\"while\") >> test >> lit(\":\") >> suite >> -(lit(\"else\") >> lit(\":\") >> suite);
        for_stmt = lit(\"for\") >> exprlist >> lit(\"in\") >> testlist >> lit(\":\") >> suite >> -(lit(\"else\") >> lit(\":\") >> suite);
        try_stmt = (lit(\"try\") >> lit(\":\") >> suite >> (+(except_clause >> lit(\":\") >> suite) >> -(lit(\"else\") >> lit(\":\") >> suite) >> -(lit(\"finally\") >> lit(\":\") >> suite)|lit(\"finally\") >> lit(\":\") >> suite));
        with_stmt = lit(\"with\") >> with_item >> *(lit(\",\") >> with_item) >> lit(\":\") >> suite;
        with_item = test >> -(lit(\"as\") >> expr);
        except_clause = lit(\"except\") >> -(test >> -((lit(\"as\")|lit(\",\")) >> test));
        suite = simple_stmt|NEWLINE >> INDENT >> +stmt >> DEDENT;
        testlist_safe = old_test >> -(+(lit(\",\") >> old_test) >> -(lit(\",\")));
        old_test = or_test|old_lambdef;
        old_lambdef = lit(\"lambda\") >> -(varargslist) >> lit(\":\") >> old_test;
        test = or_test >> -(lit(\"if\") >> or_test >> lit(\"else\") >> test)|lambdef;
        or_test = and_test >> *(lit(\"or\") >> and_test);
        and_test = not_test >> *(lit(\"and\") >> not_test);
        not_test = lit(\"not\") >> not_test|comparison;
        comparison = expr >> *(comp_op >> expr);
        // NOTE(review): ordered choice again -- lit("<") and lit(">") come
        // first, so "<=", ">=" and "<>" are never matched as a unit, and
        // lit("is") shadows lit("is not"). lit("<") also matches the leading
        // '<' of the "<NEWLINE>" / "<EOT>" markers, so a marker following an
        // expression can be taken for the start of a comparison.
        comp_op = lit(\"<\")|lit(\">\")|lit(\"==\")|lit(\">=\")|lit(\"<=\")|lit(\"<>\")|lit(\"!=\")|lit(\"in\")|lit(\"not in\")|lit(\"is\")|lit(\"is not\");
        expr = xor_expr >> *(lit(\"|\") >> xor_expr);
        xor_expr = and_expr >> *(lit(\"^\") >> and_expr);
        and_expr = shift_expr >> *(lit(\"&\") >> shift_expr);
        shift_expr = arith_expr >> *((lit(\"<<\")|lit(\">>\")) >> arith_expr);
        arith_expr = term >> *((lit(\"+\")|lit(\"-\")) >> term);
        // NOTE(review): lit("/") precedes lit("//"), so "//" is never matched
        // as a single operator.
        term = factor >> *((lit(\"*\")|lit(\"/\")|lit(\"%\")|lit(\"//\")) >> factor);
        factor = (lit(\"+\")|lit(\"-\")|lit(\"~\")) >> factor|power;
        power = atom >> *trailer >> -(lit(\"**\") >> factor);
        // NOTE(review): the listmaker alternative is delimited by "-(" and
        // ")"; Python list displays use "[" and "]", so this is presumably a
        // typo or a garbled character -- confirm.
        atom = (lit(\"(\") >> -(yield_expr|testlist_comp) >> lit(\")\")|lit(\"-(\") >> -(listmaker) >> lit(\")\")|lit(\"{\") >> -(dictorsetmaker) >> lit(\"}\")|lit(\"`\") >> testlist1 >> lit(\"`\")|NAME|NUMBER|+STRING);
        listmaker = test >> ( list_for|*(lit(\",\")) );
        testlist_comp = test >> ( comp_for|*(lit(\",\")) );
        lambdef = lit(\"lambda\") >> -(varargslist) >> lit(\":\") >> test;
        trailer = lit(\"(\") >> -(arglist) >> lit(\")\")|lit(\"[\") >> subscriptlist >> lit(\"]\")|lit(\".\") >> NAME;
        subscriptlist = subscript >> *(lit(\",\") >> subscript) >> -(lit(\",\"));
        subscript = lit(\".\") >> lit(\".\") >> lit(\".\")|test|-(test) >> lit(\":\") >> -(test) >> -(sliceop);
        sliceop = lit(\":\") >> -(test);
        exprlist = expr >> *(lit(\",\") >> expr) >> -(lit(\",\"));
        // NOTE(review): as written this accepts one test followed only by
        // commas (no further tests); listmaker, testlist_comp and the second
        // branch of dictorsetmaker have the same shape -- confirm intended.
        testlist = test >> *(lit(\",\"));
        dictorsetmaker = ( (test >> lit(\":\") >> test >> (comp_for|*(lit(\",\") >> test >> lit(\":\") >> test) >> -(lit(\",\"))))|(test >> (comp_for|*(lit(\",\")))) );
        classdef = lit(\"class\") >> NAME >> -(lit(\"(\") >> -(testlist) >> lit(\")\")) >> lit(\":\") >> suite;
        arglist = *(argument >> lit(\",\")) >> (argument >> -(lit(\",\"))|lit(\"*\") >> test >> *(lit(\",\") >> argument) >> -(lit(\",\") >> lit(\"**\") >> test)|lit(\"**\") >> test);
        // NOTE(review): both alternatives start with `test` and the first one
        // succeeds whenever `test` does, so the keyword-argument form
        // (test "=" test) is never reached.
        argument = test >> -(comp_for)|test >> lit(\"=\") >> test;
        list_iter = list_for|list_if;
        list_for = lit(\"for\") >> exprlist >> lit(\"in\") >> testlist_safe >> -(list_iter);
        list_if = lit(\"if\") >> old_test >> -(list_iter);
        comp_iter = comp_for|comp_if;
        comp_for = lit(\"for\") >> exprlist >> lit(\"in\") >> or_test >> -(comp_iter);
        comp_if = lit(\"if\") >> old_test >> -(comp_iter);
        testlist1 = test >> *(lit(\",\") >> test);
        encoding_decl = NAME.alias();
        yield_expr = lit(\"yield\") >> -(testlist);


    }

    // Lexemes: layout markers and token-level aliases.
    qi::rule<Iterator,space_type> NEWLINE;
    qi::rule<Iterator,space_type> INDENT;
    qi::rule<Iterator,space_type> DEDENT;
    qi::rule<Iterator,space_type> ENDMARKER;
    qi::rule<Iterator,space_type> NAME;
    qi::rule<Iterator,space_type> NUMBER;
    qi::rule<Iterator,space_type> STRING;

    // Identifier.
    qi::rule<Iterator,space_type> identifier;

    // String literal.
    qi::rule<Iterator,space_type> stringliteral;
    qi::rule<Iterator,space_type> stringprefix;
    qi::rule<Iterator,space_type> shortstring;
    qi::rule<Iterator,space_type> longstring;
    qi::rule<Iterator,space_type> shortstringitem;
    qi::rule<Iterator,space_type> longstringitem;
    qi::rule<Iterator,space_type> shortstringchar;
    qi::rule<Iterator,space_type> longstringchar;
    qi::rule<Iterator,space_type> escapeseq;

    // Integer literal.
    qi::rule<Iterator,space_type> longinteger;
    qi::rule<Iterator,space_type> integer;
    qi::rule<Iterator,space_type> decimalinteger;
    qi::rule<Iterator,space_type> octinteger;
    qi::rule<Iterator,space_type> hexinteger;
    qi::rule<Iterator,space_type> bininteger;
    qi::rule<Iterator,space_type> nonzerodigit;
    qi::rule<Iterator,space_type> octdigit;
    qi::rule<Iterator,space_type> bindigit;
    qi::rule<Iterator,space_type> hexdigit;

    // Float literal.
    qi::rule<Iterator,space_type> floatnumber;
    qi::rule<Iterator,space_type> pointfloat;
    qi::rule<Iterator,space_type> exponentfloat;
    qi::rule<Iterator,space_type> intpart;
    qi::rule<Iterator,space_type> fraction;
    qi::rule<Iterator,space_type> exponent;

    // Imaginary literal.
    qi::rule<Iterator,space_type> imagnumber;

    // Python grammar: statements, expressions and their helpers.
    qi::rule<Iterator,space_type> single_input;
    qi::rule<Iterator,space_type> file_input;
    qi::rule<Iterator,space_type> eval_input;
    qi::rule<Iterator,space_type> decorator;
    qi::rule<Iterator,space_type> decorators;
    qi::rule<Iterator,space_type> decorated;
    qi::rule<Iterator,space_type> funcdef;
    qi::rule<Iterator,space_type> parameters;
    qi::rule<Iterator,space_type> varargslist;
    qi::rule<Iterator,space_type> fpdef;
    qi::rule<Iterator,space_type> fplist;
    qi::rule<Iterator,space_type> stmt;
    qi::rule<Iterator,space_type> simple_stmt;
    qi::rule<Iterator,space_type> small_stmt;
    qi::rule<Iterator,space_type> expr_stmt;
    qi::rule<Iterator,space_type> augassign;
    qi::rule<Iterator,space_type> print_stmt;
    qi::rule<Iterator,space_type> del_stmt;
    qi::rule<Iterator,space_type> pass_stmt;
    qi::rule<Iterator,space_type> flow_stmt;
    qi::rule<Iterator,space_type> break_stmt;
    qi::rule<Iterator,space_type> continue_stmt;
    qi::rule<Iterator,space_type> return_stmt;
    qi::rule<Iterator,space_type> yield_stmt;
    qi::rule<Iterator,space_type> raise_stmt;
    qi::rule<Iterator,space_type> import_stmt;
    qi::rule<Iterator,space_type> import_name;
    qi::rule<Iterator,space_type> import_from;
    qi::rule<Iterator,space_type> import_as_name;
    qi::rule<Iterator,space_type> dotted_as_name;
    qi::rule<Iterator,space_type> import_as_names;
    qi::rule<Iterator,space_type> dotted_as_names;
    qi::rule<Iterator,space_type> dotted_name;
    qi::rule<Iterator,space_type> global_stmt;
    qi::rule<Iterator,space_type> exec_stmt;
    qi::rule<Iterator,space_type> assert_stmt;
    qi::rule<Iterator,space_type> compound_stmt;
    qi::rule<Iterator,space_type> if_stmt;
    qi::rule<Iterator,space_type> while_stmt;
    qi::rule<Iterator,space_type> for_stmt;
    qi::rule<Iterator,space_type> try_stmt;
    qi::rule<Iterator,space_type> with_stmt;
    qi::rule<Iterator,space_type> with_item;
    qi::rule<Iterator,space_type> except_clause;
    qi::rule<Iterator,space_type> suite;
    qi::rule<Iterator,space_type> testlist_safe;
    qi::rule<Iterator,space_type> old_test;
    qi::rule<Iterator,space_type> old_lambdef;
    qi::rule<Iterator,space_type> test;
    qi::rule<Iterator,space_type> or_test;
    qi::rule<Iterator,space_type> and_test;
    qi::rule<Iterator,space_type> not_test;
    qi::rule<Iterator,space_type> comparison;
    qi::rule<Iterator,space_type> comp_op;
    qi::rule<Iterator,space_type> expr;
    qi::rule<Iterator,space_type> xor_expr;
    qi::rule<Iterator,space_type> and_expr;
    qi::rule<Iterator,space_type> shift_expr;
    qi::rule<Iterator,space_type> arith_expr;
    qi::rule<Iterator,space_type> term;
    qi::rule<Iterator,space_type> factor;
    qi::rule<Iterator,space_type> power;
    qi::rule<Iterator,space_type> atom;
    qi::rule<Iterator,space_type> listmaker;
    qi::rule<Iterator,space_type> testlist_comp;
    qi::rule<Iterator,space_type> lambdef;
    qi::rule<Iterator,space_type> trailer;
    qi::rule<Iterator,space_type> subscriptlist;
    qi::rule<Iterator,space_type> subscript;
    qi::rule<Iterator,space_type> sliceop;
    qi::rule<Iterator,space_type> exprlist;
    qi::rule<Iterator,space_type> testlist;
    qi::rule<Iterator,space_type> dictorsetmaker;
    qi::rule<Iterator,space_type> classdef;
    qi::rule<Iterator,space_type> arglist;
    qi::rule<Iterator,space_type> argument;
    qi::rule<Iterator,space_type> list_iter;
    qi::rule<Iterator,space_type> list_for;
    qi::rule<Iterator,space_type> list_if;
    qi::rule<Iterator,space_type> comp_iter;
    qi::rule<Iterator,space_type> comp_for;
    qi::rule<Iterator,space_type> comp_if;
    qi::rule<Iterator,space_type> testlist1;
    qi::rule<Iterator,space_type> encoding_decl;
    qi::rule<Iterator,space_type> yield_expr;
};
问题是当我尝试解析简单文件时:
pass
经过词法分析器模块处理之后,它变成了:
pass <NEWLINE> <EOT>
解析失败,并在第一个字符处停止。当我改用 pass_stmt 规则解析此文件时,一切正常(只是 <NEWLINE> 和 <EOT> 仍然剩余未被解析,但 pass 已被“消耗”)。当我尝试用上一级的规则 small_stmt 解析时,解析器停在
> <EOT>
此时已消耗的部分是
pass <NEWLINE
再上一级的 simple_stmt 与 file_input 的结果相同——解析器在第一个字符处就停止。在加入 PYTHON GRAMMAR 部分(取自 http://docs.python.org/reference/grammar.html)所定义的语法之前,一切都运行正常:解析器能够识别标识符、字面量、数字等。有人知道这里出了什么问题吗?

解决方法

我建议您按照此处的说明启用调试,这能让您看清实际发生了什么。一般来说,我建议逐步构建语法,而不是试图一次性完成一个庞大的实现。您上面给出的代码对于不熟悉它的读者来说很难理解,因为它很长而且没有注释。编写语法与编写“普通”代码非常相似,封装是成功的关键。请尝试构建各自覆盖独立部分的较小语法,再根据需要组合这些子语法。有关最佳实践,请参见此处。

相关问答

依赖报错 idea导入项目后依赖报错,解决方案:https://blog....
错误1:代码生成器依赖和mybatis依赖冲突 启动项目时报错如下...
错误1:gradle项目控制台输出为乱码 # 解决方案:https://bl...
错误还原:在查询的过程中,传入的workType为0时,该条件不起...
报错如下,gcc版本太低 ^ server.c:5346:31: 错误:‘struct...