问题描述
||
我正在尝试使用boost :: spirit库编写一个python解析器。这是代码:
template <typename Iterator>
class Parser : public qi::grammar<Iterator,space_type>
{
public:
Parser() : Parser::base_type(small_stmt)
{
NEWLINE = lit(\"<NEWLINE>\");
INDENT = lit(\"<INDENT>\");
DEDENT = lit(\"<DEDENT>\");
ENDMARKER = lit(\"<EOT>\");
NAME = identifier.alias();
NUMBER = integer|longinteger|floatnumber|imagnumber;
STRING = stringliteral.alias();
identifier = (alpha | \'_\') >> *(alpha | digit | \'_\');
stringliteral = -stringprefix >> (shortstring | longstring);
stringprefix = lit(\"r\") | lit(\"u\") | lit(\"ur\") | lit(\"R\") | lit(\"U\") | lit(\"UR\") | lit(\"Ur\") | lit(\"uR\") | lit(\"b\") | lit(\"B\") | lit(\"br\") | lit(\"Br\") | lit(\"bR\") | lit(\"BR\");
shortstring = \"\'\" >> *(shortstringitem - \"\'\") >> \"\'\" | \"\\\"\" >> *(shortstringitem - \"\\\"\") >> \"\\\"\";
longstring = \"\'\'\'\" >> *longstringitem >> \"\'\'\'\" | \"\\\"\\\"\\\"\" >> *longstringitem >> \"\\\"\\\"\\\"\";
shortstringitem = shortstringchar | escapeseq;
longstringitem = longstringchar | escapeseq;
shortstringchar = char_ - \"\\\\\" - \"\\n\";
longstringchar = char_ - \"\\\\\";
escapeseq = \'\\\\\' >> char_;
longinteger = integer >> (lit(\"l\") | lit(\"L\"));
integer = decimalinteger | octinteger | hexinteger | bininteger;
decimalinteger = nonzerodigit >> *digit | lit(\"0\");
octinteger = lit(\"0\") >> (lit(\"o\") | lit(\"O\")) >> +octdigit | lit(\"0\") >> +octdigit;
hexinteger = lit(\"0\") >> (lit(\"x\") | lit(\"X\")) >> +hexdigit;
bininteger = lit(\"0\") >> (lit(\"b\") | lit(\"B\")) >> +bindigit;
nonzerodigit = char_(\'1\',\'9\');
octdigit = char_(\'0\',\'7\');
bindigit = lit(\"0\") | lit(\"1\");
hexdigit = digit | char_(\'a\',\'f\') | char_(\'A\',\'F\');
floatnumber = pointfloat | exponentfloat;
pointfloat = -intpart >> fraction | intpart >> \".\";
exponentfloat = (intpart | pointfloat) >> exponent;
intpart = +digit;
fraction = \".\" >> +digit;
exponent = (lit(\"e\") | lit(\"E\")) >> -(lit(\"+\") | lit(\"-\")) >> +digit;
imagnumber = (floatnumber | intpart) >> (lit(\"j\") | lit(\"J\"));
single_input = NEWLINE|simple_stmt|compound_stmt >> NEWLINE;
file_input = *(NEWLINE|stmt) >> ENDMARKER;
eval_input = testlist >> *NEWLINE >> ENDMARKER;
decorator = lit(\"@\") >> dotted_name >> -( lit(\"(\") >> -(arglist) >> lit(\")\") ) >> NEWLINE;
decorators = +decorator;
decorated = decorators >> (classdef|funcdef);
funcdef = lit(\"def\") >> NAME >> parameters >> lit(\":\") >> suite;
parameters = lit(\"(\") >> -(varargslist) >> lit(\")\");
varargslist = (*(fpdef >> -(lit(\"=\") >> test) >> lit(\",\")) >> (lit(\"*\") >> NAME >> -(lit(\",\") >> lit(\"**\") >> NAME)|lit(\"**\") >> NAME)|fpdef >> -(lit(\"=\") >> test) >> *(lit(\",\") >> fpdef >> -(lit(\"=\") >> test)) >> -(lit(\",\")));
fpdef = NAME|lit(\"(\") >> fplist >> lit(\")\");
fplist = fpdef >> *(lit(\",\") >> fpdef) >> -(lit(\",\"));
stmt = simple_stmt|compound_stmt;
simple_stmt = small_stmt >> *(lit(\";\") >> small_stmt) >> -(lit(\";\")) >> NEWLINE;
small_stmt = (expr_stmt|print_stmt|del_stmt|pass_stmt|flow_stmt|import_stmt|global_stmt|exec_stmt|assert_stmt);
expr_stmt = testlist >> (augassign >> (yield_expr|testlist)|*(lit(\"=\") >> (yield_expr|testlist)));
augassign = (lit(\"+=\")|lit(\"-=\")|lit(\"*=\")|lit(\"/=\")|lit(\"%=\")|lit(\"&=\")|lit(\"|=\")|lit(\"^=\")|lit(\"<<=\")|lit(\">>=\")|lit(\"**=\")|lit(\"//=\"));
print_stmt = lit(\"print\") >> ( -( test >> *(lit(\",\") >> test) >> -(lit(\",\")) )|lit(\">>\") >> test >> -( +(lit(\",\")) ) );
del_stmt = lit(\"del\") >> exprlist;
pass_stmt = lit(\"pass\");
flow_stmt = break_stmt|continue_stmt|return_stmt|raise_stmt|yield_stmt;
break_stmt = lit(\"break\");
continue_stmt = lit(\"continue\");
return_stmt = lit(\"return\") >> -(testlist);
yield_stmt = yield_expr.alias();
raise_stmt = lit(\"raise\") >> -(test >> -(lit(\",\") >> test >> -(lit(\",\") >> test)));
import_stmt = import_name|import_from;
import_name = lit(\"import\") >> dotted_as_names;
import_from = (lit(\"from\") >> (*lit(\".\") >> dotted_name|+lit(\".\")) >> lit(\"import\") >> (lit(\"*\")|lit(\"(\") >> import_as_names >> lit(\")\")|import_as_names));
import_as_name = NAME >> -(lit(\"as\") >> NAME);
dotted_as_name = dotted_name >> -(lit(\"as\") >> NAME);
import_as_names = import_as_name >> *(lit(\",\") >> import_as_name) >> -(lit(\",\"));
dotted_as_names = dotted_as_name >> *(lit(\",\") >> dotted_as_name);
dotted_name = NAME >> *(lit(\".\") >> NAME);
global_stmt = lit(\"global\") >> NAME >> *(lit(\",\") >> NAME);
exec_stmt = lit(\"exec\") >> expr >> -(lit(\"in\") >> test >> -(lit(\",\") >> test));
assert_stmt = lit(\"assert\") >> test >> -(lit(\",\") >> test);
compound_stmt = if_stmt|while_stmt|for_stmt|try_stmt|with_stmt|funcdef|classdef|decorated;
if_stmt = lit(\"if\") >> test >> lit(\":\") >> suite >> *(lit(\"elif\") >> test >> lit(\":\") >> suite) >> -(lit(\"else\") >> lit(\":\") >> suite);
while_stmt = lit(\"while\") >> test >> lit(\":\") >> suite >> -(lit(\"else\") >> lit(\":\") >> suite);
for_stmt = lit(\"for\") >> exprlist >> lit(\"in\") >> testlist >> lit(\":\") >> suite >> -(lit(\"else\") >> lit(\":\") >> suite);
try_stmt = (lit(\"try\") >> lit(\":\") >> suite >> (+(except_clause >> lit(\":\") >> suite) >> -(lit(\"else\") >> lit(\":\") >> suite) >> -(lit(\"finally\") >> lit(\":\") >> suite)|lit(\"finally\") >> lit(\":\") >> suite));
with_stmt = lit(\"with\") >> with_item >> *(lit(\",\") >> with_item) >> lit(\":\") >> suite;
with_item = test >> -(lit(\"as\") >> expr);
except_clause = lit(\"except\") >> -(test >> -((lit(\"as\")|lit(\",\")) >> test));
suite = simple_stmt|NEWLINE >> INDENT >> +stmt >> DEDENT;
testlist_safe = old_test >> -(+(lit(\",\") >> old_test) >> -(lit(\",\")));
old_test = or_test|old_lambdef;
old_lambdef = lit(\"lambda\") >> -(varargslist) >> lit(\":\") >> old_test;
test = or_test >> -(lit(\"if\") >> or_test >> lit(\"else\") >> test)|lambdef;
or_test = and_test >> *(lit(\"or\") >> and_test);
and_test = not_test >> *(lit(\"and\") >> not_test);
not_test = lit(\"not\") >> not_test|comparison;
comparison = expr >> *(comp_op >> expr);
comp_op = lit(\"<\")|lit(\">\")|lit(\"==\")|lit(\">=\")|lit(\"<=\")|lit(\"<>\")|lit(\"!=\")|lit(\"in\")|lit(\"not in\")|lit(\"is\")|lit(\"is not\");
expr = xor_expr >> *(lit(\"|\") >> xor_expr);
xor_expr = and_expr >> *(lit(\"^\") >> and_expr);
and_expr = shift_expr >> *(lit(\"&\") >> shift_expr);
shift_expr = arith_expr >> *((lit(\"<<\")|lit(\">>\")) >> arith_expr);
arith_expr = term >> *((lit(\"+\")|lit(\"-\")) >> term);
term = factor >> *((lit(\"*\")|lit(\"/\")|lit(\"%\")|lit(\"//\")) >> factor);
factor = (lit(\"+\")|lit(\"-\")|lit(\"~\")) >> factor|power;
power = atom >> *trailer >> -(lit(\"**\") >> factor);
atom = (lit(\"(\") >> -(yield_expr|testlist_comp) >> lit(\")\")|lit(\"-(\") >> -(listmaker) >> lit(\")\")|lit(\"{\") >> -(dictorsetmaker) >> lit(\"}\")|lit(\"`\") >> testlist1 >> lit(\"`\")|NAME|NUMBER|+STRING);
listmaker = test >> ( list_for|*(lit(\",\")) );
testlist_comp = test >> ( comp_for|*(lit(\",\")) );
lambdef = lit(\"lambda\") >> -(varargslist) >> lit(\":\") >> test;
trailer = lit(\"(\") >> -(arglist) >> lit(\")\")|lit(\"[\") >> subscriptlist >> lit(\"]\")|lit(\".\") >> NAME;
subscriptlist = subscript >> *(lit(\",\") >> subscript) >> -(lit(\",\"));
subscript = lit(\".\") >> lit(\".\") >> lit(\".\")|test|-(test) >> lit(\":\") >> -(test) >> -(sliceop);
sliceop = lit(\":\") >> -(test);
exprlist = expr >> *(lit(\",\") >> expr) >> -(lit(\",\"));
testlist = test >> *(lit(\",\"));
dictorsetmaker = ( (test >> lit(\":\") >> test >> (comp_for|*(lit(\",\") >> test >> lit(\":\") >> test) >> -(lit(\",\"))))|(test >> (comp_for|*(lit(\",\")))) );
classdef = lit(\"class\") >> NAME >> -(lit(\"(\") >> -(testlist) >> lit(\")\")) >> lit(\":\") >> suite;
arglist = *(argument >> lit(\",\")) >> (argument >> -(lit(\",\"))|lit(\"*\") >> test >> *(lit(\",\") >> argument) >> -(lit(\",\") >> lit(\"**\") >> test)|lit(\"**\") >> test);
argument = test >> -(comp_for)|test >> lit(\"=\") >> test;
list_iter = list_for|list_if;
list_for = lit(\"for\") >> exprlist >> lit(\"in\") >> testlist_safe >> -(list_iter);
list_if = lit(\"if\") >> old_test >> -(list_iter);
comp_iter = comp_for|comp_if;
comp_for = lit(\"for\") >> exprlist >> lit(\"in\") >> or_test >> -(comp_iter);
comp_if = lit(\"if\") >> old_test >> -(comp_iter);
testlist1 = test >> *(lit(\",\") >> test);
encoding_decl = NAME.alias();
yield_expr = lit(\"yield\") >> -(testlist);
}
// LEXEMS
qi::rule<Iterator,space_type> NEWLINE;
qi::rule<Iterator,space_type> INDENT;
qi::rule<Iterator,space_type> DEDENT;
qi::rule<Iterator,space_type> ENDMARKER;
qi::rule<Iterator,space_type> NAME;
qi::rule<Iterator,space_type> NUMBER;
qi::rule<Iterator,space_type> STRING;
// IDENTIFIER
qi::rule<Iterator,space_type> identifier;
// STRING LITERAL
qi::rule<Iterator,space_type> stringliteral;
qi::rule<Iterator,space_type> stringprefix;
qi::rule<Iterator,space_type> shortstring;
qi::rule<Iterator,space_type> longstring;
qi::rule<Iterator,space_type> shortstringitem;
qi::rule<Iterator,space_type> longstringitem;
qi::rule<Iterator,space_type> shortstringchar;
qi::rule<Iterator,space_type> longstringchar;
qi::rule<Iterator,space_type> escapeseq;
// INTEGER LITERAL
qi::rule<Iterator,space_type> longinteger;
qi::rule<Iterator,space_type> integer;
qi::rule<Iterator,space_type> decimalinteger;
qi::rule<Iterator,space_type> octinteger;
qi::rule<Iterator,space_type> hexinteger;
qi::rule<Iterator,space_type> bininteger;
qi::rule<Iterator,space_type> nonzerodigit;
qi::rule<Iterator,space_type> octdigit;
qi::rule<Iterator,space_type> bindigit;
qi::rule<Iterator,space_type> hexdigit;
// FLOAT LITERAL
qi::rule<Iterator,space_type> floatnumber;
qi::rule<Iterator,space_type> pointfloat;
qi::rule<Iterator,space_type> exponentfloat;
qi::rule<Iterator,space_type> intpart;
qi::rule<Iterator,space_type> fraction;
qi::rule<Iterator,space_type> exponent;
//IMAGINARY LITERAL
qi::rule<Iterator,space_type> imagnumber;
// PYTHON GRAMMAR
qi::rule<Iterator,space_type> single_input;
qi::rule<Iterator,space_type> file_input;
qi::rule<Iterator,space_type> eval_input;
qi::rule<Iterator,space_type> decorator;
qi::rule<Iterator,space_type> decorators;
qi::rule<Iterator,space_type> decorated;
qi::rule<Iterator,space_type> funcdef;
qi::rule<Iterator,space_type> parameters;
qi::rule<Iterator,space_type> varargslist;
qi::rule<Iterator,space_type> fpdef;
qi::rule<Iterator,space_type> fplist;
qi::rule<Iterator,space_type> stmt;
qi::rule<Iterator,space_type> simple_stmt;
qi::rule<Iterator,space_type> small_stmt;
qi::rule<Iterator,space_type> expr_stmt;
qi::rule<Iterator,space_type> augassign;
qi::rule<Iterator,space_type> print_stmt;
qi::rule<Iterator,space_type> del_stmt;
qi::rule<Iterator,space_type> pass_stmt;
qi::rule<Iterator,space_type> flow_stmt;
qi::rule<Iterator,space_type> break_stmt;
qi::rule<Iterator,space_type> continue_stmt;
qi::rule<Iterator,space_type> return_stmt;
qi::rule<Iterator,space_type> yield_stmt;
qi::rule<Iterator,space_type> raise_stmt;
qi::rule<Iterator,space_type> import_stmt;
qi::rule<Iterator,space_type> import_name;
qi::rule<Iterator,space_type> import_from;
qi::rule<Iterator,space_type> import_as_name;
qi::rule<Iterator,space_type> dotted_as_name;
qi::rule<Iterator,space_type> import_as_names;
qi::rule<Iterator,space_type> dotted_as_names;
qi::rule<Iterator,space_type> dotted_name;
qi::rule<Iterator,space_type> global_stmt;
qi::rule<Iterator,space_type> exec_stmt;
qi::rule<Iterator,space_type> assert_stmt;
qi::rule<Iterator,space_type> compound_stmt;
qi::rule<Iterator,space_type> if_stmt;
qi::rule<Iterator,space_type> while_stmt;
qi::rule<Iterator,space_type> for_stmt;
qi::rule<Iterator,space_type> try_stmt;
qi::rule<Iterator,space_type> with_stmt;
qi::rule<Iterator,space_type> with_item;
qi::rule<Iterator,space_type> except_clause;
qi::rule<Iterator,space_type> suite;
qi::rule<Iterator,space_type> testlist_safe;
qi::rule<Iterator,space_type> old_test;
qi::rule<Iterator,space_type> old_lambdef;
qi::rule<Iterator,space_type> test;
qi::rule<Iterator,space_type> or_test;
qi::rule<Iterator,space_type> and_test;
qi::rule<Iterator,space_type> not_test;
qi::rule<Iterator,space_type> comparison;
qi::rule<Iterator,space_type> comp_op;
qi::rule<Iterator,space_type> expr;
qi::rule<Iterator,space_type> xor_expr;
qi::rule<Iterator,space_type> and_expr;
qi::rule<Iterator,space_type> shift_expr;
qi::rule<Iterator,space_type> arith_expr;
qi::rule<Iterator,space_type> term;
qi::rule<Iterator,space_type> factor;
qi::rule<Iterator,space_type> power;
qi::rule<Iterator,space_type> atom;
qi::rule<Iterator,space_type> listmaker;
qi::rule<Iterator,space_type> testlist_comp;
qi::rule<Iterator,space_type> lambdef;
qi::rule<Iterator,space_type> trailer;
qi::rule<Iterator,space_type> subscriptlist;
qi::rule<Iterator,space_type> subscript;
qi::rule<Iterator,space_type> sliceop;
qi::rule<Iterator,space_type> exprlist;
qi::rule<Iterator,space_type> testlist;
qi::rule<Iterator,space_type> dictorsetmaker;
qi::rule<Iterator,space_type> classdef;
qi::rule<Iterator,space_type> arglist;
qi::rule<Iterator,space_type> argument;
qi::rule<Iterator,space_type> list_iter;
qi::rule<Iterator,space_type> list_for;
qi::rule<Iterator,space_type> list_if;
qi::rule<Iterator,space_type> comp_iter;
qi::rule<Iterator,space_type> comp_for;
qi::rule<Iterator,space_type> comp_if;
qi::rule<Iterator,space_type> testlist1;
qi::rule<Iterator,space_type> encoding_decl;
qi::rule<Iterator,space_type> yield_expr;
};
问题是当我尝试解析简单文件时:
pass
经过一些词法分析器模块后的女巫是:
pass <NEWLINE> <EOT>
解析失败,并在第一个字符处停止。
当我尝试使用pass_stmt规则解析此文件时,一切正常(除了我们仍然拥有和剩余,但密码为\“ consumed \”)。
当我尝试使用规则上一级解析它时-small_stmt-解析器停止在
> <EOT>
消耗
pass <NEWLINE
上一级-simple_stmt提供与file_input相同的结果-解析器在第一个字符处停止。
在添加在PYTHON GRAMMAR部分(从http://docs.python.org/reference/grammar.html获取)中定义的语法之前,所有功能都正常运行。解析器识别的标识符,文字,数字等
有人知道这里有什么问题吗?
解决方法
我建议您按照此处的说明启用调试。这将使您洞悉实际情况。通常,我建议逐步构建语法,而不要尝试一次大的实现。
您上面提供的代码对于不团结的读者来说很难理解,因为它很大且没有注释。编写语法非常类似于编写“普通”代码。封装是成功的关键。尝试构建覆盖独立部分的较小语法,并根据需要组合这些子语法。有关最佳做法,请参见此处。