继续:
int Perl_yyparse (pTHX_ int gramtype){
register yy_parser *parser; /* the parser object */
register yy_stack_frame *ps; /* current parser stack frame */
----从这两句话,我们看出,有两个变量用于parser,也就是说,是一种多层语言。
这种技术,是很常见的。比如,解析一门语言时,进入了另一种状态,比如进入了注释。
往前,我们找到最重要的一句话:
parser->yychar = yylex();
,所有的编译器都是这样的,lex是yacc的一个工具。所以,自然要从yacc中调用lex.
简单来说,编译器,是一种流式的解析器,它一次读入流,完成一个任务。
虽然,有的编译器,如C语言,理论上,是多遍完成解析的,因为有预编译。
但,对于每一次来说,也就是每一种输入来说,只需要解析一次。
这也是编译器的精妙之处。
lex的任务,是一个字符,一个字符地读入,然后驱动内部的状态机。当状态机被激发,则会发给yacc一个token.
前面我解释过了,perl解析器,没有专门编写一个lex文件,而是直接手工编写了一个token. 只是原理,也lex没有差别。
============
歇一会,
的第504行找到:
/* A bare statement,lacking label and other aspects of state op */
barestmt: PLUGSTMT
{ $$ = $1; }
| PEG
{
$$ = newOP(OP_NULL,0);
TOKEN_GETMAD($1,$$,'p');
}
。。。
| ';'
{
PL_parser->expect = XSTATE;
$$ = IF_MAD(newOP(OP_NULL,0),(OP*)NULL);
TOKEN_GETMAD($1,';');
PL_parser->copline = NOLINE;
}
;
========================================
现在,停掉重头再来。
因为关键的东西还都没有找到。
重新写个脚本,最简单的:
前面,打两个回车,然后定义个变量,就可以了。
编译器都是这样写的,从一个个简单的语句解析开始。
然后,在token.c中,找到一句话:
void
Perl_lex_start(pTHX_ SV *line,PerlIO *rsfp,U32 flags)
{
。。。
parser->linestart = SvPVX(parser->linestr);
parser->linestr,是在哪里初始化的呢?
-----------
SvPVX,是从yacc的当前yyval中,得到想要的东西。因为yyval是一个union,所以,要根据需要,得到那个具体的值。
define SvPVX(sv) ((sv)->sv_u.svu_pv)
char *linestart; /* beginning of most recently read line */
-------------------------
重来。
真是难搞。
找到了第一行处。
我一定是错过了许多东西。而且大部分地方,也没看懂。
原来是想拿来直接用perl解析器。
然后加个自定义的东西。
现在来看,太难了。
我再想想其它的办法。
就算是一个记录吧。
现在,才明白,原来lex和yacc的解析器,语法与perl很象。
找到了赋值语句:
/* Binary operators between terms */
termbinop: term ASSIGnop term /* $x = $y */
{ $$ = newASSIGnop(OPf_STACKED,$1,IVAL($2),$3);
TOKEN_GETMAD($2,'o');
}
在核心的op.c中:
/* =for apidoc Am|OP *|newASSIGnop|I32 flags|OP *left|I32 optype|OP *right Constructs,checks,and returns an assignment op. I<left> and I<right> supply the parameters of the assignment; they are consumed by this function and become part of the constructed op tree. If I<optype> is C<OP_ANDASSIGN>,C<OP_ORASSIGN>,or C<OP_DORASSIGN>,then a suitable conditional optree is constructed. If I<optype> is the opcode of a binary operator,such as C<OP_BIT_OR>,then an op is constructed that performs the binary operation and assigns the result to the left argument. Either way,if I<optype> is non-zero then I<flags> has no effect. If I<optype> is zero,then a plain scalar or list assignment is constructed. Which type of assignment it is is automatically determined. I<flags> gives the eight bits of C<op_flags>,except that C<OPf_KIDS> will be set automatically,and,shifted up eight bits,the eight bits of C<op_private>,except that the bit with value 1 or 2 is automatically set as required. =cut */ OP * Perl_newASSIGnop(pTHX_ I32 flags,OP *left,I32 optype,OP *right) { dVAR; OP *o; if (optype) { if (optype == OP_ANDASSIGN || optype == OP_ORASSIGN || optype == OP_DORASSIGN) { return newlogoP(optype,op_lvalue(scalar(left),optype),newUnop(OP_SASSIGN,scalar(right))); } else { return newBInop(optype,OPf_STACKED,scalar(right)); } } if (is_list_assignment(left)) { static const char no_list_state[] = "Initialization of state variables" " in list context currently forbidden"; OP *curop; bool maybe_common_vars = TRUE; PL_modcount = 0; left = op_lvalue(left,OP_AASSIGN); curop = list(force_list(left)); o = newBInop(OP_AASSIGN,flags,list(force_list(right)),curop); o->op_private = (U8)(0 | (flags >> 8)); if ((left->op_type == OP_LIST || (left->op_type == OP_NULL && left->op_targ == OP_LIST))) { OP* lop = ((LISTOP*)left)->op_first; maybe_common_vars = FALSE; while (lop) { if (lop->op_type == OP_PADSV || lop->op_type == OP_PADAV || lop->op_type == OP_PADHV || lop->op_type == OP_PADANY) { if (!(lop->op_private & OPpLVAL_INTRO)) maybe_common_vars = TRUE; if (lop->op_private & OPpPAD_STATE) { if (left->op_private & OPpLVAL_INTRO) { /* Each variable in state($a,$b,$c) = ... */ } else { /* Each state variable in (state $a,my $b,our $c,$d,undef) = ... */ } yyerror(no_list_state); } else { /* Each my variable in (state $a,undef) = ... */ } } else if (lop->op_type == OP_UNDEF || lop->op_type == OP_PUSHMARK) { /* undef may be interesting in (state $a,undef,state $c) */ } else { /* Other ops in the list. */ maybe_common_vars = TRUE; } lop = lop->op_sibling; } } else if ((left->op_private & OPpLVAL_INTRO) && ( left->op_type == OP_PADSV || left->op_type == OP_PADAV || left->op_type == OP_PADHV || left->op_type == OP_PADANY)) { if (left->op_type == OP_PADSV) maybe_common_vars = FALSE; if (left->op_private & OPpPAD_STATE) { /* All single variable list context state assignments,hence state ($a) = ... (state $a) = ... state @a = ... state (@a) = ... (state @a) = ... state %a = ... state (%a) = ... (state %a) = ... */ yyerror(no_list_state); } } /* PL_generation sorcery: * an assignment like ($a,$b) = ($c,$d) is easier than * ($a,$a),since there is no need for temporary vars. * To detect whether there are common vars,the global var * PL_generation is incremented for each assign op we compile. * Then,while compiling the assign op,we run through all the * variables on both sides of the assignment,setting a spare slot * in each of them to PL_generation. If any of them already have * that value,we kNow we've got commonality. We Could use a * single bit marker,but then we'd have to make 2 passes,first * to clear the flag,then to test and set it. To find somewhere * to store these values,evil chicanery is done with SvUVX(). */ if (maybe_common_vars) { PL_generation++; if (aassign_common_vars(o)) o->op_private |= OPpASSIGN_COMMON; LINKLIST(o); } if (right && right->op_type == OP_SPLIT && !PL_madskills) { OP* tmpop = ((LISTOP*)right)->op_first; if (tmpop && (tmpop->op_type == OP_PUSHRE)) { PMOP * const pm = (PMOP*)tmpop; if (left->op_type == OP_RV2AV && !(left->op_private & OPpLVAL_INTRO) && !(o->op_private & OPpASSIGN_COMMON) ) { tmpop = ((Unop*)left)->op_first; if (tmpop->op_type == OP_GV #ifdef USE_ITHREADS && !pm->op_pmreplrootu.op_pmtargetoff #else && !pm->op_pmreplrootu.op_pmtargetgv #endif ) { #ifdef USE_ITHREADS pm->op_pmreplrootu.op_pmtargetoff = cPADOPx(tmpop)->op_padix; cPADOPx(tmpop)->op_padix = 0; /* steal it */ #else pm->op_pmreplrootu.op_pmtargetgv = MUTABLE_GV(cSVOPx(tmpop)->op_sv); cSVOPx(tmpop)->op_sv = NULL; /* steal it */ #endif pm->op_pmflags |= PMf_ONCE; tmpop = cUnopo->op_first; /* to list (nulled) */ tmpop = ((Unop*)tmpop)->op_first; /* to pushmark */ tmpop->op_sibling = NULL; /* don't free split */ right->op_next = tmpop->op_next; /* fix starting loc */ op_free(o); /* blow off assign */ right->op_flags &= ~OPf_WANT; /* "I don't kNow and I don't care." */ return right; } } else { if (PL_modcount < RETURN_UNLIMITED_NUMBER && ((LISTOP*)right)->op_last->op_type == OP_CONST) { SV *sv = ((SVOP*)((LISTOP*)right)->op_last)->op_sv; if (SvIOK(sv) && SvIVX(sv) == 0) sv_setiv(sv,PL_modcount+1); } } } } return o; } if (!right) right = newOP(OP_UNDEF,0); if (right->op_type == OP_READLINE) { right->op_flags |= OPf_STACKED; return newBInop(OP_NULL,OP_SASSIGN),scalar(right)); } else { o = newBInop(OP_SASSIGN,scalar(right),OP_SASSIGN) ); } return o; }
注意那个OP.
#define BASEOP \
OP* op_next; \
OP* op_sibling; \
OP* (*op_ppaddr)(pTHX); \
MADPROP_IN_BASEOP \
PADOFFSET op_targ; \
PERL_BITFIELD16 op_type:9; \
PERL_BITFIELD16 op_opt:1; \
PERL_BITFIELD16 op_latefree:1; \
PERL_BITFIELD16 op_latefreed:1; \
PERL_BITFIELD16 op_attached:1; \
PERL_BITFIELD16 op_spare:3; \
U8 op_flags; \
U8 op_private;
#endif
用来记录操作表达式。
因为我就写了一句话,后面什么也没干。
也就没什么可跟的了。
跟的过程中,可以清楚地看到,如果在lex中,没有找到什么yacc 感兴趣的东西,lex就把这些东西吞掉了。
主要就是这句:
parser->yychar = yylex();
===========
不过,perl的解释器的确是我所见过的最复杂的。
lex 会在开始前,和结束后,生成一些token,发给yacc。
这让我头大了许多。
先到这里吧。以后也不打算写了。实在累人。