基于 flex/bison 的编译器:在 C 程序中解析 asm[汇编命令]

问题描述

我是 flex 和 bison 的新手。我想编写一个编译器,读取 C 程序并将其转换为类似汇编的处理器命令。我下载了一个使用 flex 和 bison 编写的现成编译器。我需要修改 scanner.l 和 parser.y,使它能够处理 C 代码中的 asm 命令,例如 asm [asm command1 \n asm command2 \n asm command3 \n ...]。我应该在这两个文件中添加哪些定义和规则?

scanner.l:

%{
/* Prologue: flex copies everything between %{ and %} verbatim into the
 * generated scanner source. */
#include "scanner.h"
#include "y.tab.h"
#include <stdio.h>
#include <stdlib.h>
/* Size in bytes of the buffer that collects the text of a string literal. */
#define MAX_STR_CONST 1000
/* Storage for the string literal currently being scanned. */
char string_buf[MAX_STR_CONST];
/* Write cursor into string_buf; the opening-quote rule resets it. */
char *string_buf_ptr;
/* Line and column of the current scan position. Both start at 1 and are
 * advanced by updatePosition(). */
int line_num = 1;
int line_pos = 1;

void updatePosition();
/* flex executes YY_USER_ACTION before the action of every matched rule, so
 * the location is refreshed for each match. */
#define YY_USER_ACTION updatePosition();

%}

/* Decimal literal: a lone 0, or a digit string that does not start with 0. */
NUMBER  (0)|([1-9][0-9]*)
/* Hexadecimal literal: 0x or 0X followed by at least one hex digit. */
HExnuM  ((0x)|(0X))([a-fA-F0-9]+)
/* Identifier: a letter or underscore, then letters, digits or underscores. */
IDENT   [a-zA-Z_][a-zA-Z0-9_]*

/* Exclusive start conditions: while one is active only its own rules match. */
%x comment
%x str


/* noyywrap: scanning stops at end of input without calling yywrap(). */
%option noyywrap
/* yylineno: flex maintains the current line number in yylineno. */
%option yylineno
/* nounput: the unused unput() routine is not generated. */
%option nounput

%%

\"      string_buf_ptr = string_buf; BEGIN(str);
<str>{
\"      { /* saw closing quote - all done */
            BEGIN(INITIAL);
            *string_buf_ptr = '\0';
            /* return string constant token type and
            * value to parser
            */
            yylval.strConst = new std::string(string_buf);
            return T_STR_CONST;
        }

\n      {
            /* error - unterminated string constant */
            /* generate error message */
            yyerror("Unterminated string constant.");
        }

<<EOF>> { return T_UNTERM_STRING; }

\\[0-7]{1,3} {
        /* octal escape sequence */
        int result;
        
        (void) sscanf( yytext + 1,"%o",&result );

        if ( result > 0xff )
                /* error,constant is out-of-bounds */

        *string_buf_ptr++ = result;
        }

\\[0-9]+ {
        /* generate error - bad escape sequence; something
        * like '\48' or '\0777777'
        */
        yyerror("Bad string escape sequence.");
        }

\\n         *string_buf_ptr++ = '\n';
\\t         *string_buf_ptr++ = '\t';
\\r         *string_buf_ptr++ = '\r';
\\b         *string_buf_ptr++ = '\b';
\\f         *string_buf_ptr++ = '\f';

\\(.|\n)    *string_buf_ptr++ = yytext[1];

[^\\\n\"]+  {
            char *yptr = yytext;
            
            while ( *yptr )
                    *string_buf_ptr++ = *yptr++;
            }
}

"/*"            BEGIN(comment);
<comment>{
[^*\n]*        /* eat anything that's not a '*' */
"*"+[^*/\n]*   /* eat up '*'s not followed by '/'s */
\n             
<<EOF>>         { return T_UNTERM_COMMENT; }
"*"+"/"        BEGIN(INITIAL);
}




"do"            { return T_DO; }
"while"         { return T_WHILE; }
"for"           { return T_FOR; }
"if"            { return T_IF; }
"else"          { return T_ELSE; }
"int"           { return T_INT_TYPE; }
"string"        { return T_STRING_TYPE; }
"void"          { return T_VOID_TYPE; }
"struct"        { return T_STRUCT; }
"return"        { return T_RETURN; }
"switch"        { return T_SWITCH; }
"case"          { return T_CASE; }
"default"       { return T_DEFAULT; }
"break"         { return T_BREAK; }
"continue"      { return T_CONTINUE; }
"sizeof"        { return T_SIZEOF; }

"{"             { return '{'; }
"}"             { return '}'; }
"("             { return '('; }
")"             { return ')'; }
"["             { return '['; }
"]"             { return ']'; }
"+"             { return '+'; }
"-"             { return '-'; }
"*"             { return '*'; }
"/"             { return '/'; }
"%"             { return '%'; }
"="             { return '='; }
">"             { return '>'; }
"<"             { return '<'; }
"!"             { return '!'; }
"|"             { return '|'; }
"&"             { return '&'; }
"^"             { return '^'; }
"~"             { return '~'; }
"."             { return '.'; }
":"             { return ':'; }
";"             { return ';'; }
","             { return ','; }

"<<"            { return T_LEFT_SHIFT; }
">>"            { return T_RIGHT_SHIFT; }
"&&"            { return T_BOOL_AND; }
"||"            { return T_BOOL_OR; }
"+="            { return T_PLUS_EQUALS; }
"-="            { return T_MINUS_EQUALS; }
"*="            { return T_STAR_EQUALS; }
"/="            { return T_DIV_EQUALS; }
"%="            { return T_MOD_EQUALS; }
"=="            { return T_EQUAL; }
"<="            { return T_LESS_OR_EQUAL; }
">="            { return T_GREATER_OR_EQUAL; }
"!="            { return T_NOT_EQUAL; }
"|="            { return T_BIT_OR_EQUALS; }
"&="            { return T_BIT_AND_EQUALS; }
"^="            { return T_BIT_XOR_EQUALS; }
"~="            { return T_BIT_NOT_EQUALS; }
"->"            { return T_ARROW; }
"<<="           { return T_LEFT_SHIFT_EQUALS; }
">>="           { return T_RIGHT_SHIFT_EQUALS; }
"++"            { return T_PLUS_PLUS; }
"--"            { return T_MINUS_MINUS; }

" "|"\t"|"\r"|"\n"|"const"  {}
{HExnuM}        { yylval.intConst = std::strtoul(yytext,NULL,0); return T_INT_CONST; }
{NUMBER}        { yylval.intConst = atoi(yytext); return T_INT_CONST; }
{IDENT}         { yylval.ident = new std::string(yytext); return T_IDENT; }
.               {{ char err[] = "UnkNown Character: a"; err[strlen(err)-1] = *yytext; yyerror(err); }}

%%

/**
 * This function is called on every token,and updates the yylloc global variable,which stores the
 * location/position of the current token.
 */
void updatePosition() {
    yylloc.first_line = line_num;
    yylloc.first_column = line_pos;
    char* text = yytext;
    while(*text != '\0') {
        if(*text == '\n') {
            line_num++;
            line_pos = 1;
        } else {
            line_pos++;
        }
        text++;
    }
    yylloc.last_line = line_num;
    yylloc.last_column = line_pos;
}

parser.y:

%code requires {

/* Bison places a "%code requires" block ahead of the semantic value type
 * (and in the generated header), so the AST classes named in %union are
 * declared before they are used. */
#include "Declaration.h"
#include "Expression.h"
#include "Statement.h"
#include "Type.h"
#include "Parser.h"
#include "Util.h"

/* Receives the Program built by the action of the `root` rule. */
extern Program* program_out;

}

/* Track source locations so actions can use @$ and @N. */
%locations
/* Enable lookahead correction (LAC), which improves syntax error handling. */
%define parse.lac full
/* Ask for detailed syntax error messages instead of a bare "syntax error". */
%error-verbose

%{
/* Prologue: copied verbatim into the generated parser source. */
#include "Parser.h"
#include "scanner.h"
#include <string>
#include <iostream>
#include "Type.h"

%}

//%parse-param {Program*& out}

/* Semantic value type shared by tokens and nonterminals. Every member except
 * intConst is a pointer; the %type and %token declarations select which member
 * a given symbol uses. */
%union {
    char* cstr;
    /* Text of an identifier / of a string literal, allocated by the scanner. */
    std::string* ident;
    std::string* strConst;
    /* Value of an integer literal. */
    unsigned int intConst;
    Type* type;
    std::vector<Declaration*>* declareList;
    Declaration* declare;
    ConstantExpression* constant;
    std::vector<FunctionParameter*>* paramList;
    FunctionParameter* param;
    std::vector<StructMember*>* structMemberList;
    StructMember* structMember;
    StatementBlock* statementBlock;
    Statement* statement;
    std::vector<Statement*>* statementList;
    Expression* expression;
    std::vector<Expression*>* expressionList;
}

/* Which %union member carries the value of each nonterminal. */
%type <type> type
%type <cstr> root
%type <declareList> root_declare_list
%type <declare> root_declare
%type <constant> constant
%type <paramList> param_list non_empty_param_list
%type <param> param
%type <structMemberList> struct_list
%type <structMember> struct_member;
%type <statementBlock> statement_block
%type <statementList> statement_list
%type <statement> statement
%type <expression> expression
%type <expressionList> argument_list non_empty_argument_list

/* Tokens that carry a value: identifier text, string literal text, integer. */
%token <ident> T_IDENT
%token <strConst> T_STR_CONST
%token <intConst> T_INT_CONST
/* Keywords and multi-character operators returned by the scanner. */
%token T_IF T_ELSE T_FOR T_WHILE T_DO T_SIZEOF 
%token T_INT_TYPE T_STRING_TYPE T_VOID_TYPE T_STRUCT
%token T_RETURN T_SWITCH T_CASE T_DEFAULT T_BREAK T_CONTINUE
%token T_BOOL_OR T_BOOL_AND
%token T_LEFT_SHIFT T_RIGHT_SHIFT T_PLUS_EQUALS T_MINUS_EQUALS 
%token T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS T_EQUAL 
%token T_LESS_OR_EQUAL T_GREATER_OR_EQUAL T_NOT_EQUAL
%token T_BIT_OR_EQUALS T_BIT_AND_EQUALS T_BIT_XOR_EQUALS
%token T_BIT_NOT_EQUALS T_ARROW T_LEFT_SHIFT_EQUALS
%token T_RIGHT_SHIFT_EQUALS T_PLUS_PLUS T_MINUS_MINUS
/* Returned by the scanner when input ends inside a string or a comment. */
%token T_UNTERM_STRING T_UNTERM_COMMENT

/* tokens for precedence */
/* These are referenced only through %prec in the grammar rules; the scanner
 * has no rule that returns them. */
%token PREC_ADDRESS PREC_DEREFERENCE PREC_UNARY_MINUS PREC_UNARY_PLUS
%token PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
%token PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%token PREC_APPLICATION

/* Operator precedence and associativity. Each declaration line is one level;
 * later lines bind tighter. Symbols on the same line share a level and are
 * grouped by the line's associativity. */
/* lowest precedence */
%left ','
/* All assignment operators share one right-associative level, as in C, so
 * that `a = b += c` groups as `a = (b += c)`. */
%right '=' T_PLUS_EQUALS T_MINUS_EQUALS
       T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS
       T_LEFT_SHIFT_EQUALS T_RIGHT_SHIFT_EQUALS
       T_BIT_AND_EQUALS T_BIT_XOR_EQUALS T_BIT_OR_EQUALS
%left T_BOOL_OR
%left T_BOOL_AND
%left '|'
%left '^'
%left '&'
%left T_EQUAL T_NOT_EQUAL
/* The four relational operators share one left-associative level, as in C,
 * so that `a > b < c` groups as `(a > b) < c`. */
%left '<' '>' T_LESS_OR_EQUAL T_GREATER_OR_EQUAL
%left T_LEFT_SHIFT T_RIGHT_SHIFT
%left '+' '-'
%left '*' '/' '%'
/* Prefix operators. The PREC_* names are attached to rules with %prec. */
%right PREC_ADDRESS
%right PREC_DEREFERENCE
%right '!' '~'
%right PREC_UNARY_PLUS PREC_UNARY_MINUS
%right PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%right T_PLUS_PLUS T_MINUS_MINUS
/* Postfix operators: member access, subscript, call, suffix ++ and --. */
%left T_ARROW
%left '.'
%left '['
%left PREC_APPLICATION
%left PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
/* Dangling else: the else-less `if` rule is tagged %prec T_IF, which ranks
 * below the T_ELSE token, so an `else` is shifted onto the nearest `if`. */
%nonassoc T_IF
%nonassoc T_ELSE
/* highest precedence */



%%

/* Start symbol: wraps the collected top-level declarations in a Program,
 * stores it in program_out, and sets the rule's own value to NULL. */
root:
        root_declare_list
            {
                program_out = new Program(@$,*$1);
                delete $1;
                $$ = NULL;
            }
    ;

/* Zero or more top-level declarations, appended in source order. */
root_declare_list:
        root_declare_list root_declare
            {
                $1->push_back($2);
                $$ = $1;
            }
    |   /* empty */
            { $$ = new std::vector<Declaration*>(); }
    ;

/* One top-level declaration: function prototype or definition, global
 * variable (plain, array, or initialized), struct definition, or struct
 * forward declaration.
 *
 * Every action passes the declared type ($1) and name (*$2) on to the node it
 * builds, so neither is discarded.
 * NOTE(review): the argument order (location, type, name, remaining parts)
 * follows the FunctionPrototype action; the other constructors are not visible
 * here - confirm against Declaration.h. */
root_declare:
        type T_IDENT '(' param_list ')' ';'                     { $$ = new FunctionPrototype(@$,$1,*$2,*$4); delete $2; delete $4; }
    |   type T_IDENT '(' param_list ')' statement_block         { $$ = new FunctionDeclaration(@$,$1,*$2,*$4,$6); delete $2; delete $4; }
    |   type T_IDENT ';'                                        { $$ = new GlobalVarDeclaration(@$,$1,*$2); delete $2; }
    |   type T_IDENT '[' T_INT_CONST ']' ';'                    { $$ = new GlobalArrayDeclaration(@$,$1,*$2,$4); delete $2; }
    |   type T_IDENT '=' constant ';'                           { $$ = new GlobalVarDeclarationInit(@$,$1,*$2,$4); delete $2; }
    |   T_STRUCT T_IDENT '{' struct_list '}' ';'                { $$ = new StructDeclaration(@$,*$2,*$4); delete $2; delete $4; }
    |   T_STRUCT T_IDENT ';'                                    { $$ = new StructPredeclaration(@$,*$2); delete $2; }
    ;

/* A literal: an integer or a string. The string token's std::string is
 * released once the node has been built from it. */
constant:
        T_INT_CONST
            { $$ = new IntConstantExpression(@$,$1); }
    |   T_STR_CONST
            {
                $$ = new StringConstantExpression(@$,*$1);
                delete $1;
            }
    ;

/* Function parameter list; may be empty. */
param_list:
        non_empty_param_list
            { $$ = $1; }
    |   /* empty */
            { $$ = new std::vector<FunctionParameter*>(); }
    ;

/* One or more parameters separated by commas, collected in source order. */
non_empty_param_list:
        non_empty_param_list ',' param
            {
                $1->push_back($3);
                $$ = $1;
            }
    |   param
            { $$ = new std::vector<FunctionParameter*>({$1}); }
    ;

/* A single parameter: a type followed by a name. The type ($1) is passed to
 * the node along with the name so it is neither lost nor leaked.
 * NOTE(review): argument order (location, type, name) is assumed from the
 * FunctionPrototype action - confirm against the FunctionParameter
 * constructor. */
param:
        type T_IDENT                                            { $$ = new FunctionParameter(@$,$1,*$2); delete $2; }
    ;

/* Body of a struct definition: zero or more members, each terminated by a
 * semicolon, collected in source order. */
struct_list:
        struct_list struct_member ';'
            {
                $1->push_back($2);
                $$ = $1;
            }
    |   /* empty */
            { $$ = new std::vector<StructMember*>(); }
    ;

/* A single struct member: a type followed by a name. The type ($1) is passed
 * to the node along with the name so it is neither lost nor leaked.
 * NOTE(review): argument order (location, type, name) is assumed from the
 * FunctionPrototype action - confirm against the StructMember constructor. */
struct_member:
        type T_IDENT                                            { $$ = new StructMember(@$,$1,*$2); delete $2; }
    ;

/* A type: one of the built-in types or a named struct, optionally followed by
 * any number of '*' (each one wraps the type so far in a PointerType). */
type:
        type '*'
            { $$ = new PointerType($1); }
    |   T_STRUCT T_IDENT
            {
                $$ = new StructType(*$2);
                delete $2;
            }
    |   T_INT_TYPE
            { $$ = new IntType(); }
    |   T_VOID_TYPE
            { $$ = new VoidType(); }
    |   T_STRING_TYPE
            { $$ = new StringType(); }
    ;

/* A brace-enclosed sequence of statements. */
statement_block:
        '{' statement_list '}'
            {
                $$ = new StatementBlock(@$,*$2);
                delete $2;
            }
    ;

/* Zero or more statements, collected in source order. */
statement_list:
        statement_list statement
            {
                $1->push_back($2);
                $$ = $1;
            }
    |   /* empty */
            { $$ = new std::vector<Statement*>(); }
    ;

/* A single statement: expression statement, local declaration, loop, block,
 * conditional, jump, switch, or a case/default label (labels are ordinary
 * statements inside the switch's statement list).
 *
 * Every action hands all of its operands to the node it builds: declarations
 * pass their type and name, and if / if-else / for / switch pass their
 * condition or initializer, mirroring the while and do-while actions.
 * NOTE(review): constructor argument order is assumed to follow source order
 * (location first, then the operands left to right) - confirm against
 * Statement.h.
 *
 * The else-less `if` alternative carries %prec T_IF so that a following
 * `else` attaches to the nearest `if`. */
statement:
        expression ';'                                          { $$ = $1; }
    |   type T_IDENT ';'                                        { $$ = new VarDeclaration(@$,$1,*$2); delete $2; }
    |   type T_IDENT '=' expression ';'                         { $$ = new VarDeclarationInit(@$,$1,*$2,$4); delete $2; }
    |   type T_IDENT '[' T_INT_CONST ']' ';'                    { $$ = new ArrayDeclaration(@$,$1,*$2,$4); delete $2; }
    |   T_WHILE '(' expression ')' statement                    { $$ = new WhileStatement(@$,$3,$5); }
    |   T_DO statement T_WHILE '(' expression ')' ';'           { $$ = new DoWhileStatement(@$,$2,$5); }
    |   T_FOR '(' expression ';' expression ';' expression ')' statement { $$ = new ForStatement(@$,$3,$5,$7,$9); }
    |   statement_block                                         { $$ = $1; }
    |   T_IF '(' expression ')' statement %prec T_IF            { $$ = new IfStatement(@$,$3,$5); }
    |   T_IF '(' expression ')' statement T_ELSE statement      { $$ = new IfElseStatement(@$,$3,$5,$7); }
    |   T_BREAK ';'                                             { $$ = new BreakStatement(@$); }
    |   T_CONTINUE ';'                                          { $$ = new ContinueStatement(@$); }
    |   T_SWITCH '(' expression ')' '{' statement_list '}'      { $$ = new SwitchStatement(@$,$3,*$6); delete $6; }
    |   T_CASE T_INT_CONST ':'                                  { $$ = new CaseStatement(@$,$2); }
    |   T_DEFAULT ':'                                           { $$ = new DefaultStatement(@$); }
    |   T_RETURN expression ';'                                 { $$ = new ReturnStatement(@$,$2); }
    ;

/* Expressions. Ambiguity between the alternatives is resolved by the
 * precedence declarations; %prec names the level for alternatives whose
 * operator token is also used with a different meaning.
 *
 * Node construction:
 *   - A compound assignment `a op= b` is built as an assignment whose value is
 *     the binary node `a op b`. The left operand appears twice, so the binary
 *     node receives a clone of it.
 *   - Binary nodes take (location, left operand, operator text, right operand).
 *   - Unary operator nodes take (location, operator text, operand).
 *   - A dereference `*p` is built as the subscript `p[0]`.
 *
 * NOTE(review): the four-argument BinaryOperatorExpression form and the
 * (operator text, operand) UnaryOperatorExpression form are the ones used by
 * the `+=`, `!` and `~` actions. Passing the left operand first to
 * AssignExpression, BinaryOperatorConditionExpression, StructMemberExpression
 * and ArraySubscriptExpression, and the (operand, operator text) order for the
 * suffix form of UnaryAssignExpression, are assumptions - confirm against
 * Expression.h.
 * NOTE(review): `.` and `->` build the same StructMemberExpression; confirm
 * whether `->` needs an explicit dereference of its left operand. */
expression:
        expression '=' expression                               { $$ = new AssignExpression(@$,$1,$3); }
    |   expression T_PLUS_EQUALS expression                     { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),"+",$3)); }
    |   expression T_MINUS_EQUALS expression                    { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),"-",$3)); }
    |   expression T_STAR_EQUALS expression                     { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),"*",$3)); }
    |   expression T_DIV_EQUALS expression                      { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),"/",$3)); }
    |   expression T_MOD_EQUALS expression                      { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),"%",$3)); }
    |   expression T_BIT_AND_EQUALS expression                  { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),"&",$3)); }
    |   expression T_BIT_OR_EQUALS expression                   { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),"|",$3)); }
    |   expression T_BIT_XOR_EQUALS expression                  { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),"^",$3)); }
    |   expression T_LEFT_SHIFT_EQUALS expression               { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),"<<",$3)); }
    |   expression T_RIGHT_SHIFT_EQUALS expression              { $$ = new AssignExpression(@$,$1,new BinaryOperatorExpression(@3,$1->clone(),">>",$3)); }
    |   expression T_PLUS_PLUS %prec PREC_SUFFIX_PLUS_PLUS      { $$ = new UnaryAssignExpression(@$,$1,"++"); }
    |   T_PLUS_PLUS expression %prec PREC_PREFIX_PLUS_PLUS      { $$ = new UnaryAssignExpression(@$,"++",$2); }
    |   expression T_MINUS_MINUS %prec PREC_SUFFIX_MINUS_MINUS  { $$ = new UnaryAssignExpression(@$,$1,"--"); }
    |   T_MINUS_MINUS expression %prec PREC_PREFIX_MINUS_MINUS  { $$ = new UnaryAssignExpression(@$,"--",$2); }
    |   constant                                                { $$ = $1; }
    |   '(' expression ')'                                      { $$ = $2; }
    |   T_IDENT '(' argument_list ')' %prec PREC_APPLICATION    { $$ = new FunctionCallExpression(@$,*$1,*$3); delete $1; delete $3; }
    |   T_SIZEOF '(' type ')'                                   { $$ = new SizeofExpression(@$,$3); }
    |   '!' expression                                          { $$ = new UnaryOperatorExpression(@$,"!",$2); }
    |   '~' expression                                          { $$ = new UnaryOperatorExpression(@$,"~",$2); }
    |   '+' expression %prec PREC_UNARY_PLUS                    { $$ = new UnaryOperatorExpression(@$,"+",$2); }
    |   '-' expression %prec PREC_UNARY_MINUS                   { $$ = new UnaryOperatorExpression(@$,"-",$2); }
    |   '*' expression %prec PREC_DEREFERENCE                   { $$ = new ArraySubscriptExpression(@$,$2,new IntConstantExpression(@2,0)); }
    |   '&' expression %prec PREC_ADDRESS                       { $$ = new UnaryOperatorExpression(@$,"&",$2); }
    |   expression '+' expression                               { $$ = new BinaryOperatorExpression(@$,$1,"+",$3); }
    |   expression '-' expression                               { $$ = new BinaryOperatorExpression(@$,$1,"-",$3); }
    |   expression '*' expression                               { $$ = new BinaryOperatorExpression(@$,$1,"*",$3); }
    |   expression '/' expression                               { $$ = new BinaryOperatorExpression(@$,$1,"/",$3); }
    |   expression '%' expression                               { $$ = new BinaryOperatorExpression(@$,$1,"%",$3); }
    |   expression '&' expression                               { $$ = new BinaryOperatorExpression(@$,$1,"&",$3); }
    |   expression '|' expression                               { $$ = new BinaryOperatorExpression(@$,$1,"|",$3); }
    |   expression '^' expression                               { $$ = new BinaryOperatorExpression(@$,$1,"^",$3); }
    |   expression T_BOOL_AND expression                        { $$ = new BinaryOperatorExpression(@$,$1,"&&",$3); }
    |   expression T_BOOL_OR expression                         { $$ = new BinaryOperatorExpression(@$,$1,"||",$3); }
    |   expression T_LEFT_SHIFT expression                      { $$ = new BinaryOperatorExpression(@$,$1,"<<",$3); }
    |   expression T_RIGHT_SHIFT expression                     { $$ = new BinaryOperatorExpression(@$,$1,">>",$3); }
    |   expression T_EQUAL expression                           { $$ = new BinaryOperatorConditionExpression(@$,$1,"==",$3); }
    |   expression T_NOT_EQUAL expression                       { $$ = new BinaryOperatorConditionExpression(@$,$1,"!=",$3); }
    |   expression '<' expression                               { $$ = new BinaryOperatorConditionExpression(@$,$1,"<",$3); }
    |   expression '>' expression                               { $$ = new BinaryOperatorConditionExpression(@$,$1,">",$3); }
    |   expression T_LESS_OR_EQUAL expression                   { $$ = new BinaryOperatorConditionExpression(@$,$1,"<=",$3); }
    |   expression T_GREATER_OR_EQUAL expression                { $$ = new BinaryOperatorConditionExpression(@$,$1,">=",$3); }
    |   T_IDENT                                                 { $$ = new VarExpression(@$,*$1); delete $1; }
    |   expression '.' T_IDENT                                  { $$ = new StructMemberExpression(@$,$1,*$3); delete $3; }
    |   expression T_ARROW T_IDENT                              { $$ = new StructMemberExpression(@$,$1,*$3); delete $3; }
    |   expression '[' expression ']'                           { $$ = new ArraySubscriptExpression(@$,$1,$3); }
    ;

/* Arguments of a function call; may be empty. */
argument_list:
        non_empty_argument_list
            { $$ = $1; }
    |   /* empty */
            { $$ = new std::vector<Expression*>(); }
    ;

/* One or more argument expressions separated by commas, in source order. */
non_empty_argument_list:
        non_empty_argument_list ',' expression
            {
                $1->push_back($3);
                $$ = $1;
            }
    |   expression
            { $$ = new std::vector<Expression*>({$1}); }
    ;

解决方法

暂未找到可以解决该程序问题的有效方法,小编努力寻找整理中!

如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@)