C 编译器的语义分析阶段

问题描述

如果在示例 C 程序中写入 `1 = a`,我的编译器不会将其检测为错误。我该如何解决这个问题?另外,我该如何实现变量的全局作用域和局部作用域?谢谢。

clexer.lex 源代码
/* Named sub-patterns referenced as {D}, {L}, {H}, {E}, {FS}, {IS} in the rules. */
/* D: one decimal digit. */
D           [0-9]
/* L: a letter or underscore (first character of an identifier). */
L           [a-zA-Z_]
/* H: one hexadecimal digit. */
H           [a-fA-F0-9]
/* E: exponent part of a floating constant, e.g. e+10. */
E           [Ee][+-]?{D}+
/* FS: suffix accepted after floating constants. */
FS          (f|F|l|L)
/* IS: zero or more suffix letters accepted after integer constants. */
IS          (u|U|l|L)*

%{
/*
 * Scanner prologue: headers, the helper declaration the rules need, and
 * the position/identifier state shared with the parser.
 */
#include <stdio.h>
#include <string.h>
#include "y.tab.h"

/* Defined in the user-code section; declared here because the rules call
   it before its definition is seen. */
int comment(void);

int cnt=1;          /* current column, reset to 1 at every newline */
int line=1;         /* current line number */
char tempid[100];   /* text of the most recently scanned identifier */
%}

%%
    /* Block comment opener: comment() consumes the rest of the comment. */
"/*"            { comment(); }

    /* Keywords. Every action advances the column counter by the lexeme
       length, copies the lexeme to yyout (ECHO) and returns the token. */
"auto"          { cnt+=yyleng;ECHO; return(AUTO); }
"break"         { cnt+=yyleng;ECHO; return(BREAK); }
"case"          { cnt+=yyleng;ECHO; return(CASE); }
"char"          { cnt+=yyleng;ECHO; return(CHAR); }
"const"         { cnt+=yyleng;ECHO; return(CONST); }
"continue"      { cnt+=yyleng;ECHO; return(CONTINUE); }
"default"       { cnt+=yyleng;ECHO; return(DEFAULT); }
"do"            { cnt+=yyleng;ECHO; return(DO); }
"double"        { cnt+=yyleng;ECHO; return(DOUBLE); }
"else"          { cnt+=yyleng;ECHO; return(ELSE); }
"enum"          { cnt+=yyleng;ECHO; return(ENUM); }
"extern"        { cnt+=yyleng;ECHO; return(EXTERN); }
"float"         { cnt+=yyleng;ECHO; return(FLOAT); }
"for"           { cnt+=yyleng;ECHO; return(FOR); }
"goto"          { cnt+=yyleng;ECHO; return(GOTO); }
"if"            { cnt+=yyleng;ECHO; return(IF); }
"int"           { cnt+=yyleng;ECHO; return(INT); }
"long"          { cnt+=yyleng;ECHO; return(LONG); }
"register"      { cnt+=yyleng;ECHO; return(REGISTER); }
"return"        { cnt+=yyleng;ECHO; return(RETURN); }
"short"         { cnt+=yyleng;ECHO; return(SHORT); }
"signed"        { cnt+=yyleng;ECHO; return(SIGNED); }
"sizeof"        { cnt+=yyleng;ECHO; return(SIZEOF); }
"static"        { cnt+=yyleng;ECHO; return(STATIC); }
"struct"        { cnt+=yyleng;ECHO; return(STRUCT); }
"switch"        { cnt+=yyleng;ECHO; return(SWITCH); }
"typedef"       { cnt+=yyleng;ECHO; return(TYPEDEF); }
"union"         { cnt+=yyleng;ECHO; return(UNION); }
"unsigned"      { cnt+=yyleng;ECHO; return(UNSIGNED); }
"void"          { cnt+=yyleng;ECHO; return(VOID); }
"volatile"      { cnt+=yyleng;ECHO; return(VOLATILE); }
"while"         { cnt+=yyleng;ECHO; return(WHILE); }
{L}({L}|{D})*       {
                        /*
                         * Identifier. The text is saved in tempid so the
                         * parser actions can look it up or install it.
                         * The copy is bounded: tempid holds 100 bytes and
                         * an over-long identifier is truncated, not
                         * written past the end of the buffer.
                         *
                         * The former SINGLE rule for quoted letters/digits
                         * was removed: it took precedence over the
                         * character-constant rule, and no grammar rule
                         * accepts SINGLE, so every constant such as 'a'
                         * produced a syntax error. Those lexemes are now
                         * returned as CONSTANT by the character-constant
                         * rule.
                         */
                        cnt+=yyleng;ECHO;
                        snprintf(tempid, sizeof tempid, "%s", yytext);
                        return(IDENTIFIER);
                    }

0[xX]{H}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
    /* The rule above is hexadecimal; below are leading-zero and decimal
       integers, then character constants (optionally L-prefixed). */
0{D}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+{IS}?       { cnt+=yyleng;ECHO; return(CONSTANT); }
L?'(\\.|[^\\'])+'   { cnt+=yyleng;ECHO; return(CONSTANT); }

    /* Floating constants: exponent-only, fraction-required, and
       integer-part-required forms. */
{D}+{E}{FS}?        { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }

    /* String literal: backslash escapes or any character other than a
       backslash or double quote. */
L?\"(\\.|[^\\"])*\" { cnt+=yyleng;ECHO; return(STRING_LITERAL); }

    /* Multi-character operators come first; flex prefers the longest
       match, so ">>=" wins over ">>" and ">". */
"..."           { cnt+=yyleng;ECHO; return(ELLIPSIS); }
">>="           { cnt+=yyleng;ECHO; return(RIGHT_ASSIGN); }
"<<="           { cnt+=yyleng;ECHO; return(LEFT_ASSIGN); }
"+="            { cnt+=yyleng;ECHO; return(ADD_ASSIGN); }
"-="            { cnt+=yyleng;ECHO; return(SUB_ASSIGN); }
"*="            { cnt+=yyleng;ECHO; return(MUL_ASSIGN); }
"/="            { cnt+=yyleng;ECHO; return(DIV_ASSIGN); }
"%="            { cnt+=yyleng;ECHO; return(MOD_ASSIGN); }
"&="            { cnt+=yyleng;ECHO; return(AND_ASSIGN); }
"^="            { cnt+=yyleng;ECHO; return(XOR_ASSIGN); }
"|="            { cnt+=yyleng;ECHO; return(OR_ASSIGN); }
">>"            { cnt+=yyleng;ECHO; return(RIGHT_OP); }
"<<"            { cnt+=yyleng;ECHO; return(LEFT_OP); }
"++"            { cnt+=yyleng;ECHO; return(INC_OP); }
"--"            { cnt+=yyleng;ECHO; return(DEC_OP); }
"->"            { cnt+=yyleng;ECHO; return(PTR_OP); }
"&&"            { cnt+=yyleng;ECHO; return(AND_OP); }
"||"            { cnt+=yyleng;ECHO; return(OR_OP); }
"<="            { cnt+=yyleng;ECHO; return(LE_OP); }
">="            { cnt+=yyleng;ECHO; return(GE_OP); }
"=="            { cnt+=yyleng;ECHO; return(EQ_OP); }
"!="            { cnt+=yyleng;ECHO; return(NE_OP); }
    /* Single-character tokens are returned as their own character code;
       the digraph spellings map to the same tokens. */
";"         { cnt+=yyleng;ECHO; return(';'); }
("{"|"<%")      { cnt+=yyleng;ECHO; return('{'); }
("}"|"%>")      { cnt+=yyleng;ECHO; return('}'); }
","         { cnt+=yyleng;ECHO; return(','); }
":"         { cnt+=yyleng;ECHO; return(':'); }
"="         { cnt+=yyleng;ECHO; return('='); }
"("         { cnt+=yyleng;ECHO; return('('); }
")"         { cnt+=yyleng;ECHO; return(')'); }
("["|"<:")      { cnt+=yyleng;ECHO; return('['); }
("]"|":>")      { cnt+=yyleng;ECHO; return(']'); }
"."         { cnt+=yyleng;ECHO; return('.'); }
"&"         { cnt+=yyleng;ECHO; return('&'); }
"!"         { cnt+=yyleng;ECHO; return('!'); }
"~"         { cnt+=yyleng;ECHO; return('~'); }
"-"         { cnt+=yyleng;ECHO; return('-'); }
"+"         { cnt+=yyleng;ECHO; return('+'); }
"*"         { cnt+=yyleng;ECHO; return('*'); }
"/"         { cnt+=yyleng;ECHO; return('/'); }
"%"         { cnt+=yyleng;ECHO; return('%'); }
"<"         { cnt+=yyleng;ECHO; return('<'); }
">"         { cnt+=yyleng;ECHO; return('>'); }
"^"         { cnt+=yyleng;ECHO; return('^'); }
"|"         { cnt+=yyleng;ECHO; return('|'); }
"?"         { cnt+=yyleng;ECHO; return('?'); }

[ ]         { cnt+=yyleng;ECHO; }
    /* Other horizontal/vertical whitespace advances the column but is not
       echoed; a newline starts a new line at column 1. */
[\t\v\f]        { cnt+=yyleng; }
[\n]            { line++;cnt=1; }
    /* Unrecognised characters are dropped, but still counted so that the
       columns reported for later tokens stay accurate. */
.           { cnt+=yyleng; }

%%
/*
 * yywrap - end-of-input hook called by the generated scanner.
 *
 * Always returns 1, which tells the scanner there is no further input
 * to switch to.
 */
int yywrap(void)
{
    return 1;
}
comment()
{
    char c,c1;
loop:
    while ((c = input()) != \'*\' && c != 0)
    {
        if(c==\'\\n\') {line++;cnt=1;} 
        else    {cnt++;}
    }
        //putchar(c); PUTCHAR only if comments need to be shown! 
    if ((c1 = input()) != \'/\' && c1 != 0)
    {
        unput(c1);
        goto loop;
    }
}
cparser.yacc 源代码
%{
#include <stdio.h>
#include <string.h>
#include \"symbol_table.h\"
extern FILE *yyin;
extern FILE *yyout;
extern int column;
extern int line;
extern int cnt;
extern char *yytext,tempid[100];
int temp,err,err1=0;

install()
{ 
    symrec *s;
    s = getsym (tempid);
    if (s == 0)
    s = putsym (tempid,temp);
    else 
    {
        printf(\" VOID=1 \");
     printf(\" CHAR=2 \");
     printf(\" INT=3 \");
     printf(\" FLOAT=4 \");
     printf(\" DOUBLE=4 \");
        printf( \"\\n\\nThere is a Semantic error at Pos : %d : %d : %s is already defined as %d\\n\\n\",line,cnt,s->name,s->type );
        exit(0);    
    }
    err1=1;
}
int context_check()
{ 
    symrec *s;
    s = getsym(tempid); 
    if (s == 0 )
    {printf( \"\\n\\nThere is a Semantic error at Pos : %d : %d : %s is an undeclared identifier\\n\\n\",tempid);exit(0);return 0;}
    else
    return(s->type);
    err1=1;

}
type_err(int t1,int t2)
{
    if(t1&&t2)
    {
     printf(\" VOID=1 \");
     printf(\" CHAR=2 \");
     printf(\" INT=3 \");
     printf(\" FLOAT=4 \");
     printf(\" DOUBLE=4 \");  
    printf( \"\\n\\nThere is a Semantic error at Pos : %d : %d : Type mismatch for %s between %d and %d \\n\\n\",tempid,t1,t2);
    err1=1;
    exit(0);    
    }   
}

%}



/* Terminals returned by the scanner: identifiers, literals, multi-character
   operators and compound-assignment operators. No grammar rule in this file
   uses SINGLE or produces anything from TYPE_NAME other than type_specifier. */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME SINGLE

/* Storage-class, type, qualifier and aggregate keywords. */
%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS

/* Statement keywords. */
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
/* Precedence pair for the dangling else: the else-less IF rule is tagged
   %prec LOWER_THAN_ELSE, which ranks below ELSE declared after it. */
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE

/* The grammar's start symbol. */
%start translation_unit
%%

/*
 * Primary, postfix, unary and cast expressions.
 * The semantic value of an expression is an integer type code. An
 * identifier's code comes from the symbol table via context_check();
 * alternatives without an action keep yacc's default $$ = $1.
 */
primary_expression
    : IDENTIFIER    {$$=context_check();}
    | CONSTANT
    | STRING_LITERAL
    | '(' expression ')' {$$= $2;}
    ;

postfix_expression
    : primary_expression    {$$=$1;}
    | postfix_expression '[' expression ']'
    | postfix_expression '(' ')'
    | postfix_expression '(' argument_expression_list ')'
    | postfix_expression '.' IDENTIFIER
    | postfix_expression PTR_OP IDENTIFIER
    | postfix_expression INC_OP
    | postfix_expression DEC_OP
    ;

argument_expression_list
    : assignment_expression
    | argument_expression_list ',' assignment_expression
    ;

unary_expression
    : postfix_expression    {$$=$1;}
    | INC_OP unary_expression
    | DEC_OP unary_expression
    | unary_operator cast_expression
    | SIZEOF unary_expression
    | SIZEOF '(' type_name ')'
    ;

unary_operator
    : '&'
    | '*'
    | '+'
    | '-'
    | '~'
    | '!'
    ;

cast_expression
    : unary_expression  {$$=$1;}
    | '(' type_name ')' cast_expression
    ;

/*
 * Binary operator levels, from multiplicative down to conditional.
 * Each single-operand alternative passes the operand's type code up
 * unchanged; the operator alternatives have no action, so they take the
 * default $$ = $1 (the left operand's value).
 */
multiplicative_expression
    : cast_expression   {$$=$1;}
    | multiplicative_expression '*' cast_expression
    | multiplicative_expression '/' cast_expression
    | multiplicative_expression '%' cast_expression
    ;

additive_expression
    : multiplicative_expression {$$=$1;}
    | additive_expression '+' multiplicative_expression
    | additive_expression '-' multiplicative_expression
    ;

shift_expression
    : additive_expression   {$$=$1;}
    | shift_expression LEFT_OP additive_expression
    | shift_expression RIGHT_OP additive_expression
    ;

relational_expression
    : shift_expression  {$$=$1;}
    | relational_expression '<' shift_expression
    | relational_expression '>' shift_expression
    | relational_expression LE_OP shift_expression
    | relational_expression GE_OP shift_expression
    ;

equality_expression
    : relational_expression {$$=$1;}
    | equality_expression EQ_OP relational_expression
    | equality_expression NE_OP relational_expression
    ;

and_expression
    : equality_expression   {$$=$1;}
    | and_expression '&' equality_expression
    ;

exclusive_or_expression
    : and_expression    {$$=$1;}
    | exclusive_or_expression '^' and_expression
    ;

inclusive_or_expression
    : exclusive_or_expression   {$$=$1;}
    | inclusive_or_expression '|' exclusive_or_expression
    ;

logical_and_expression
    : inclusive_or_expression   {$$=$1;}
    | logical_and_expression AND_OP inclusive_or_expression
    ;

logical_or_expression
    : logical_and_expression    {$$=$1;}
    | logical_or_expression OR_OP logical_and_expression
    ;

conditional_expression
    : logical_or_expression {$$=$1;}
    | logical_or_expression '?' expression ':' conditional_expression
    ;

/*
 * Assignment and comma expressions.
 * An assignment compares the type codes of its two sides and calls
 * type_err() when they differ; its own value is the left side's code.
 * NOTE(review): only type codes are carried, so nothing here can tell
 * whether the left operand is an lvalue - "1 = a" is accepted.
 */
assignment_expression
    : conditional_expression    {$$=$1;}
    | unary_expression assignment_operator assignment_expression    {if($1!=$3){type_err($1,$3);} $$=$1;}
    ;

assignment_operator
    : '='
    | MUL_ASSIGN
    | DIV_ASSIGN
    | MOD_ASSIGN
    | ADD_ASSIGN
    | SUB_ASSIGN
    | LEFT_ASSIGN
    | RIGHT_ASSIGN
    | AND_ASSIGN
    | XOR_ASSIGN
    | OR_ASSIGN
    ;

expression
    : assignment_expression {$$=$1;}
    | expression ',' assignment_expression
    ;

constant_expression
    : conditional_expression
    ;

/*
 * Declarations and declaration specifiers.
 * type_specifier records the type code of the declaration in the global
 * temp (VOID=1, CHAR=2, SHORT/INT/LONG=3, FLOAT/DOUBLE=4); install()
 * later stores that code with each declared name. SIGNED, UNSIGNED and
 * the struct/enum/TYPE_NAME alternatives leave temp untouched here.
 */
declaration
    : declaration_specifiers ';'
    | declaration_specifiers init_declarator_list ';'
    ;

declaration_specifiers
    : storage_class_specifier
    | storage_class_specifier declaration_specifiers
    | type_specifier
    | type_specifier declaration_specifiers
    | type_qualifier
    | type_qualifier declaration_specifiers
    ;

init_declarator_list
    : init_declarator
    | init_declarator_list ',' init_declarator
    ;

init_declarator
    : declarator
    | declarator '=' initializer
    ;

storage_class_specifier
    : TYPEDEF
    | EXTERN
    | STATIC
    | AUTO
    | REGISTER
    ;

type_specifier
    : VOID  {temp=1;}
    | CHAR  {temp=2;}
    | SHORT {temp=3;}
    | INT   {temp=3;}
    | LONG  {temp=3;}
    | FLOAT {temp=4;}
    | DOUBLE    {temp=4;}
    | SIGNED
    | UNSIGNED
    | struct_or_union_specifier
    | enum_specifier
    | TYPE_NAME
    ;

/*
 * Struct/union and enum specifiers.
 *
 * The tag is recorded by its own rule, struct_tag, which reduces
 * immediately after the tag identifier is shifted, while tempid still
 * holds the tag. The previous actions ran install() at the end of the
 * specifier, by which time tempid held the last member name (or a
 * following declarator read as lookahead), so any tagged struct was
 * reported as "already defined". A tag that is already known is left
 * alone so that "struct s" can be referred to more than once.
 */
struct_or_union_specifier
    : struct_or_union struct_tag '{' struct_declaration_list '}'
    | struct_or_union '{' struct_declaration_list '}'
    | struct_or_union struct_tag
    ;

struct_tag
    : IDENTIFIER    {if(getsym(tempid)==0){install();}}
    ;

struct_or_union
    : STRUCT
    | UNION
    ;

struct_declaration_list
    : struct_declaration
    | struct_declaration_list struct_declaration
    ;

struct_declaration
    : specifier_qualifier_list struct_declarator_list ';'
    ;

specifier_qualifier_list
    : type_specifier specifier_qualifier_list
    | type_specifier
    | type_qualifier specifier_qualifier_list
    | type_qualifier
    ;

struct_declarator_list
    : struct_declarator
    | struct_declarator_list ',' struct_declarator
    ;

struct_declarator
    : declarator
    | ':' constant_expression
    | declarator ':' constant_expression
    ;

enum_specifier
    : ENUM '{' enumerator_list '}'
    | ENUM IDENTIFIER '{' enumerator_list '}'
    | ENUM IDENTIFIER
    ;

enumerator_list
    : enumerator
    | enumerator_list ',' enumerator
    ;

/*
 * An enumerator declares a new name, so it is installed (with the INT
 * type code) rather than looked up: the previous context_check() call
 * rejected every enumerator as an undeclared identifier. The name is
 * installed before any "= constant_expression" is parsed, because
 * identifiers inside that expression overwrite tempid.
 */
enumerator
    : enumerator_name
    | enumerator_name '=' constant_expression
    ;

enumerator_name
    : IDENTIFIER    {temp=3; install();}
    ;

type_qualifier
    : CONST
    | VOLATILE
    ;

/*
 * Declarators, pointers, parameter lists and abstract declarators.
 * A declared name enters the symbol table through install() as soon as
 * its IDENTIFIER is reduced, using the type code left in temp by the
 * preceding type_specifier.
 */
declarator
    : pointer direct_declarator
    | direct_declarator
    ;

direct_declarator
    : IDENTIFIER    {install();}
    | '(' declarator ')'
    | direct_declarator '[' constant_expression ']'
    | direct_declarator '[' ']'
    | direct_declarator '(' parameter_type_list ')'
    | direct_declarator '(' identifier_list ')'
    | direct_declarator '(' ')'
    ;

pointer
    : '*'
    | '*' type_qualifier_list
    | '*' pointer
    | '*' type_qualifier_list pointer
    ;

type_qualifier_list
    : type_qualifier
    | type_qualifier_list type_qualifier
    ;


parameter_type_list
    : parameter_list
    | parameter_list ',' ELLIPSIS
    ;

parameter_list
    : parameter_declaration
    | parameter_list ',' parameter_declaration
    ;

parameter_declaration
    : declaration_specifiers declarator
    | declaration_specifiers abstract_declarator
    | declaration_specifiers
    ;

identifier_list
    : IDENTIFIER    {install();}
    | identifier_list ',' IDENTIFIER    {install();}
    ;

type_name
    : specifier_qualifier_list
    | specifier_qualifier_list abstract_declarator
    ;

abstract_declarator
    : pointer
    | direct_abstract_declarator
    | pointer direct_abstract_declarator
    ;

direct_abstract_declarator
    : '(' abstract_declarator ')'
    | '[' ']'
    | '[' constant_expression ']'
    | direct_abstract_declarator '[' ']'
    | direct_abstract_declarator '[' constant_expression ']'
    | '(' ')'
    | '(' parameter_type_list ')'
    | direct_abstract_declarator '(' ')'
    | direct_abstract_declarator '(' parameter_type_list ')'
    ;

/*
 * Initializers. A plain expression initializer passes its type code up;
 * brace-enclosed lists have no action and keep the default value.
 */
initializer
    : assignment_expression {$$=$1;}
    | '{' initializer_list '}'
    | '{' initializer_list ',' '}'
    ;

initializer_list
    : initializer
    | initializer_list ',' initializer
    ;

/*
 * Statements.
 * Labels and goto targets are accepted without any symbol-table lookup.
 */
statement
    : labeled_statement
    | compound_statement
    | expression_statement
    | selection_statement
    | iteration_statement
    | jump_statement
    ;

labeled_statement
    : IDENTIFIER ':' statement
    | CASE constant_expression ':' statement
    | DEFAULT ':' statement
    ;

compound_statement
    : '{' '}'
    | '{' statement_list '}'
    | '{' declaration_list '}'
    | '{' declaration_list statement_list '}'
    ;

declaration_list
    : declaration
    | declaration_list declaration
    ;

statement_list
    : statement
    | statement_list statement
    ;

expression_statement
    : ';'
    | expression ';'
    ;

/*
 * The else-less form carries %prec LOWER_THAN_ELSE so that a following
 * ELSE is shifted and binds to the nearest IF. A stray ';' that used to
 * follow the %prec ended the rule before its remaining alternatives; it
 * has been removed.
 */
selection_statement
    : IF '(' expression ')' statement  %prec LOWER_THAN_ELSE
    | IF '(' expression ')' statement ELSE statement
    | SWITCH '(' expression ')' statement
    ;

iteration_statement
    : WHILE '(' expression ')' statement
    | DO statement WHILE '(' expression ')' ';'
    | FOR '(' expression_statement expression_statement ')' statement
    | FOR '(' expression_statement expression_statement expression ')' statement
    ;

jump_statement
    : GOTO IDENTIFIER ';'
    | CONTINUE ';'
    | BREAK ';'
    | RETURN ';'
    | RETURN expression ';'
    ;

/*
 * Top level: a translation unit is a sequence of function definitions
 * and declarations. function_definition also accepts the old-style forms
 * with a declaration list between the declarator and the body, and the
 * forms without declaration specifiers.
 */
translation_unit
    : external_declaration
    | translation_unit external_declaration
    ;

external_declaration
    : function_definition
    | declaration
    ;

function_definition
    : declaration_specifiers declarator declaration_list compound_statement
    | declaration_specifiers declarator compound_statement
    | declarator declaration_list compound_statement
    | declarator compound_statement
    ;
%%
yyerror(s)
char *s;
{
    fflush(stdout);err=1;
    printf(\"Syntax error at Pos : %d : %d\\n\",cnt);
    exit(0);
    //printf(\"\\n%*s\\n%*s\\n\",column,\"^\",s);
}
main(argc,argv)
int argc;
char **argv;
{

    char *fname;    
    ++argv,--argc;/*skip program name*/
    if(argc>0)
    {
        yyin=fopen(argv[0],\"r\");
        fname=argv[0];
        strcat(fname,\"_output\");
        yyout=fopen(fname,\"w\");
    }
    else
    {
        printf(\"Please give the c filename as an argument.\\n\");
    }
    yyparse();
    if(err==0)
    printf(\"No Syntax errors found!\\n\");
    fname=argv[0];strcat(fname,\"_symbol-table\");
    FILE *sym_tab=fopen(fname,\"w\");
    fprintf(sym_tab,\"Type\\tSymbol\\n\");
    symrec *ptr;    
    for(ptr=sym_table;ptr!=(symrec *)0;ptr=(symrec *)ptr->next)
    {
        fprintf(sym_tab,\"%d\\t%s\\n\",ptr->type,ptr->name);
    }
    fclose(sym_tab);    

}   
symbol_table.h 源代码
/*
 * symbol_table.h - a singly linked symbol table of (name, type code)
 * records; the newest record is at the head of the list.
 *
 * NOTE: this header defines the list head and both functions, so it must
 * be included from exactly one translation unit.
 */
#ifndef SYMBOL_TABLE_H
#define SYMBOL_TABLE_H

#include <stdlib.h>
#include <string.h>

/* Type codes stored in symrec.type. */
#define t_void  1
#define t_char  2
#define t_int   3
#define t_float 4

struct symrec
{
    char *name;             /* heap copy of the symbol's name */
    int type;               /* one of the t_* codes */
    struct symrec *next;    /* next (older) record, or null */
};
typedef struct symrec symrec;

/* Head of the list; null while the table is empty. */
symrec *sym_table = (symrec *)0;

symrec *putsym(const char *sym_name, int sym_type);
symrec *getsym(const char *sym_name);

/*
 * putsym - add a record for sym_name with type sym_type at the head of
 * the table. The name is copied, so the caller keeps ownership of
 * sym_name. No duplicate check is made.
 *
 * Returns the new record, or NULL if memory could not be allocated (the
 * table is left unchanged in that case).
 */
symrec *putsym(const char *sym_name, int sym_type)
{
    symrec *ptr = malloc(sizeof *ptr);

    if (ptr == NULL)
        return NULL;

    ptr->name = malloc(strlen(sym_name) + 1);
    if (ptr->name == NULL) {
        free(ptr);
        return NULL;
    }
    strcpy(ptr->name, sym_name);
    ptr->type = sym_type;
    ptr->next = sym_table;
    sym_table = ptr;
    return ptr;
}

/*
 * getsym - find the most recently added record whose name equals
 * sym_name.
 *
 * Returns the record, or NULL when the name is not in the table.
 */
symrec *getsym(const char *sym_name)
{
    symrec *ptr;

    for (ptr = sym_table; ptr != NULL; ptr = ptr->next) {
        if (strcmp(ptr->name, sym_name) == 0)
            return ptr;
    }
    return NULL;
}

#endif /* SYMBOL_TABLE_H */
    

解决方法

一般而言,遇到赋值操作时,您需要检查左操作数,确保它是左值;如果不是,就报告错误。这通常是类型检查的一部分:把值的属性(例如它是否为左值)与类型一起保存,并在每次使用该值时检查这些属性是否正确。因此,您可以使用 `%union` 定义一个能够保存这些信息的解析器语义值对象:
/* Semantic value for expression nonterminals: a type together with a flag
   saying whether the expression is an lvalue. "Type" is not defined in this
   snippet - presumably the project's own type descriptor. */
%union {
    struct {
        Type  *type;        /* type of the expression */
        int   is_lvalue;    /* tested by the assignment rule; 0 for an assignment's own result */
    } valinfo;
}
/* Give these nonterminals the valinfo member as their value type. */
%type<valinfo> assignment_expression unary_expression
然后,您的赋值规则除了检查类型之外,还要检查左操作数是否为左值:
/* Assignment rule from the answer: reject a non-lvalue target, then check
   the two types, and give the result the target's type. The result of an
   assignment is itself not an lvalue. error() and
   type_is_implicitly_convertable() are helpers the answer assumes exist. */
assignment_expression:
    unary_expression assignment_operator assignment_expression {
        if (!$1.is_lvalue)
            error("assigning to non-lvalue");
        if ($1.type != $3.type && !type_is_implicitly_convertable($3.type,$1.type))
            error("type mismatch in assignment");
        $$.type = $1.type;
        $$.is_lvalue = 0; }
请注意,凡是其值可能被其他规则动作使用的规则,都必须在自己的动作中正确设置 `$$`;您的代码没有做到这一点,因此这些检查很可能得不到任何有用的结果。

相关问答

依赖报错 idea导入项目后依赖报错,解决方案:https://blog....
错误1:代码生成器依赖和mybatis依赖冲突 启动项目时报错如下...
错误1:gradle项目控制台输出为乱码 # 解决方案:https://bl...
错误还原:在查询的过程中,传入的workType为0时,该条件不起...
报错如下,gcc版本太低 ^ server.c:5346:31: 错误:‘struct...