// Source: www.pudn.com > pascallex.rar > lex.cpp
/*
 * Hand-written lexical analyser for a Pascal subset, built on top of a
 * trimmed-down copy of the flex buffer machinery (yy_buffer_state, yyinput,
 * yyunput, yy_get_next_buffer).  The public entry point is lex(); the token
 * it recognises is left in the file-static result_tuple.
 */
#include "stdafx.h"
#include "common.h"
#include "lex.h"
/*
 * NOTE(review): the names of the system headers that followed here were lost
 * when this file was extracted (only bare "#include" tokens survived).  The
 * list below is inferred from the library calls made in this file; confirm it
 * against the original project.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#ifdef _WIN32
#include <io.h>     /* isatty() */
#else
#include <unistd.h> /* isatty() */
#endif

void local_init();
void local_end();
static int _stdcall act_identity(char &c);
static int _stdcall act_comment2(char &c);
static int _stdcall act_string(char &c);
static int _stdcall act_comment1(char &c);
static int _stdcall act_chinese(char &c);
static int _stdcall act_number(char &c, int state);

static void *yy_flex_alloc(unsigned int);
static void *yy_flex_realloc(void *, unsigned int);
static void yy_flex_free(void *);

static TUPLE result_tuple;  /* the token most recently recognised by lex() */
static int yy_linenum = 1;  /* current line; maintained by yyinput()/yyunput() */
static int yy_stringpos;    /* running offset used for string constants written to yystr */
static int yy_init = 1;     /* nonzero until lex() has performed its one-time setup */

#ifdef YY_USER_INIT
#undef YY_USER_INIT
#endif
#define YY_USER_INIT local_init()

#ifdef YY_USER_END
#undef YY_USER_END
#endif
#define YY_USER_END local_end()

struct yy_buffer_state {
    FILE *yy_input_file;
    char *yy_ch_buf;          /* input buffer */
    char *yy_buf_pos;         /* current position in the input buffer */
    unsigned int yy_buf_size; /* buffer size, not counting the end markers */
    int yy_n_chars;           /* characters held in the buffer, not counting the end markers */
    int yy_is_our_buffer;     /* nonzero if we own yy_ch_buf and must free it */
    int yy_is_interactive;    /* nonzero if the input file is a tty (see yy_init_buffer) */
    int yy_at_bol;            /* nonzero when positioned at the beginning of a line */
    int yy_fill_buffer;       /* if 0, an exhausted buffer is treated as end of file
                                 instead of being refilled */
    int yy_buffer_status;     /* one of the YY_BUFFER_* values below */
#define YY_BUFFER_NEW 0
#define YY_BUFFER_NORMAL 1
#define YY_BUFFER_EOF_PENDING 2
};
typedef struct yy_buffer_state *YY_BUFFER_STATE;

#define YY_END_OF_BUFFER_CHAR 0 /* end-of-buffer marker */
#define YY_BUF_SIZE 1024        /* default buffer size */
#define YY_READ_BUF_SIZE 1024

/* The current buffer and its "window" variables. */
static YY_BUFFER_STATE yy_current_buffer = 0;
#define YY_CURRENT_BUFFER yy_current_buffer
static char yy_hold_char; /* character overwritten by the NUL that terminates yytext */
static int yy_n_chars;
#define YY_MORE_ADJ 0
static char *yytext;
static char *yy_c_buf_p = (char *) 0;

/* Copy the state of the current buffer into the window variables. */
void yy_load_buffer_state(void)
{
    yy_n_chars = yy_current_buffer->yy_n_chars;
    yytext = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
    yyin = yy_current_buffer->yy_input_file;
    yy_hold_char = *yy_c_buf_p;
}

/* Discard all data in b; if b is the current buffer, reload the window variables. */
void yy_flush_buffer(YY_BUFFER_STATE b)
{
    if (!b)
        return;

    b->yy_n_chars = 0;
    b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
    b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
    b->yy_buf_pos = &b->yy_ch_buf[0];
    b->yy_at_bol = 1;
    b->yy_buffer_status = YY_BUFFER_NEW;

    if (b == yy_current_buffer)
        yy_load_buffer_state();
}

/* Flush b and attach file to it as its input source. */
void yy_init_buffer(YY_BUFFER_STATE b, FILE *file)
{
    yy_flush_buffer(b);
    b->yy_input_file = file;
    b->yy_fill_buffer = 1;
    b->yy_is_interactive = file ? (isatty(fileno(file)) > 0) : 0;
}

/* Allocate a buffer control block plus a size-byte buffer reading from file. */
YY_BUFFER_STATE yy_create_buffer(FILE *file, int size)
{
    YY_BUFFER_STATE b;

    b = (YY_BUFFER_STATE) yy_flex_alloc(sizeof(struct yy_buffer_state));
    if (!b)
        yy_fatal_error("out of dynamic memory in yy_create_buffer()");

    b->yy_buf_size = size;
    /* +2 leaves room for the two end-of-buffer markers. */
    b->yy_ch_buf = (char *) yy_flex_alloc(b->yy_buf_size + 2);
    if (!b->yy_ch_buf)
        yy_fatal_error("out of dynamic memory in yy_create_buffer()");

    b->yy_is_our_buffer = 1;
    yy_init_buffer(b, file);
    return b;
}

/* Release the buffer control block and, if we own it, the buffer itself. */
void yy_delete_buffer(YY_BUFFER_STATE b)
{
    if (!b)
        return;

    if (b == yy_current_buffer)
        yy_current_buffer = (YY_BUFFER_STATE) 0;

    if (b->yy_is_our_buffer)
        yy_flex_free((void *) b->yy_ch_buf);

    yy_flex_free((void *) b);
}

/* Return values of yy_get_next_buffer(). */
#define EOB_ACT_CONTINUE_SCAN 0
#define EOB_ACT_END_OF_FILE 1
#define EOB_ACT_LAST_MATCH 2

static int yy_get_next_buffer();                /* refill the buffer */
static int yyinput();                           /* fetch the next character */
static void yyunput(int c, char *yy_bp);
void yyrestart(FILE *input_file);

#ifdef YY_STRING
static int myinput(char *buf, int max);
#define YY_INPUT(buf,result,max) {result = myinput(buf,max);}
#else
#define YY_INPUT(buf,result,max_size) \
    if ( yy_current_buffer->yy_is_interactive ) \
        { \
        int c = '*', n; \
        for ( n = 0; n < max_size && \
                 (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
            buf[n] = (char) c; \
        if ( c == '\n' ) \
            buf[n++] = (char) c; \
        if ( c == EOF && ferror( yyin ) ) \
            yy_fatal_error( "input in flex scanner failed" ); \
        result = n; \
        } \
    else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \
              && ferror( yyin ) ) \
        yy_fatal_error( "input in flex scanner failed" );
#endif

/* Memory allocation wrappers. */
static void *yy_flex_alloc(unsigned int size)
{
    return (void *) malloc(size);
}

static void *yy_flex_realloc(void *ptr, unsigned int size)
{
    return (void *) realloc((char *) ptr, size);
}

static void yy_flex_free(void *ptr)
{
    free(ptr);
}

/*
 * Push character c back so that the next yyinput() returns it; the inverse
 * of yyinput().  yy_bp is the start of the current token and becomes yytext.
 * On return the position just before c holds a NUL, so yytext is a proper
 * C string that ends right before the pushed-back character.
 */
static void yyunput(int c, char *yy_bp)
{
    char *yy_cp = yy_c_buf_p;

    if (c == 10)
        yy_linenum--; /* undo the line count done by yyinput() */

    /* Undo the NUL that yyinput() planted to terminate yytext. */
    *yy_cp = yy_hold_char;

    if (yy_cp < yy_current_buffer->yy_ch_buf + 2) {
        /* No room in front of the data: shift everything to the buffer's end. */
        int number_to_move = yy_n_chars + 2; /* +2 for the end markers */
        char *dest = &yy_current_buffer->yy_ch_buf[yy_current_buffer->yy_buf_size + 2];
        char *source = &yy_current_buffer->yy_ch_buf[number_to_move];

        while (source > yy_current_buffer->yy_ch_buf)
            *--dest = *--source;

        yy_cp += (int) (dest - source);
        yy_bp += (int) (dest - source);
        yy_current_buffer->yy_n_chars = yy_n_chars = yy_current_buffer->yy_buf_size;

        if (yy_cp < yy_current_buffer->yy_ch_buf + 2)
            yy_fatal_error("flex scanner push-back overflow");
    }

    yytext = yy_bp;
    yy_hold_char = c;
    yy_c_buf_p = --yy_cp;
    *yy_cp = 0;
}

/*
 * Return the next input character (as an unsigned char value) or EOF.
 * Keeps yytext NUL-terminated just past the returned character and counts
 * lines in yy_linenum.
 */
static int yyinput()
{
    int c;

    *yy_c_buf_p = yy_hold_char;

    if (*yy_c_buf_p == YY_END_OF_BUFFER_CHAR) {
        if (yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars]) {
            /* A genuine NUL in the data, not the end-of-buffer marker. */
            *yy_c_buf_p = '\0';
        } else {
            int offset = (int) (yy_c_buf_p - yytext);
            ++yy_c_buf_p;

            switch (yy_get_next_buffer()) { /* refill the buffer */
            case EOB_ACT_LAST_MATCH:
                yyrestart(yyin);
                /* fallthrough */
            case EOB_ACT_END_OF_FILE:
                return EOF;
            case EOB_ACT_CONTINUE_SCAN:
                yy_c_buf_p = yytext + offset;
                break;
            }
        }
    }

    c = *(unsigned char *) yy_c_buf_p;
    yy_hold_char = *++yy_c_buf_p;
    *yy_c_buf_p = '\0'; /* make yytext a C string */

    if (c == 10)
        yy_linenum++;
    return c;
}

/*
 * Move the partially scanned token to the front of the buffer and read more
 * input behind it.  Returns one of the EOB_ACT_* values.
 */
static int yy_get_next_buffer()
{
    char *dest = yy_current_buffer->yy_ch_buf; /* destination: start of buffer */
    char *source = yytext;                     /* source: current token */
    int number_to_move, i;
    int ret_val;

    /* The scan pointer must not have run past both end markers. */
    if (yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1])
        yy_fatal_error("fatal flex scanner internal error--end of buffer missed");

    if (yy_current_buffer->yy_fill_buffer == 0) {
        /* Refilling is disabled: treat as end of file. */
        if (yy_c_buf_p - yytext - YY_MORE_ADJ == 1) {
            return EOB_ACT_END_OF_FILE;
        } else {
            return EOB_ACT_LAST_MATCH;
        }
    }

    /* Try to read more data. */

    /* Copy the characters from yytext up to yy_c_buf_p to the buffer start. */
    number_to_move = (int) (yy_c_buf_p - yytext) - 1;
    for (i = 0; i < number_to_move; ++i)
        *(dest++) = *(source++);

    if (yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING) {
        yy_current_buffer->yy_n_chars = yy_n_chars = 0;
    } else {
        int num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1;

        while (num_to_read <= 0) {
            /* Not enough room in the buffer - grow it. */

            /* just a shorter name for the current buffer */
            YY_BUFFER_STATE b = yy_current_buffer;
            int yy_c_buf_p_offset = (int) (yy_c_buf_p - b->yy_ch_buf);

            if (b->yy_is_our_buffer) {
                int new_size = b->yy_buf_size * 2;

                if (new_size <= 0)
                    b->yy_buf_size += b->yy_buf_size / 8;
                else
                    b->yy_buf_size *= 2;

                b->yy_ch_buf = (char *)
                    /* Include room in for 2 EOB chars. */
                    yy_flex_realloc((void *) b->yy_ch_buf, b->yy_buf_size + 2);
            } else {
                /* Can't grow it, we don't own it. */
                b->yy_ch_buf = 0;
            }

            if (!b->yy_ch_buf)
                yy_fatal_error("fatal error - scanner input buffer overflow");

            yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
            num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1;
        }

        if (num_to_read > YY_READ_BUF_SIZE)
            num_to_read = YY_READ_BUF_SIZE;

        /* Read more characters; yy_n_chars receives the number actually read. */
        YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), yy_n_chars, num_to_read );
        yy_current_buffer->yy_n_chars = yy_n_chars;
    }

    if (yy_n_chars == 0) {
        if (number_to_move == YY_MORE_ADJ) {
            ret_val = EOB_ACT_END_OF_FILE;
            yyrestart(yyin);
        } else {
            ret_val = EOB_ACT_LAST_MATCH;
            yy_current_buffer->yy_buffer_status = YY_BUFFER_EOF_PENDING;
        }
    } else {
        ret_val = EOB_ACT_CONTINUE_SCAN;
    }

    yy_n_chars += number_to_move;
    yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR;
    yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
    yytext = &yy_current_buffer->yy_ch_buf[0];
    return ret_val;
}

/* Re-initialise the current buffer (creating it if needed) to read input_file. */
void yyrestart(FILE *input_file)
{
    if (!yy_current_buffer)
        yy_current_buffer = yy_create_buffer(yyin, YY_BUF_SIZE);

    yy_init_buffer(yy_current_buffer, input_file);
    yy_load_buffer_state();
}

/*
 * When YY_STRING is defined the input comes from a string instead of a file.
 * parseString is initialised by the caller; parseCh is the read cursor into it.
 */
#ifdef YY_STRING
char *parseString = 0;
static char *parseCh;

/* Copy up to max bytes of the remaining string into buf; returns the count. */
static int myinput(char *buf, int max)
{
    int len;

    if (parseCh == NULL)
        parseCh = parseString;
    if (parseCh == NULL)
        return 0; /* no source string supplied: report end of input */

    len = (int) strlen(parseCh);
    if (len > max)
        len = max;
    if (len > 0)
        memcpy(buf, parseCh, len);
    parseCh += len;
    return len;
}
#endif

/////////////////////////////////////////////
/*
 * Scan the next token.
 *
 * Returns LEX_END at end of input (after running YY_USER_END), otherwise the
 * status produced for the token (LEX_SUCCESS or LEX_WITH_ERROR); the token
 * itself is stored in result_tuple.  Comments, white space and illegal
 * characters yield no token and scanning simply continues.
 *
 * NOTE(review): the character is held in a plain char and compared with EOF,
 * so where char is signed a 0xFF input byte is indistinguishable from end of
 * file.  The act_* helpers push EOF back as a char and depend on this, so it
 * is left unchanged here.
 */
int lex()
{
    /* yy_init is a static flag, initially 1: one-time setup is still pending. */
    if (yy_init) {
        yy_init = 0; /* setup has now been done */

        /* Run the user-supplied initialisation, if any. */
#ifdef YY_USER_INIT
        YY_USER_INIT;
#endif
        if (!yyin) /* no input file: use standard input */
            yyin = stdin;
        if (!yyout) /* no output file: use standard output */
            yyout = stdout;
        if (!yy_current_buffer) /* no current buffer: create one */
            yy_current_buffer = yy_create_buffer(yyin, YY_BUF_SIZE);

        yy_load_buffer_state(); /* bring the window variables in line with the buffer */
    }

    int rc = LEX_NOTUPLE;
    char c;

    while (rc == LEX_NOTUPLE) {
        c = yyinput();
        if (c == EOF) {
#ifdef YY_USER_END
            YY_USER_END;
#endif
            return LEX_END;
        }
        yytext = yy_c_buf_p - 1; /* the token starts at the character just read */

        if (c < 0) {
            rc = act_chinese(c); /* non-ASCII (e.g. Chinese) bytes */
        } else if (isalpha((unsigned char) c) || (c == '_')) {
            rc = act_identity(c); /* identifier or keyword */
        } else if (isdigit((unsigned char) c)) {
            rc = act_number(c, 1); /* number */
        } else {
            switch (c) {
            case '{':
                rc = act_comment2(c); /* { ... } comment */
                break;
            case '\'':
                rc = act_string(c); /* string or character constant */
                break;
            case '(':
                c = yyinput();
                if (c == '*') {
                    rc = act_comment1(c); /* (* ... *) comment */
                } else {
                    rc = LEX_SUCCESS;
                    result_tuple.flag = o_lbracket;
                    yyunput(c, yytext);
                }
                break;
            case '=':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_eq;
                break;
            case ',':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_comma;
                break;
            case ';':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_semicolon;
                break;
            case '*':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_time;
                break;
            case '/':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_div;
                break;
            case ')':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_rbracket;
                break;
            case '[':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_lparent;
                break;
            case ']':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_rparent;
                break;
            case '+':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_plus;
                break;
            case '-':
                rc = LEX_SUCCESS;
                result_tuple.flag = o_minus;
                break;
            case ':':
                c = yyinput();
                if (c != '=') {
                    result_tuple.flag = o_colon;
                    yyunput(c, yytext);
                } else {
                    result_tuple.flag = o_becomes;
                }
                rc = LEX_SUCCESS;
                break;
            case '<':
                c = yyinput();
                if (c == '=') {
                    result_tuple.flag = o_le;
                } else if (c == '>') {
                    result_tuple.flag = o_ne;
                } else {
                    result_tuple.flag = o_ls;
                    yyunput(c, yytext);
                }
                rc = LEX_SUCCESS;
                break;
            case '>':
                c = yyinput();
                if (c != '=') {
                    result_tuple.flag = o_gt;
                    yyunput(c, yytext);
                } else {
                    result_tuple.flag = o_ge;
                }
                rc = LEX_SUCCESS;
                break;
            case '.':
                c = yyinput();
                if (!isdigit((unsigned char) c)) {
                    if (c == '.') {
                        result_tuple.flag = o_range;
                    } else {
                        result_tuple.flag = o_period;
                        yyunput(c, yytext);
                    }
                    rc = LEX_SUCCESS;
                } else {
                    rc = act_number(c, 3); /* ".digits": start in the fraction */
                }
                break;
            case '\r':
            case '\n':
            case '\t':
            case ' ':
                break; /* white space: no token */
            default:
                fprintf(stderr, "%-8d非法字符%c\n", yy_linenum, c);
            }
        }
    }
    return rc;
}

/////////////////////////////////
/*
 * frw[k] is the index in t_keyword of the first keyword of length k, so the
 * keywords of length k occupy t_keyword[frw[k]] .. t_keyword[frw[k+1]-1].
 */
static int frw[11] = { 0, 0, 0, 5, 16, 25, 31, 35, 39, 40, 41 };

/* Reserved words and predefined names, sorted by length (see frw). */
struct s_keyword { char *s; keyword f; } t_keyword[] = {
    {"if", k_if}, // 0
    {"of", k_of},
    {"do", k_do},
    {"to", k_to},
    {"or", k_or},
    {"var", k_var}, // 5
    {"for", k_for},
    {"end", k_end},
    {"not", k_not},
    {"and", k_and},
    {"div", k_div}, // 10
    {"mod", k_mod},
    {"abs", f_abs},
    {"sqr", f_sqr},
    {"ord", f_ord},
    {"chr", f_chr}, // 15
    {"odd", f_odd},
    {"type", k_type},
    {"then", k_then},
    {"else", k_else},
    {"true", c_true}, // 20
    {"char", t_char},
    {"succ", f_succ},
    {"pred", f_pred},
    {"read", f_read},
    {"const", k_const}, // 25
    {"begin", k_begin},
    {"while", k_while},
    {"array", k_array},
    {"false", c_false},
    {"write", f_write}, // 30
    {"downto", k_downto},
    {"record", k_record},
    {"maxint", c_maxint},
    {"readln", f_readln},
    {"program", k_program}, // 35
    {"integer", t_integer},
    {"boolean", t_boolean},
    {"writeln", f_writeln},
    {"function", k_function},
    {"procedure", k_procedure} // 40
};

/* Identifier table: slots 1..LEX_MAX_IDENTITY hold names; slot 0 is a search sentinel. */
char *t_identity[LEX_MAX_IDENTITY + 1];
int t_id_index; /* next free slot in t_identity */

/*
 * One-time scanner setup (YY_USER_INIT): opens "<sourcefile>.str" for the
 * string constants and empties the identifier table.
 */
void local_init()
{
    char stringfile[FILENAME_LENGTH + 2];

    yystr = NULL;
    /* Refuse names that would overflow stringfile once ".str" is appended. */
    if (strlen(sourcefile) + strlen(".str") + 1 > sizeof(stringfile)) {
        yy_fatal_error("source file name too long for the string file name");
    } else {
        strcpy(stringfile, sourcefile);
        strcat(stringfile, ".str");
        yystr = fopen(stringfile, "w");
    }

    yy_stringpos = 0;
    memset(t_identity, 0, sizeof(t_identity));
    t_id_index = 1;
}

/*
 * End-of-input hook (YY_USER_END): closes the string file, prints the
 * identifier table to yyout and frees the stored names.
 */
void local_end()
{
    int i;

    if (yystr != NULL) {
        fclose(yystr);
        yystr = NULL; /* do not leave a dangling stream behind */
    }

    fprintf(yyout, "\n下面是标志符表\n\n");
    for (i = 1; i <= LEX_MAX_IDENTITY; i++) {
        if (t_identity[i] != 0) {
            fprintf(yyout, "%-8d%s\n", i, t_identity[i]);
            delete[] t_identity[i];
            t_identity[i] = 0;
        }
    }
}

/*
 * Scan an identifier or keyword whose first character has already been read.
 * Keywords set result_tuple.flag to their code; other names are looked up in
 * (or added to) t_identity and reported as `identity` with the table index
 * in data.i_val.int_val1.
 */
static int _stdcall act_identity(char &c)
{
    do {
        c = yyinput();
        if (c == EOF)
            break;
        /* Cast: <ctype.h> functions are undefined for negative char values. */
    } while (isalpha((unsigned char) c) || (c == '_') || isdigit((unsigned char) c));
    yyunput(c, yytext);

    /* yytext now holds exactly the identifier that was found. */
    int i, j, k;

    k = (int) strlen(yytext);
    if (k < 10) {
        /* Only keywords of the same length need to be compared. */
        i = frw[k];
        j = frw[k + 1] - 1;
        while ((i <= j) && (strcmp(yytext, t_keyword[i].s) != 0))
            i++;
        if (i <= j) {
            result_tuple.flag = t_keyword[i].f;
            return LEX_SUCCESS;
        }
    }

    /* Search backwards; slot 0 is a sentinel so the loop always terminates. */
    t_identity[0] = yytext;
    i = t_id_index - 1;
    while (strcmp(yytext, t_identity[i]) != 0)
        i--;

    if (i == 0) {
        /* Not seen before: append a copy to the table. */
        if (t_id_index > LEX_MAX_IDENTITY) {
            yy_fatal_error("标志符表溢出");
            return LEX_WITH_ERROR; /* never write past the table if the call above returns */
        }
        t_identity[i = t_id_index] = new char[k + 1];
        strcpy(t_identity[t_id_index++], yytext);
    }

    result_tuple.flag = identity;
    result_tuple.data.i_val.int_val1 = i;
    return LEX_SUCCESS;
}

/* Skip a { ... } comment; the opening brace has already been read. */
static int _stdcall act_comment2(char &c)
{
    do {
        c = yyinput();
        if (c == EOF)
            break;
    } while (c != '}');
    return LEX_NOTUPLE;
}

/*
 * Scan a quoted string or character constant; the opening quote has already
 * been read.  A one-character result is reported as c_char, anything else as
 * c_string (written to yystr, with its offset and length in the tuple).
 * Returns LEX_WITH_ERROR if the literal was unterminated or too long.
 *
 * NOTE(review): the middle of this function (the closing-quote test) was lost
 * when the file was extracted.  The surviving exit code pushes back the last
 * character read and drops the last stored one, which implies one character
 * of look-ahead after a quote; that is reconstructed here as the standard
 * Pascal rule that '' inside a literal denotes a single quote.  The exact
 * length limit of the original is likewise unknown; this version stores at
 * most STRLENGTH characters.  Confirm both against the original source.
 */
static int _stdcall act_string(char &c)
{
    char temp[STRLENGTH + 2];
    int len = 0;
    bool once = true; /* true while no error has been reported */

    for (;;) {
        c = yyinput();
        if ((c == 10) || (c == EOF)) {
            /* yyinput() has already counted a newline, hence the -1 there. */
            fprintf(stderr, "%-8d缺少右单引号,定义字符串:%s\n",
                    (c == 10) ? yy_linenum - 1 : yy_linenum, yytext);
            once = false;
            break;
        }
        if (c == '\'') {
            /* Look one character ahead to tell '' from the closing quote. */
            c = yyinput();
            if (c != '\'')
                break;
        }
        if (len < STRLENGTH) {
            temp[len++] = c;
        } else if (once) {
            /* Keep consuming the literal but stop storing it. */
            fprintf(stderr, "%-8d字符串常量太长:%s\n", yy_linenum, yytext);
            once = false;
        }
    }
    yyunput(c, yytext); /* c is not part of the literal */
    temp[len] = 0;

    /* temp now holds the text of the literal. */
    int i = (int) strlen(temp);
    if (i == 1) { /* character constant */
        result_tuple.flag = c_char;
        result_tuple.data.c_val = temp[0];
    } else {
        result_tuple.flag = c_string;
        result_tuple.data.i_val.int_val1 = yy_stringpos;
        result_tuple.data.i_val.int_val2 = i;
        yy_stringpos += i + 2;
        if (yystr != NULL) /* the string file may have failed to open */
            fprintf(yystr, "%s\n", temp);
    }
    return once ? LEX_SUCCESS : LEX_WITH_ERROR;
}

/* Skip a (* ... *) comment; the opening "(*" has already been read. */
static int _stdcall act_comment1(char &c)
{
    c = yyinput();
    while (c != EOF) { /* do not keep reading once EOF has been seen */
        char prev = c;

        c = yyinput();
        if ((c == ')') && (prev == '*'))
            return LEX_NOTUPLE;
    }
    yyunput(c, yytext);
    return LEX_NOTUPLE;
}

/* Skip a run of negative (non-ASCII) bytes and report them as illegal. */
static int _stdcall act_chinese(char &c)
{
    do {
        c = yyinput();
        /* EOF is negative as a char too: stop on it or this never ends. */
    } while ((c < 0) && (c != EOF));
    yyunput(c, yytext);
    fprintf(stderr, "%-8d非法的中文字符:%s\n", yy_linenum, yytext);
    return LEX_NOTUPLE;
}

/*
 * Scan an integer or real constant with a small state machine.
 *
 *   state 1: first digit just read          state 2: in the integer part
 *   state 3: first digit after a leading '.'
 *   state 4: just after "digits."           state 5: in the fraction
 *   state 6: just after 'E'/'e'             state 7: just after the exponent sign
 *   state 8: in the exponent digits
 *
 * c is the character already read by the caller and state is 1 or 3.
 * Ending in state 2 or 4 gives c_integer (in state 4 the '.' is pushed back
 * as well, so that "1..5" scans as 1 .. 5); every other state gives c_real.
 * Ending in state 6 or 7 (exponent without digits) is reported as an error.
 */
static int _stdcall act_number(char &c, int state)
{
    int s = state;

    do {
        switch (s) {
        case 1:
            s = 2;
            break;
        case 2:
            if (isdigit((unsigned char) c)) {
                /* stay in the integer part */
            } else if (c == '.') {
                s = 4;
            } else if (c == 'E' || c == 'e') {
                s = 6;
            } else {
                goto done;
            }
            break;
        case 3:
            s = 5;
            break;
        case 4:
            if (isdigit((unsigned char) c))
                s = 5;
            else if (c == 'E' || c == 'e')
                s = 6;
            else
                goto done;
            break;
        case 5:
            if (isdigit((unsigned char) c)) {
                /* stay in the fraction */
            } else if (c == 'E' || c == 'e') {
                s = 6;
            } else {
                goto done;
            }
            /* This break was missing: state 5 fell into state 6, so the 'e'
               of "1.5e3" was re-examined there and rejected. */
            break;
        case 6:
            if (isdigit((unsigned char) c))
                s = 8;
            else if (c == '+' || c == '-')
                s = 7;
            else
                goto done;
            break;
        case 7:
            if (isdigit((unsigned char) c))
                s = 8;
            else
                goto done;
            break;
        case 8:
            if (!isdigit((unsigned char) c))
                goto done;
            break;
        }
    } while ((c = yyinput()) != EOF);

done:
    yyunput(c, yytext);

    if (s == 2 || s == 4) {
        result_tuple.flag = c_integer;
        if (s == 4)
            yyunput('.', yytext); /* the '.' was not part of the number */
        result_tuple.data.i_val.int_val1 = atoi(yytext);
    } else {
        result_tuple.flag = c_real;
        result_tuple.data.d_val = atof(yytext);
    }

    if (s == 7 || s == 6) {
        fprintf(stderr, "%-8d错误的实数定义:%s\n", yy_linenum, yytext);
        return LEX_WITH_ERROR;
    } else {
        return LEX_SUCCESS;
    }
}

#ifdef _DEBUG
/* Debug helper: print line number, token code and token text to o. */
void print_result(FILE *o)
{
    fprintf(o, "%-8d%-8d%s\n", yy_linenum, result_tuple.flag, yytext);
}
#endif