// www.pudn.com > WordScanner.rar > WordScanner.cpp


#include "stdio.h" 
#include "stdlib.h" 
#include "ctype.h" 
#include "string.h" 
 
#include "SymbolTable.h" //符号表 
#include "WordClass.h" //分类表 
 
// Status codes. Only OK is used in this file (as word_scanner's return value).
#define OK 1 
#define ERROR 2 
// Lexical analysis: scans the stream and reports every token it recognises.
bool word_scanner(FILE * fp);
// Returns the 1-based position of ptoken in key_word_table, or 0 if it is not a reserved word.
int lookup(char * ptoken); 
// Prints one (id, text) pair to stdout and to the result file fpw.
int out(int id, char * pchar); 
// Line number of the input currently being scanned (starts at 1); used in error messages.
int num_of_line=1; 
 
char TOKEN[20]; // buffer holding the characters of the token being read
id_struct * id_head=NULL; // global: points to the start of the identifier linked list
// Result file; opened in main and written to by out() and word_scanner().
FILE *fpw; 
 
// Program entry point.
//
// Opens the test program ("测试程序.txt") for reading and the result file
// ("词法分析结果.txt") for writing, writes the column header to both the
// console and the result file, runs the scanner, and closes both files.
//
// Returns 0 on success, EXIT_FAILURE if either file cannot be opened or the
// scanner reports failure.
int main() 
{ 
	printf("This is a word analysiser!\n"); 
	printf("编码\t单词\n"); 

	// Input: the program text to analyse.
	FILE *fp=fopen("测试程序.txt","r"); 
	if(fp==NULL) 
	{ 
		perror("测试程序.txt"); 
		return EXIT_FAILURE; 
	} 

	// Output: the file the analysis result is saved to.
	fpw=fopen("词法分析结果.txt","w"); 
	if(fpw==NULL) 
	{ 
		perror("词法分析结果.txt"); 
		fclose(fp); 
		return EXIT_FAILURE; 
	} 
	fprintf(fpw,"%s\t%s\n","编码","单词"); 

	bool scanned=word_scanner(fp); // lexical analysis

	fclose(fp); 
	// Closing the result file flushes it; clear the global so nothing
	// writes through a stale stream afterwards.
	fclose(fpw); 
	fpw=NULL; 

	return scanned ? 0 : EXIT_FAILURE; 
} 
bool word_scanner(FILE * fp)//词法分析过程 
{ 
	bool end_file=false; 
	char ch; 
	int p_token=0; //标识TOKEN当前的位置 
	id_struct * kw_cur_position=id_head; //标识符当前位置指针 
	const_struct * int_cur_position=const_head; //常数当前位置指针 
	ch=fgetc(fp); 
	while(!end_file) 
	{ 
		//处理部分,为了方便,假设标识符与保留字间以空格分隔 
		if(isalpha(ch)) //判断是否为字母 
		{ 
			//若为字母,必为标识符或保留字 
			p_token=0; 
			TOKEN[p_token]=ch; 
			p_token++; 
			ch=fgetc(fp); //读取下一个字符 
			while(isalnum(ch)) 
			{ 
				//为数字或字母 
				TOKEN[p_token]=ch; 
				p_token++; 
				ch=fgetc(fp); 
			} 
			TOKEN[p_token]='\0'; 
			fseek(fp,-1,1); //将文件指针从当前位置上移一个位置 
			int is_keyword; 
			is_keyword=lookup(TOKEN); //检查是否为保留字 
			if(is_keyword) 
			{ 
				//是保留字 
				out(key_word_table[is_keyword-1].id ,key_word_table[is_keyword-1].name ); 
			} 
			else 
			{ 
				//是标识符 
				//将其加入到标识符链表中,并输出相应信息 
				id_struct * kw_next_position=NULL; 
				if(kw_cur_position==NULL) 
				{ 
					//开始时 
					char pchar[20]; 
					for(int i=0;i<20;i++) 
					{ 
						pchar[i]=TOKEN[i]; 
					} 
					id_head=(id_struct *)malloc(sizeof(id_struct)); 
					id_head->id =12; 
					id_head->p =pchar; 
					id_head->next =NULL; 
					kw_cur_position=id_head; 
					out(id_head->id ,id_head->p ); 
				} 
				else 
				{ 
					//非开始位置 
					char pchar[20]; 
					for(int i=0;i<20;i++) 
					{ 
						pchar[i]=TOKEN[i]; 
					} 
					kw_next_position=(id_struct *)malloc(sizeof(id_struct)); 
					kw_next_position->id =12; 
					kw_next_position->p =pchar; 
					kw_next_position->next =NULL; 
					kw_cur_position->next =kw_next_position; 
					kw_cur_position=kw_next_position; 
					out(kw_cur_position->id ,kw_cur_position->p ); 
				} 
			}//处理完关键字和保留字 
		} 
		else 
		{ 
			//分析开始符号为非字母的单词 
			if(isdigit(ch)) 
			{ 
				//如果为数字 
				p_token=0; 
				TOKEN[p_token]=ch; 
				ch=fgetc(fp); 
				p_token++; 
				while(isdigit(ch)) 
				{ 
					//继续读取数字 
					TOKEN[p_token]=ch; 
					p_token++; 
					ch=fgetc(fp); 
				} 
				TOKEN[p_token]='\0'; 
				fseek(fp,-1,1); 
				//加到常数链表中 
				const_struct * int_next_position=NULL; 
				if(const_head==NULL) 
				{ 
					//如果链表为空 
					const_head=(const_struct *)malloc(sizeof(const_struct)); 
					const_head->id =13; 
					const_head->value =TOKEN; 
					const_head->next =NULL; 
					int_cur_position=const_head; 
					out(const_head->id ,const_head->value ); 
				} 
				else 
				{ 
					//链表不为空 
					int_next_position=(const_struct *)malloc(sizeof(const_struct)); 
					int_next_position->id =13; 
					int_next_position->value =TOKEN; 
					int_next_position->next =NULL; 
					int_cur_position->next =int_next_position; 
					int_cur_position=int_next_position; 
					out(int_cur_position->id ,int_cur_position->value ); 
				} 
			}//分析完常数 
			else 
			{ 
				//分析各类界符 
				switch(ch) 
				{ 
				case '+': 
					printf("%d\t%s\n",14,"+"); 
					fprintf(fpw,"%d\t%s\n",14,"+"); 
					break; 
				case '-': 
					//需要保证为无符号整数 
				//	ch=fgetc(fp); 
				//	if(isdigit(ch)) 
				//	{ 
						//输入了负数 
				//		printf("%s%d%s\n","The scanner finds negtive at line ",num_of_line," in this program,please check it!"); 
				//		fprintf(fpw,"%s%d%s\n","The scanner finds negtive at line ",num_of_line," in this program,please check it!"); 
				//	} 
				//	else 
				//	{ 
						//运算符 
						printf("%d\t%s\n",15,"-"); 
						fprintf(fpw,"%d\t%s\n",15,"-"); 
				//	} 
					break; 
				case '*': 
					printf("%d\t%s\n",16,"*"); 
					fprintf(fpw,"%d\t%s\n",16,"*"); 
					break; 
				case ';': 
					printf("%d\t%s\n",17,";"); 
					fprintf(fpw,"%d\t%s\n",17,";"); 
					break; 
				case '(': 
					printf("%d\t%s\n",18,"("); 
					fprintf(fpw,"%d\t%s\n",18,"("); 
					break; 
				case ')': 
					printf("%d\t%s\n",19,")"); 
					fprintf(fpw,"%d\t%s\n",19,")"); 
					break; 
				case '=': 
					printf("%d\t%s\n",22,"="); 
					fprintf(fpw,"%d\t%s\n",22,"="); 
					break; 
				case '>': 
					ch=fgetc(fp); 
					if(ch=='=') 
					{ 
						printf("%d\t%s\n",24,">="); 
						fprintf(fpw,"%d\t%s\n",24,">="); 
					} 
					else 
					{ 
						fseek(fp,-1,1); 
						printf("%d\t%s\n",23,">"); 
						fprintf(fpw,"%d\t%s\n",23,">"); 
					} 
					break; 
				case '<': 
					ch=fgetc(fp); 
					if(ch=='=') 
					{ 
						printf("%d\t%s\n",21,"<="); 
						fprintf(fpw,"%d\t%s\n",21,"<="); 
					} 
					else 
					{ 
						if(ch=='>') 
						{ 
							printf("%d\t%s\n",25,"<>");	 
							fprintf(fpw,"%d\t%s\n",25,"<>"); 
						} 
						else 
						{ 
							fseek(fp,-1,1); 
							printf("%d\t%s\n",20,"<"); 
							fprintf(fpw,"%d\t%s\n",20,"<"); 
						} 
					} 
					break; 
				case ':': 
					ch=fgetc(fp); 
					if(ch=='=') 
					{ 
						printf("%d\t%s\n",27,":="); 
						fprintf(fpw,"%d\t%s\n",27,":="); 
					} 
					else 
					{ 
						fseek(fp,-1,1); 
						printf("%d\t%s\n",26,":"); 
						fprintf(fpw,"%d\t%s\n",26,":"); 
					} 
					break; 
				case '/': 
					ch=fgetc(fp); 
					if(ch=='*') 
					{ 
						//处理注释部分 
						//循环读入,直到遇到*/为止 
						bool is_end=false; 
						int thefirst=1; 
						while(!is_end) 
						{ 
							if(thefirst) 
							{ 
								ch=fgetc(fp); 
								thefirst=0; 
							} 
							while(ch!='*') 
							{ 
								ch=fgetc(fp); 
							} 
							ch=fgetc(fp); 
							if(ch=='/') 
							{ 
								//到注释尾 
								is_end=true; 
							} 
						} 
					} 
					else 
					{ 
						fseek(fp,-1,1); 
						printf("%d\t%s\n",28,"/"); 
						fprintf(fpw,"%d\t%s\n",28,"/"); 
					} 
					break; 
				case ' ': 
					//不进行特殊处理 
					break; 
				case '\r': 
					break; 
				case '\n': 
					num_of_line++; 
					break; 
				default: 
					if(isprint(ch)) 
					{ 
						printf("%s%d%s\n","There is a error at line ",num_of_line," !"); 
						printf("%s%c%s\n","The symbol '",ch,"' is not a acceptable symbol!"); 
						fprintf(fpw,"%s%d%s\n","There is a error at line ",num_of_line," !"); 
						fprintf(fpw,"%s%c%s\n","The symbol ",ch," is not a acceptable symbol!"); 
						//exit(0); //暂时按退出处理 
					} 
					//exit(0); //暂时按退出处理 
					break; 
				} 
			} 
		}//else 
		if(feof(fp)!=0) 
		{ 
			//文件结束 
			end_file=true; 
		} 
		else 
		{ 
			//读取下一个字符 
			ch=fgetc(fp); 
		} 
	}//while 
	return OK; 
} 
 
// Checks whether ptoken is a reserved word.
// Compares it against the names of the first 11 entries of key_word_table.
// Returns the 1-based position of the matching entry, or 0 when none matches.
int lookup(char * ptoken) 
{ 
	int pos=0; 
	while(pos<11) 
	{ 
		if(strcmp(key_word_table[pos].name,ptoken)==0) 
		{ 
			return pos+1; 
		} 
		++pos; 
	} 
	return 0; 
} 
 
// Reports one token as an "<id>\t<text>" line, first on stdout and then in
// the result file fpw. Always returns 1.
int out(int id, char * pchar) 
{ 
	FILE * sinks[2]={ stdout, fpw }; 
	for(int k=0;k<2;k++) 
	{ 
		fprintf(sinks[k],"%d\t%s\n",id,pchar); 
	} 
	return 1; 
} 