/* www.pudn.com > 李广军-0201-20-词法分析.rar > cifa.c */


/********************************************/ 
/*  词法分析程序                            */ 
/*  作者:李广军                            */ 
/*  学号:1402093120                        */ 
/*  计算机科学与信息工程系 0201班           */ 
/*  Copyright (C) 2005 by loverlife workroom*/ 
/*  ALL Rights Reserved.                    */ 
/********************************************/   
 
 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Number of keyword tokens seen so far; incremented by char_manage(). */
int key_count = 0;
/* Number of identifiers added to the identifier table; incremented by find(). */
int id_count = 0;
 
/**********************************************/
/*  Table initialisation                      */
/*  Token categories: 1 keyword, 2 identifier,*/
/*  3 constant, 4 operator, 5 separator       */
/**********************************************/

/*
 * Open `path` for writing (creating or truncating it).
 * Reports the failure on stderr and terminates the program when the file
 * cannot be opened, so callers never receive NULL.
 */
static FILE *init_open_table(const char *path)
{
    FILE *fp = fopen(path, "w");

    if (fp == NULL)
    {
        fprintf(stderr, "Cannot create table file: %s\n", path);
        exit(EXIT_FAILURE);
    }
    return fp;
}

/*
 * Write items[1] .. items[count - 1] to `fp`, one entry per line.
 * Slot 0 of every table is a placeholder and is never written, which makes
 * the 1-based line number in the file equal to the array index.
 */
static void init_write_entries(FILE *fp, const char *const *items, size_t count)
{
    size_t i;

    for (i = 1; i < count; i++)
        fprintf(fp, "%s\n", items[i]);
}

/*
 * Create the keyword, operator and separator table files and truncate the
 * identifier table, the constant table and the output file.
 *
 * All files live under E:\cifa\.  If any of them cannot be created the
 * program prints a message and exits with EXIT_FAILURE.
 */
void initialize(void)
{
    /* The 32 C keywords; index 0 is unused. */
    static const char *const keywords[] = {
        " ",
        "auto", "break", "case", "char", "const",
        "continue", "default", "do", "double", "else",
        "enum", "extern", "float", "for", "goto",
        "if", "int", "long", "register", "return",
        "short", "signed", "sizeof", "static", "struct",
        "switch", "typedef", "union", "unsigned", "void",
        "volatile", "while"
    };

    /* The 37 operators; index 0 is unused. */
    static const char *const operators[] = {
        " ",
        "(", ")", "[", "]", ".", "->", "!",
        "~", "++", "--", "-", "&", "*", "(type)",
        "sizeof()", "/", "%", "+", "<<", ">>", "<",
        "<=", ">", ">=", "==", "!=", "^", "|",
        "&&", "||", "?:", "=", "+=", "-=", "*=",
        "/=", ","
    };

    /* Six of the seven separators; the double quote is appended below. */
    static const char *const separators[] = {
        " ",
        ";", "{", "}", "#", ".", "'"
    };

    FILE *fp;

    fp = init_open_table("E:\\cifa\\keyword.txt");
    init_write_entries(fp, keywords, sizeof keywords / sizeof keywords[0]);
    fclose(fp);

    fp = init_open_table("E:\\cifa\\operator.txt");
    init_write_entries(fp, operators, sizeof operators / sizeof operators[0]);
    fclose(fp);

    fp = init_open_table("E:\\cifa\\seperator.txt");
    init_write_entries(fp, separators, sizeof separators / sizeof separators[0]);
    fputs("\"\n", fp);          /* seventh separator: the double quote */
    fclose(fp);

    /* The remaining files only need to exist and be empty. */
    fclose(init_open_table("E:\\cifa\\identity.txt"));
    fclose(init_open_table("E:\\cifa\\constant.txt"));
    fclose(init_open_table("E:\\cifa\\output.txt"));
}
 
/********************************************/
/*  Table lookup / table insertion          */
/********************************************/

/* Map a token category (1..5) to its table file; NULL for any other value. */
static const char *find_table_path(int type)
{
    switch (type)
    {
    case 1:  return "E:\\cifa\\keyword.txt";
    case 2:  return "E:\\cifa\\identity.txt";
    case 3:  return "E:\\cifa\\constant.txt";
    case 4:  return "E:\\cifa\\operator.txt";
    case 5:  return "E:\\cifa\\seperator.txt";
    default: return NULL;
    }
}

/*
 * Look `buf` up in the table file selected by `type` and optionally add it.
 *
 * buf      text to search for (compared against whole lines of the file)
 * type     table selector: 1 keyword, 2 identifier, 3 constant,
 *          4 operator, 5 separator
 * command  1 = lookup only; any other value = append `buf` when it is absent
 *
 * Returns the 1-based line number of the matching entry.  When there is no
 * match it returns 0 in lookup-only mode, otherwise it appends `buf` as a new
 * last line and returns that line's number.  Also returns 0 when `type` is
 * not 1..5, when `buf` is NULL, or when the table cannot be opened for
 * appending.  A table that cannot be opened for reading is treated as empty.
 *
 * Appending to the identifier table (type 2) increments id_count.
 */
int find(const char *buf, int type, int command)
{
    const char *path = find_table_path(type);
    FILE *fp;
    int number = 0;     /* number of table lines read so far */
    int c;              /* int, not char, so that EOF is distinguishable */

    if (path == NULL || buf == NULL)
        return 0;

    fp = fopen(path, "r");
    if (fp != NULL)
    {
        c = fgetc(fp);
        while (c != EOF)
        {
            /*
             * Compare the current line with buf character by character while
             * reading it, so that no fixed-size line buffer is needed.
             */
            size_t pos = 0;
            int same = 1;

            while (c != EOF && c != '\n')
            {
                if (same && buf[pos] != '\0' && (unsigned char)buf[pos] == c)
                    pos++;
                else
                    same = 0;
                c = fgetc(fp);
            }
            number++;

            if (same && buf[pos] == '\0')
            {
                fclose(fp);
                return number;      /* found: position in the table */
            }

            if (c == '\n')
                c = fgetc(fp);      /* first character of the next line */
        }
        fclose(fp);
    }

    if (command == 1)
        return 0;                   /* lookup only: report "not found" */

    fp = fopen(path, "a");
    if (fp == NULL)
        return 0;

    fprintf(fp, "%s\n", buf);
    fclose(fp);

    if (type == 2)
        id_count++;

    return number + 1;              /* position of the entry just appended */
}
 
/*******************************************/
/*  Numeric constant handling              */
/*******************************************/

/*
 * Record one numeric constant.
 *
 * Calls find() on the constant table (type 3) in insert mode, so the constant
 * is added to the table when it is not there yet, then appends the line
 * "3 <text> <index>" (tab separated) to the output file.
 *
 * buffer  NUL-terminated text of the constant
 *
 * If the output file cannot be opened a message is written to stderr and the
 * token is not recorded there; the table update has already happened.
 */
void constant_manage(char *buffer)
{
    FILE *fp;
    int result;

    result = find(buffer, 3, 2);

    fp = fopen("E:\\cifa\\output.txt", "a");
    if (fp == NULL)
    {
        fprintf(stderr, "Cannot open output file for constant: %s\n", buffer);
        return;
    }

    fprintf(fp, "3\t\t\t%s\t\t\t%d\n", buffer, result);
    fclose(fp);
}
 
 
/*******************************************/
/*  Keyword / identifier handling          */
/*******************************************/

/*
 * Record one word (letters, digits, underscores).
 *
 * The word is first looked up in the keyword table (type 1, lookup only).
 * A hit increments key_count and is written to the output file as category 1.
 * Otherwise the word is looked up in the identifier table (type 2) in insert
 * mode, which adds it when missing, and is written as category 2.
 *
 * buffer  NUL-terminated text of the word
 *
 * If the output file cannot be opened a message is written to stderr and the
 * token is not recorded there; counters and tables have already been updated.
 */
void char_manage(char *buffer)
{
    FILE *fp;
    int category = 1;
    int result;

    result = find(buffer, 1, 1);
    if (result != 0)
    {
        key_count++;
    }
    else
    {
        category = 2;
        result = find(buffer, 2, 2);
    }

    fp = fopen("E:\\cifa\\output.txt", "a");
    if (fp == NULL)
    {
        fprintf(stderr, "Cannot open output file for word: %s\n", buffer);
        return;
    }

    fprintf(fp, "%d\t\t\t%s\t\t\t%d\n", category, buffer, result);
    fclose(fp);
}
 
 
/******************************************/
/*  Error reporting                       */
/******************************************/

/*
 * Append an "illegal character" report to the output file.
 *
 * error   the offending character
 * lineno  line of the scanned source on which it was found
 *
 * If the output file cannot be opened the same message is written to stderr
 * instead, so the diagnostic is not lost.
 */
void error_manage(char error, int lineno)
{
    FILE *fp = fopen("E:\\cifa\\output.txt", "a");

    if (fp == NULL)
    {
        fprintf(stderr, "error: %c ,line %d\n", error, lineno);
        return;
    }

    fprintf(fp, "error: %c ,line %d\n", error, lineno);
    fclose(fp);
}
 
/*******************************************/ 
/*  扫描输入源程序                         */ 
/*******************************************/ 
void scanner() 
{ 
    FILE *fp, *fp1, *fp2; 
    char filename[30]; 
    char ch; 
    int  i = 0, line = 1; 
    int count, result, errorno = 0; 
	char array[30]; 
    char *word; 
 
	printf("\nPlease input the file name:"); 
    scanf("%s", filename); 
    if((fp1 = fopen(filename, "r")) == NULL) 
    { 
        printf("Cannot open file!"); 
        exit(0); 
    } 
   
    ch = fgetc(fp1); 
 
    while(ch != EOF)            /*按字符依次扫描源程序,直至结束*/ 
    { 
        i = 0; 
 
        if( ((ch>='A') && (ch<='Z')) || ((ch>='a') && (ch<='z')) || (ch=='_') ) 
        {                                      /*以字母或下划线开头*/ 
            while( ((ch >= 'A')&&( ch <= 'Z' )) || ((ch>='a')&&(ch<='z')) || 
				   (ch == '_') || ((ch >= '0')&&(ch <= '9')) ) 
            { 
                array[i++] = ch; 
                ch = fgetc(fp1); 
            }                                 /*字母数字下划线的组合*/ 
 
            word = (char *)malloc( (i+1)*sizeof(char) ); 
			memcpy(word, array, i);           /*将array中的前i个字符拷贝到指针word中*/ 
			word[i] = '\0'; 
 
            char_manage(word);                  /*字符串处理*/ 
 
			word=NULL; 
            free(word); 
            if(ch != EOF) 
			   fseek(fp1, -1L, SEEK_CUR);       /*文件指针定位在前的一个位置*/ 
        }/*判断关键字、标识符结束*/ 
 
        else if(ch >= '0' && ch <= '9')            /*以数字开头*/ 
        {            
            while( ((ch>='0')&&(ch<='9')) || ch == '.'  
				   || ch == 'e' || ch == 'E' || ch == '-') 
            { 
                array[i++] = ch; 
                ch = fgetc(fp1); 
            }                                   /*数字的组合*/ 
 
            word = (char *)malloc( (i+1)*sizeof(char) ); 
			memcpy(word, array, i); 
			word[i] = '\0'; 
            constant_manage(word); 
      
            word=NULL; 
            free(word); 
			if(ch != EOF) 
			   fseek(fp1, -1L, SEEK_CUR); 
        }/*判断数字组合结束*/ 
 
        else if( (ch == ' ') || (ch == '\t') ) 
			;                                /*滤除空格符和水平制表符*/ 
 
		else if(ch == '\n') 
			line++;                          /*滤除回车符,行数加1*/ 
 
	 
		else if(ch == '/')                   /*过滤注释*/ 
        {                                     
			ch = fgetc(fp1); 
 
			if(ch == '=')                     /* 若为'/='符号*/ 
			{                                 
				fp2 = fopen("E:\\cifa\\output.txt", "a"); 
				fprintf(fp2, "4\t\t\t/=\t\t\t32\n"); 
                fclose(fp2); 
			} 
			else if(ch != '*') 
            {                               /*若为除号,写入输出文件*/ 
                fp2 = fopen("E:\\cifa\\output.txt", "a"); 
				fprintf(fp2, "4\t\t\t/\t\t\t13\n"); 
                fclose(fp2); 
				fseek(fp1, -1L, SEEK_CUR); 
			} 
			else if(ch == '*') 
			{                            /*若为注释的开始,消除包含在里面的所有字符*/ 
				count = 0; 
				ch = fgetc(fp1); 
				while(count != 2) 
                {            /*当扫描到'*'且紧接着下一个字符为'/'才是注释的结束*/ 
					count = 0; 
					while(ch != '*') 
						ch = fgetc(fp1); 
 
					count++; 
					ch = fgetc(fp1); 
					if(ch == '/') 
						count++; 
					else 
						ch = fgetc(fp1); 
				} 
			} 
		}/*过滤注释结束*/ 
 
 
		else if(ch == '"') 
		{                                         /*消除包含在双引号中的字符串常量*/ 
			fp2 = fopen("E:\\cifa\\output.txt", "a"); 
 
			fprintf(fp2, "5\t\t\t%c\t\t\t7\n", ch); 
			ch = fgetc(fp1); 
 
			while(ch != '"') 
				ch = fgetc(fp1); 
			 
			fprintf(fp2, "5\t\t\t%c\t\t\t7\n", ch); 
			fclose(fp2); 
		} 
 
		else 
        {                           /*首字符为其它字符,即运算限界符或非法字符*/ 
            array[0] = ch; 
            ch = fgetc(fp1);        /*再读入下一个字符,判断是否为双目运算符、限界符*/ 
            if(ch != EOF) 
            {                       /*若该字符非文件结束符*/ 
                array[1] = ch; 
				word = (char *)malloc( 3 * sizeof(char) ); 
				memcpy(word, array, 2); 
				word[2] = '\0'; 
                result = find(word, 4, 1);      /*先检索是否为双目运算符*/ 
 
				if(result == 0) 
				{                           /*若不是双目运算符*/ 
                    /*word = (char *)malloc( 2 * sizeof(char) );*/ 
					memcpy(word, array, 1); 
					word[1] = '\0'; 
					result = find(word, 4, 1);      /*检索是否为单目运算符*/ 
 
					if(result == 0)                  /*若不是单目运算符*/ 
                    {                                /*检索是否为限界符 */ 
						result = find(word, 5, 1); 
 
						if(result == 0)              /*若不是限界符,则为非法字符*/ 
						{ 
							error_manage(array[0], line); 
						    errorno++; 
						    fseek(fp1, -1L, SEEK_CUR); 
						} 
						else 
						{                           /*若为限界符,写输出文件*/     
							fp2 = fopen("E:\\cifa\\output.txt", "a"); 
							fprintf(fp2, "5\t\t\t%s\t\t\t%d\t\n", word, result); 
							fclose(fp2); 
							fseek(fp1, -1L, SEEK_CUR); 
						} 
					} 
                    else 
					{     /*若为单目运算符,写入输出文件并将扫描文件指针回退一个字符*/ 
						fp2 = fopen("E:\\cifa\\output.txt", "a"); 
						fprintf(fp2, "4\t\t\t%s\t\t\t%d\t\n", word, result); 
						fclose(fp2); 
						fseek(fp1, -1L, SEEK_CUR); 
					} 
				} 
                else 
				{             /*若为双目运算符,写输出文件*/ 
					fp2 = fopen("E:\\cifa\\output.txt", "a"); 
					fprintf(fp2, "4\t\t\t%s\t\t\t%d\n", word, result); 
					fclose(fp2); 
				} 
				word=NULL; 
                free(word); 
 
            } 
 
            else 
			{      /*若读入的下一个字符为文件结束符,当前字符是否为 '}'*/ 
				if(array[0] != '}')         /*若不是,转出错处理*/ 
					error_manage(array[0], line); 
				else 
				{                          /*若是'}',写输出文件*/ 
					fp2 = fopen("E:\\cifa\\output.txt", "a"); 
					fprintf(fp2, "5\t\t\t%s\t\t\t%d\n", '}', 2); 
					fclose(fp2); 
				} 
			} 
		}/*运算限界非法字符判断结束*/ 
 
        ch = fgetc(fp1);  
    } 
	fclose(fp1); 
	fp2 = fopen("E:\\cifa\\output.txt", "a"); 
	fprintf(fp2, "Number of error(s): %d .\n", errorno); /*报告错误字符个数*/ 
	fprintf(fp2, "NUmber of identities: %d.\n", id_count); 
	fprintf(fp2, "Number of keywords: %d.\n", key_count); 
	fclose(fp2); 
} 
 
/*******************************************/
/*  Program entry point                    */
/*******************************************/

/*
 * Rebuild the table files, scan the source file named by the user, print
 * "Succeed!" and wait for the user to press Enter before returning 0.
 */
int main(void)
{
    int c;

    initialize();
    scanner();
    printf("Succeed!\n");

    /*
     * Portable replacement for the non-standard getch(): first discard what
     * is left of the line on which the file name was typed, then wait for one
     * more character.  Nothing is waited for once stdin is at end of file.
     */
    while ((c = getchar()) != '\n' && c != EOF)
        ;
    if (c != EOF)
        getchar();

    return 0;
}