www.pudn.com > BAWK_NEW.zip > BAWKPAT.C


/* 
 * Bawk regular expression compiler/interpreter 
 */ 
#include <stdio.h>
#include "bawk.h"
  
/*
 * re_compile - compile a regular expression read from the current input.
 *
 * The first character returned by getcharacter() is the pattern
 * delimiter.  Characters are then consumed up to the next occurrence of
 * that delimiter (or until getcharacter() returns -1) and translated
 * into the opcode string that pmatch() interprets:
 *
 *	^  $  .			BOL, EOL, ANY
 *	[...]			CLASS / NCLASS, built by cclass()
 *	:a  :d  :n  :<space>	ALPHA, DIGIT, NALPHA, PUNCT
 *				(the type letter is case-insensitive)
 *	\c, any other char	CHAR followed by the literal byte
 *	x*  x+  x-		STAR / PLUS / MINUS, compiled x, ENDPAT
 *
 * The compiled pattern is terminated by ENDPAT and a 0 byte.
 *
 * patbuf	destination buffer.  Its size is not known here, so the
 *		caller must supply one large enough for the compiled form.
 *
 * Returns the number of bytes stored in patbuf.
 */
int
re_compile( patbuf )
char	*patbuf;		/* where to put compiled pattern */
{
	int	c,		/* Current character           */
		o,		/* Opcode of previous element  */
		delim;		/* Pattern delimiter (int, so  */
				/* it compares cleanly with c) */
	char	*patptr,	/* Destination string pointer  */
		*lp,		/* Start of previous element   */
		*spp,		/* Saved end of pattern        */
		*cclass();	/* Compile class routine       */

	patptr = patbuf;
	lp = patbuf;
	delim = getcharacter();

	while ( (c = getcharacter()) != -1 && c != delim )
	{
		/*
		 * STAR, PLUS and MINUS are special: they apply to the
		 * element compiled just before them.
		 */
		if (c == '*' || c == '+' || c == '-') {
			/*
			 * Look at the opcode that STARTS the previous
			 * element (*lp), not at the last byte stored:
			 * that byte is data after CHAR or a class, and
			 * ENDPAT after a closure.  Testing it let "a**"
			 * through and rejected literals whose code
			 * happened to equal an opcode.
			 */
			if (patptr == patbuf ||
				  (o = *lp) == BOL ||
				  o == EOL ||
				  o == STAR ||
				  o == PLUS ||
				  o == MINUS)
				error( "illegal occurrance op", RE_ERROR );
			/*
			 * Append two ENDPATs, then slide the previous
			 * element up one byte so the operator opcode can
			 * be stored in front of it.  The slide overwrites
			 * one of the two ENDPATs, leaving: op, element,
			 * ENDPAT.
			 */
			*patptr++ = ENDPAT;
			*patptr++ = ENDPAT;
			spp = patptr;		/* Save pattern end     */
			while (--patptr > lp)	/* Move pattern down... */
				*patptr = patptr[-1];	/* one byte     */
			*patptr =   (c == '*') ? STAR :
				(c == '-') ? MINUS : PLUS;
			patptr = spp;		/* Restore pattern end  */
			continue;
		}
		/*
		 * All the rest.
		 */
		lp = patptr;			/* Remember start       */
		switch(c) {

		case '^':
			*patptr++ = BOL;
			break;

		case '$':
			*patptr++ = EOL;
			break;

		case '.':
			*patptr++ = ANY;
			break;

		case '[':
			patptr = cclass( patptr );
			break;

		case ':':
			if ( (c=getcharacter()) != -1 )
			{
				switch( tolower( c ) )
				{

				case 'a':
					*patptr++ = ALPHA;
					break;

				case 'd':
					*patptr++ = DIGIT;
					break;

				case 'n':
					*patptr++ = NALPHA;
					break;

				case ' ':
					*patptr++ = PUNCT;
					break;

				default:
					error( "unknown ':' type", RE_ERROR );

				}
			}
			else
				error( "no ':' type", RE_ERROR );
			break;

		case '\\':
			/* Quoted character: take the next one literally */
			c = getcharacter();
			/* FALLTHROUGH */

		default:
			*patptr++ = CHAR;
			*patptr++ = c;
		}
	}
	*patptr++ = ENDPAT;
	*patptr++ = 0;			/* Terminate string     */

#ifdef DEBUG
	/*
	 * NOTE(review): the original opcode dump at this point was
	 * damaged beyond recovery in this copy of the file.  This
	 * replacement simply prints every compiled byte in octal.
	 */
	if ( Debug>1 )
	{
		for ( lp=patbuf; lp<patptr; ++lp )
			printf( "0%o ", *lp & 0377 );
		printf( "\n" );
	}
#endif

	return (int)(patptr - patbuf);
}
 
/*
 * cclass - compile a character class; the opening '[' has already been
 * read by the caller.
 *
 * Bytes written to patbuf:
 *
 *	CLASS or NCLASS		NCLASS when the class starts with '^'
 *	count			number of bytes that follow PLUS ONE
 *				(the count byte counts itself)
 *	items			a literal byte, or RANGE, low, high
 *
 * A backslash stores the following character literally.  A '-' that is
 * neither first nor last in the class turns the previously stored byte
 * and the following character into a RANGE item.
 *
 * patbuf	where to store the compiled class
 *
 * Returns a pointer just past the last byte stored.
 */
char *
cclass( patbuf )
char	*patbuf;	/* destination pattern buffer */
{
	char	*patptr,	/* destination pattern pointer */
		*cp;		/* Address of the count byte   */
	int	c,		/* Current character */
		o;		/* Opcode, then look-ahead char */

	patptr = patbuf;

	/*
	 * Default to a normal class so that "o" always holds a valid
	 * opcode, even on the end-of-input path below.
	 */
	o = CLASS;

	if ( (c = getcharacter()) == -1 )
		error( "class terminates badly", RE_ERROR );
	else if ( c == '^')
	{
		/*
		 * Class exclusion, for example: [^abc]
		 * Swallow the "^" and set token type to class exclusion.
		 */
		o = NCLASS;
	}
	else
	{
		/*
		 * Normal class, for example: [abc]
		 * push the character back; it belongs to the class body.
		 */
		ungetcharacter( c );
	}
	*patptr++ = o;

	cp = patptr;	/* remember where byte count is */
	*patptr++ = 0;	/* and initialize byte count */
	while ( (c = getcharacter()) != -1 && c!=']' )
	{
		o = getcharacter();		/* peek at next char */
		if (c == '\\')			/* Store quoted chars */
		{
			if ( o == -1) /* Gotta get something */
				error( "class terminates badly", RE_ERROR );
			*patptr++ = o;
		}
		else if ( c=='-' && (patptr-cp)>1 && o!=']' && o != -1 )
		{
			/*
			 * Rewrite "x" (already stored) plus "-y" as
			 * RANGE, x, y.
			 */
			c = patptr[-1];		/* Range start     */
			patptr[-1] = RANGE;	/* Range signal    */
			*patptr++ = c;		/* Re-store start  */
			*patptr++ = o;		/* Store end char  */
		}
		else
		{
			*patptr++ = c;		/* Store normal char */
			ungetcharacter( o );	/* Look-ahead not used */
		}
	}
	if (c != ']')
		error( "unterminated class", RE_ERROR );
	if ( (c = (patptr - cp)) >= 256 )
		error( "class too large", RE_ERROR );
	/*
	 * The count includes the count byte itself, so it is never 0;
	 * a class with no members has a count of exactly 1.
	 */
	if ( c <= 1 )
		error( "empty class", RE_ERROR );
	*cp = c;		/* fill in byte count */

	return patptr;
}
  
/*
 * match - count the non-overlapping matches of a compiled pattern in a
 * line.
 *
 * Every position of the line is tried in turn with pmatch().  After a
 * match the scan resumes at the first character following the matched
 * text.  Matching is never attempted at the terminating NUL, so an
 * empty line yields 0.
 *
 * line		NUL-terminated line to search
 * pattern	compiled pattern to apply
 *
 * Returns the number of matches found (0 if none).
 */
int
match( line, pattern )
char	*line;		/* line to match */
char	*pattern;	/* pattern to match */
{
	char	*l;		/* Line pointer       */
	char	*pmatch();
	char	*next;		/* End of current match */
	int	matches;

	matches = 0;
	for (l = line; *l; l++)
	{
		if ( (next = pmatch(line, l, pattern)) != 0 )
		{
			++matches;
#ifdef DEBUG
			if ( Debug )
				printf( "match!\n" );
#endif
			/*
			 * Only jump ahead when the match consumed text.
			 * For a zero-length match (next == l), setting
			 * l = next - 1 would cancel the loop's l++ and
			 * the scan would never advance.
			 */
			if ( next > l )
			{
				l = next - 1;
				/*
				 * Defensive: if the match consumed the
				 * terminating NUL, stop here rather than
				 * step past the end of the line.
				 */
				if ( *l == '\0' )
					break;
			}
		}
	}

	return matches;
}
  
/*
 * pmatch - try to match a compiled pattern at one position in a line.
 *
 * linestart	first character of the whole line (used by BOL)
 * line		position in the line at which matching starts
 * pattern	compiled pattern, or the tail of one, ending in ENDPAT
 *
 * Returns a pointer to the first character after the matched text, or
 * 0 if the pattern does not match at this position.  Calls itself
 * recursively for the MINUS, PLUS and STAR operators.
 */
char *
pmatch(linestart, line, pattern)
char	*linestart;	/* start of line to match */
char	*line;		/* (partial) line to match      */
char	*pattern;	/* (partial) pattern to match   */
{
	char	*l;	/* Current line pointer         */
	char	*p;	/* Current pattern pointer      */
	char	c;	/* Current character            */
	char	*e;	/* End for STAR and PLUS match  */
	int	op;	/* Pattern operation            */
	int	n;	/* Class counter                */
	char	*are;	/* Start of STAR match          */

	l = line;

#ifdef DEBUG
	if (Debug > 1)
		printf("pmatch(\"%s\")\n", line);
#endif

	p = pattern;
	while ((op = *p++) != ENDPAT) {

#ifdef DEBUG
		if (Debug > 1)
			printf("byte[%d] = 0%o, '%c', op = 0%o\n",
					(int)(l-line), *l & 0377, *l, op);
#endif

		switch(op) {

		case CHAR:			/* One literal character */
			if ( *l++ != *p++)
				return 0;
			break;

		case BOL:			/* Only at start of line */
			if (l != linestart)
				return 0;
			break;

		case EOL:			/* Only at the terminator */
			if (*l != '\0')
				return 0;
			break;

		case ANY:			/* Anything but the end  */
			if (*l++ == '\0')
				return 0;
			break;

		case DIGIT:
			if ((c = *l++) < '0' || (c > '9'))
				return 0;
			break;

		case ALPHA:
			/*
			 * Read the character first and fold case after:
			 * "l" is then advanced exactly once even if
			 * tolower() evaluates its argument twice, and the
			 * argument is never a negative char.
			 */
			c = *l++;
			c = tolower( (unsigned char) c );
			if (c < 'a' || c > 'z')
				return 0;
			break;

		case NALPHA:			/* Letter or digit */
			c = *l++;
			c = tolower( (unsigned char) c );
			if (c >= 'a' && c <= 'z')
				break;
			else if (c < '0' || c > '9')
				return 0;
			break;

		case PUNCT:			/* Space or below, not NUL */
			c = *l++;
			if (c == 0 || c > ' ')
				return 0;
			break;

		case CLASS:
		case NCLASS:
			/*
			 * A class never matches the end of the line.
			 * Without this test NCLASS would accept the
			 * terminating NUL and move "l" past the string.
			 */
			if (*l == '\0')
				return 0;
			c = *l++;
			/*
			 * n is the stored count: the number of item bytes
			 * plus one for the count byte.  The loop breaks
			 * out early (n > 1) on a member match and runs
			 * down to n <= 1 when no member matches.
			 */
			n = *p++ & 0377;
			do {
				if (*p == RANGE) {
					p += 3;
					n -= 2;
					if (c >= p[-2] && c <= p[-1])
						break;
				}
				else if (c == *p++)
					break;
			} while (--n > 1);
			/* CLASS needs a hit, NCLASS needs a miss */
			if ((op == CLASS) == (n <= 1))
				return 0;
			if (op == CLASS)
				p += n - 2;	/* Skip unexamined items */
			break;

		/*
		 * NOTE(review): the "skip over pattern" loops below stop
		 * at the first byte equal to ENDPAT, whether it is an
		 * opcode or data -- confirm ENDPAT cannot occur as a
		 * literal or class member.
		 */
		case MINUS:			/* Zero or one ...     */
			e = pmatch(linestart,l,p);/* Look for a match    */
			while (*p++ != ENDPAT)	/* Skip over pattern   */
				;
			if (e)			/* Got a match?        */
				l = e;		/* Yes, update string  */
			break;			/* Always succeeds     */

		case PLUS:			/* One or more ...     */
			if ((l = pmatch(linestart,l,p)) == 0)
				return 0;	/* Gotta have a match  */
			/* FALLTHROUGH */
		case STAR:			/* Zero or more ...    */
			are = l;		/* Remember line start */
			/*
			 * Get the longest match.  Stop when a repetition
			 * consumes nothing (e == l); otherwise the loop
			 * would never terminate.
			 */
			while (*l && (e = pmatch(linestart,l,p)) != 0
					&& e != l)
				l = e;
			while (*p++ != ENDPAT)	/* Skip over pattern   */
				;
			/*
			 * Try to match the rest of the pattern, giving
			 * back one character at a time down to "are".
			 * Written so that no pointer before "are" is
			 * ever formed.
			 */
			for (;;) {
				if ((e = pmatch(linestart,l,p)) != 0)
					return e;
				if (l == are)
					break;
				--l;		/* Nope, try earlier   */
			}
			return 0;		/* Nothing else worked */

		default:
			fprintf( stderr, "bad op code %d\n", op );
			error( "can't happen -- match", RE_ERROR );
			/* Do not keep interpreting a corrupt pattern */
			return 0;
		}
	}
	return l;
}