www.pudn.com > flex_bingle.rar > dfa.c


/* dfa - DFA construction routines */ 
 
/*- 
 * Copyright (c) 1990 The Regents of the University of California. 
 * All rights reserved. 
 * 
 * This code is derived from software contributed to Berkeley by 
 * Vern Paxson. 
 *  
 * The United States Government has rights in this work pursuant 
 * to contract no. DE-AC03-76SF00098 between the United States 
 * Department of Energy and the University of California. 
 * 
 * Redistribution and use in source and binary forms are permitted provided 
 * that: (1) source distributions retain this entire copyright notice and 
 * comment, and (2) distributions including binaries display the following 
 * acknowledgement:  ``This product includes software developed by the 
 * University of California, Berkeley and its contributors'' in the 
 * documentation or other materials provided with the distribution and in 
 * all advertising materials mentioning features or use of this software. 
 * Neither the name of the University nor the names of its contributors may 
 * be used to endorse or promote products derived from this software without 
 * specific prior written permission. 
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
 */ 
 
/* $Header: /home/daffy/u0/vern/flex/RCS/dfa.c,v 2.26 95/04/20 13:53:14 vern Exp $ */ 
 
#include "flexdef.h" 
 
 
/* declare functions that have forward references */ 
 
void dump_associated_rules PROTO((FILE*, int)); 
void dump_transitions PROTO((FILE*, int[])); 
void sympartition PROTO((int[], int, int[], int[])); 
int symfollowset PROTO((int[], int, int, int[])); 
 
 
/* check_for_backing_up - check a DFA state for backing up 
 * 
 * synopsis 
 *     void check_for_backing_up( int ds, int state[numecs] ); 
 * 
 * ds is the number of the state to check and state[] is its out-transitions, 
 * indexed by equivalence class. 
 */ 
 
void check_for_backing_up( ds, state ) 
int ds; 
int state[]; 
	{ 
	if ( (reject && ! dfaacc[ds].dfaacc_set) || 
	     (! reject && ! dfaacc[ds].dfaacc_state) ) 
		{ /* state is non-accepting */ 
		++num_backing_up; 
 
		if ( backing_up_report ) 
			{ 
			fprintf( backing_up_file, 
				_( "State #%d is non-accepting -\n" ), ds ); 
 
			/* identify the state */ 
			dump_associated_rules( backing_up_file, ds ); 
 
			/* Now identify it further using the out- and 
			 * jam-transitions. 
			 */ 
			dump_transitions( backing_up_file, state ); 
 
			putc( '\n', backing_up_file ); 
			} 
		} 
	} 
 
 
/* check_trailing_context - check to see if NFA state set constitutes 
 *                          "dangerous" trailing context 
 * 
 * synopsis 
 *    void check_trailing_context( int nfa_states[num_states+1], int num_states, 
 *				int accset[nacc+1], int nacc ); 
 * 
 * NOTES 
 *  Trailing context is "dangerous" if both the head and the trailing 
 *  part are of variable size \and/ there's a DFA state which contains 
 *  both an accepting state for the head part of the rule and NFA states 
 *  which occur after the beginning of the trailing context. 
 * 
 *  When such a rule is matched, it's impossible to tell if having been 
 *  in the DFA state indicates the beginning of the trailing context or 
 *  further-along scanning of the pattern.  In these cases, a warning 
 *  message is issued. 
 * 
 *    nfa_states[1 .. num_states] is the list of NFA states in the DFA. 
 *    accset[1 .. nacc] is the list of accepting numbers for the DFA state. 
 */ 
 
void check_trailing_context( nfa_states, num_states, accset, nacc ) 
int *nfa_states, num_states; 
int *accset; 
int nacc; 
	{ 
	register int i, j; 
 
	for ( i = 1; i <= num_states; ++i ) 
		{ 
		int ns = nfa_states[i]; 
		register int type = state_type[ns]; 
		register int ar = assoc_rule[ns]; 
 
		if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) 
			{ /* do nothing */ 
			} 
 
		else if ( type == STATE_TRAILING_CONTEXT ) 
			{ 
			/* Potential trouble.  Scan set of accepting numbers 
			 * for the one marking the end of the "head".  We 
			 * assume that this looping will be fairly cheap 
			 * since it's rare that an accepting number set 
			 * is large. 
			 */ 
			for ( j = 1; j <= nacc; ++j ) 
				if ( accset[j] & YY_TRAILING_HEAD_MASK ) 
					{ 
					line_warning( 
					_( "dangerous trailing context" ), 
						rule_linenum[ar] ); 
					return; 
					} 
			} 
		} 
	} 
 
 
/* dump_associated_rules - list the rules associated with a DFA state 
 * 
 * Goes through the set of NFA states associated with the DFA and 
 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 
 * and writes a report to the given file. 
 */ 
 
void dump_associated_rules( file, ds ) 
FILE *file; 
int ds; 
	{ 
	register int i, j; 
	register int num_associated_rules = 0; 
	int rule_set[MAX_ASSOC_RULES + 1]; 
	int *dset = dss[ds]; 
	int size = dfasiz[ds]; 
 
	for ( i = 1; i <= size; ++i ) 
		{ 
		register int rule_num = rule_linenum[assoc_rule[dset[i]]]; 
 
		for ( j = 1; j <= num_associated_rules; ++j ) 
			if ( rule_num == rule_set[j] ) 
				break; 
 
		if ( j > num_associated_rules ) 
			{ /* new rule */ 
			if ( num_associated_rules < MAX_ASSOC_RULES ) 
				rule_set[++num_associated_rules] = rule_num; 
			} 
		} 
 
	bubble( rule_set, num_associated_rules ); 
 
	fprintf( file, _( " associated rule line numbers:" ) ); 
 
	for ( i = 1; i <= num_associated_rules; ++i ) 
		{ 
		if ( i % 8 == 1 ) 
			putc( '\n', file ); 
 
		fprintf( file, "\t%d", rule_set[i] ); 
		} 
 
	putc( '\n', file ); 
	} 
 
 
/* dump_transitions - list the transitions associated with a DFA state 
 * 
 * synopsis 
 *     dump_transitions( FILE *file, int state[numecs] ); 
 * 
 * Goes through the set of out-transitions and lists them in human-readable 
 * form (i.e., not as equivalence classes); also lists jam transitions 
 * (i.e., all those which are not out-transitions, plus EOF).  The dump 
 * is done to the given file. 
 */ 
 
void dump_transitions( file, state ) 
FILE *file; 
int state[]; 
	{ 
	register int i, ec; 
	int out_char_set[CSIZE]; 
 
	for ( i = 0; i < csize; ++i ) 
		{ 
		ec = ABS( ecgroup[i] ); 
		out_char_set[i] = state[ec]; 
		} 
 
	fprintf( file, _( " out-transitions: " ) ); 
 
	list_character_set( file, out_char_set ); 
 
	/* now invert the members of the set to get the jam transitions */ 
	for ( i = 0; i < csize; ++i ) 
		out_char_set[i] = ! out_char_set[i]; 
 
	fprintf( file, _( "\n jam-transitions: EOF " ) ); 
 
	list_character_set( file, out_char_set ); 
 
	putc( '\n', file ); 
	} 
 
 
/* epsclosure - construct the epsilon closure of a set of ndfa states 
 * 
 * synopsis 
 *    int *epsclosure( int t[num_states], int *numstates_addr, 
 *			int accset[num_rules+1], int *nacc_addr, 
 *			int *hashval_addr ); 
 * 
 * NOTES 
 *  The epsilon closure is the set of all states reachable by an arbitrary 
 *  number of epsilon transitions, which themselves do not have epsilon 
 *  transitions going out, unioned with the set of states which have non-null 
 *  accepting numbers.  t is an array of size numstates of nfa state numbers. 
 *  Upon return, t holds the epsilon closure and *numstates_addr is updated. 
 *  accset holds a list of the accepting numbers, and the size of accset is 
 *  given by *nacc_addr.  t may be subjected to reallocation if it is not 
 *  large enough to hold the epsilon closure. 
 * 
 *  hashval is the hash value for the dfa corresponding to the state set. 
 */ 
 
int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) 
int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; 
	{ 
	register int stkpos, ns, tsp; 
	int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 
	int stkend, nstate; 
	static int did_stk_init = false, *stk;  
 
#define MARK_STATE(state) \ 
trans1[state] = trans1[state] - MARKER_DIFFERENCE; 
 
#define IS_MARKED(state) (trans1[state] < 0) 
 
#define UNMARK_STATE(state) \ 
trans1[state] = trans1[state] + MARKER_DIFFERENCE; 
 
#define CHECK_ACCEPT(state) \ 
{ \ 
nfaccnum = accptnum[state]; \ 
if ( nfaccnum != NIL ) \ 
accset[++nacc] = nfaccnum; \ 
} 
 
#define DO_REALLOCATION \ 
{ \ 
current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 
++num_reallocs; \ 
t = reallocate_integer_array( t, current_max_dfa_size ); \ 
stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 
} \ 
 
#define PUT_ON_STACK(state) \ 
{ \ 
if ( ++stkend >= current_max_dfa_size ) \ 
DO_REALLOCATION \ 
stk[stkend] = state; \ 
MARK_STATE(state) \ 
} 
 
#define ADD_STATE(state) \ 
{ \ 
if ( ++numstates >= current_max_dfa_size ) \ 
DO_REALLOCATION \ 
t[numstates] = state; \ 
hashval += state; \ 
} 
 
#define STACK_STATE(state) \ 
{ \ 
PUT_ON_STACK(state) \ 
CHECK_ACCEPT(state) \ 
if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 
ADD_STATE(state) \ 
} 
 
 
	if ( ! did_stk_init ) 
		{ 
		stk = allocate_integer_array( current_max_dfa_size ); 
		did_stk_init = true; 
		} 
 
	nacc = stkend = hashval = 0; 
 
	for ( nstate = 1; nstate <= numstates; ++nstate ) 
		{ 
		ns = t[nstate]; 
 
		/* The state could be marked if we've already pushed it onto 
		 * the stack. 
		 */ 
		if ( ! IS_MARKED(ns) ) 
			{ 
			PUT_ON_STACK(ns) 
			CHECK_ACCEPT(ns) 
			hashval += ns; 
			} 
		} 
 
	for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 
		{ 
		ns = stk[stkpos]; 
		transsym = transchar[ns]; 
 
		if ( transsym == SYM_EPSILON ) 
			{ 
			tsp = trans1[ns] + MARKER_DIFFERENCE; 
 
			if ( tsp != NO_TRANSITION ) 
				{ 
				if ( ! IS_MARKED(tsp) ) 
					STACK_STATE(tsp) 
 
				tsp = trans2[ns]; 
 
				if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) ) 
					STACK_STATE(tsp) 
				} 
			} 
		} 
 
	/* Clear out "visit" markers. */ 
 
	for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 
		{ 
		if ( IS_MARKED(stk[stkpos]) ) 
			UNMARK_STATE(stk[stkpos]) 
		else 
			flexfatal( 
			_( "consistency check failed in epsclosure()" ) ); 
		} 
 
	*ns_addr = numstates; 
	*hv_addr = hashval; 
	*nacc_addr = nacc; 
 
	return t; 
	} 
 
 
/* increase_max_dfas - increase the maximum number of DFAs */ 
 
void increase_max_dfas() 
	{ 
	current_max_dfas += MAX_DFAS_INCREMENT; 
 
	++num_reallocs; 
 
	base = reallocate_integer_array( base, current_max_dfas ); 
	def = reallocate_integer_array( def, current_max_dfas ); 
	dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); 
	accsiz = reallocate_integer_array( accsiz, current_max_dfas ); 
	dhash = reallocate_integer_array( dhash, current_max_dfas ); 
	dss = reallocate_int_ptr_array( dss, current_max_dfas ); 
	dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); 
 
	if ( nultrans ) 
		nultrans = 
			reallocate_integer_array( nultrans, current_max_dfas ); 
	} 
 
 
/* ntod - convert an ndfa to a dfa 
 * 
 * Creates the dfa corresponding to the ndfa we've constructed.  The 
 * dfa starts out in state #1. 
 */ 
 
void ntod() 
	{ 
	int *accset, ds, nacc, newds; 
	int sym, hashval, numstates, dsize; 
	int num_full_table_rows;	/* used only for -f */ 
	int *nset, *dset; 
	int targptr, totaltrans, i, comstate, comfreq, targ; 
	int symlist[CSIZE + 1]; 
	int num_start_states; 
	int todo_head, todo_next; 
 
	/* Note that the following are indexed by *equivalence classes* 
	 * and not by characters.  Since equivalence classes are indexed 
	 * beginning with 1, even if the scanner accepts NUL's, this 
	 * means that (since every character is potentially in its own 
	 * equivalence class) these arrays must have room for indices 
	 * from 1 to CSIZE, so their size must be CSIZE + 1. 
	 */ 
	int duplist[CSIZE + 1], state[CSIZE + 1]; 
	int targfreq[CSIZE + 1], targstate[CSIZE + 1]; 
 
	accset = allocate_integer_array( num_rules + 1 ); 
	nset = allocate_integer_array( current_max_dfa_size ); 
 
	/* The "todo" queue is represented by the head, which is the DFA 
	 * state currently being processed, and the "next", which is the 
	 * next DFA state number available (not in use).  We depend on the 
	 * fact that snstods() returns DFA's \in increasing order/, and thus 
	 * need only know the bounds of the dfas to be processed. 
	 */ 
	todo_head = todo_next = 0; 
 
	for ( i = 0; i <= csize; ++i ) 
		{ 
		duplist[i] = NIL; 
		symlist[i] = false; 
		} 
 
	for ( i = 0; i <= num_rules; ++i ) 
		accset[i] = NIL; 
 
	if ( trace ) 
		{ 
		dumpnfa( scset[1] ); 
		fputs( _( "\n\nDFA Dump:\n\n" ), stderr ); 
		} 
 
	inittbl(); 
 
	/* Check to see whether we should build a separate table for 
	 * transitions on NUL characters.  We don't do this for full-speed 
	 * (-F) scanners, since for them we don't have a simple state 
	 * number lying around with which to index the table.  We also 
	 * don't bother doing it for scanners unless (1) NUL is in its own 
	 * equivalence class (indicated by a positive value of 
	 * ecgroup[NUL]), (2) NUL's equivalence class is the last 
	 * equivalence class, and (3) the number of equivalence classes is 
	 * the same as the number of characters.  This latter case comes 
	 * about when useecs is false or when it's true but every character 
	 * still manages to land in its own class (unlikely, but it's 
	 * cheap to check for).  If all these things are true then the 
	 * character code needed to represent NUL's equivalence class for 
	 * indexing the tables is going to take one more bit than the 
	 * number of characters, and therefore we won't be assured of 
	 * being able to fit it into a YY_CHAR variable.  This rules out 
	 * storing the transitions in a compressed table, since the code 
	 * for interpreting them uses a YY_CHAR variable (perhaps it 
	 * should just use an integer, though; this is worth pondering ... 
	 * ###). 
	 * 
	 * Finally, for full tables, we want the number of entries in the 
	 * table to be a power of two so the array references go fast (it 
	 * will just take a shift to compute the major index).  If 
	 * encoding NUL's transitions in the table will spoil this, we 
	 * give it its own table (note that this will be the case if we're 
	 * not using equivalence classes). 
	 */ 
 
	/* Note that the test for ecgroup[0] == numecs below accomplishes 
	 * both (1) and (2) above 
	 */ 
	if ( ! fullspd && ecgroup[0] == numecs ) 
		{ 
		/* NUL is alone in its equivalence class, which is the 
		 * last one. 
		 */ 
		int use_NUL_table = (numecs == csize); 
 
		if ( fulltbl && ! use_NUL_table ) 
			{ 
			/* We still may want to use the table if numecs 
			 * is a power of 2. 
			 */ 
			int power_of_two; 
 
			for ( power_of_two = 1; power_of_two <= csize; 
			      power_of_two *= 2 ) 
				if ( numecs == power_of_two ) 
					{ 
					use_NUL_table = true; 
					break; 
					} 
			} 
 
		if ( use_NUL_table ) 
			nultrans = allocate_integer_array( current_max_dfas ); 
 
		/* From now on, nultrans != nil indicates that we're 
		 * saving null transitions for later, separate encoding. 
		 */ 
		} 
 
 
	if ( fullspd ) 
		{ 
		for ( i = 0; i <= numecs; ++i ) 
			state[i] = 0; 
 
		place_state( state, 0, 0 ); 
		dfaacc[0].dfaacc_state = 0; 
		} 
 
	else if ( fulltbl ) 
		{ 
		if ( nultrans ) 
			/* We won't be including NUL's transitions in the 
			 * table, so build it for entries from 0 .. numecs - 1. 
			 */ 
			num_full_table_rows = numecs; 
 
		else 
			/* Take into account the fact that we'll be including 
			 * the NUL entries in the transition table.  Build it 
			 * from 0 .. numecs. 
			 */ 
			num_full_table_rows = numecs + 1; 
 
		/* Unless -Ca, declare it "short" because it's a real 
		 * long-shot that that won't be large enough. 
		 */ 
		out_str_dec( "static yyconst %s yy_nxt[][%d] =\n    {\n", 
			/* '}' so vi doesn't get too confused */ 
			long_align ? "long" : "short", num_full_table_rows ); 
 
		outn( "    {" ); 
 
		/* Generate 0 entries for state #0. */ 
		for ( i = 0; i < num_full_table_rows; ++i ) 
			mk2data( 0 ); 
 
		dataflush(); 
		outn( "    },\n" ); 
		} 
 
	/* Create the first states. */ 
 
	num_start_states = lastsc * 2; 
 
	for ( i = 1; i <= num_start_states; ++i ) 
		{ 
		numstates = 1; 
 
		/* For each start condition, make one state for the case when 
		 * we're at the beginning of the line (the '^' operator) and 
		 * one for the case when we're not. 
		 */ 
		if ( i % 2 == 1 ) 
			nset[numstates] = scset[(i / 2) + 1]; 
		else 
			nset[numstates] = 
				mkbranch( scbol[i / 2], scset[i / 2] ); 
 
		nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); 
 
		if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) 
			{ 
			numas += nacc; 
			totnst += numstates; 
			++todo_next; 
 
			if ( variable_trailing_context_rules && nacc > 0 ) 
				check_trailing_context( nset, numstates, 
							accset, nacc ); 
			} 
		} 
 
	if ( ! fullspd ) 
		{ 
		if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) 
			flexfatal( 
			_( "could not create unique end-of-buffer state" ) ); 
 
		++numas; 
		++num_start_states; 
		++todo_next; 
		} 
 
	while ( todo_head < todo_next ) 
		{ 
		targptr = 0; 
		totaltrans = 0; 
 
		for ( i = 1; i <= numecs; ++i ) 
			state[i] = 0; 
 
		ds = ++todo_head; 
 
		dset = dss[ds]; 
		dsize = dfasiz[ds]; 
 
		if ( trace ) 
			fprintf( stderr, _( "state # %d:\n" ), ds ); 
 
		sympartition( dset, dsize, symlist, duplist ); 
 
		for ( sym = 1; sym <= numecs; ++sym ) 
			{ 
			if ( symlist[sym] ) 
				{ 
				symlist[sym] = 0; 
 
				if ( duplist[sym] == NIL ) 
					{ 
					/* Symbol has unique out-transitions. */ 
					numstates = symfollowset( dset, dsize, 
								sym, nset ); 
					nset = epsclosure( nset, &numstates, 
						accset, &nacc, &hashval ); 
 
					if ( snstods( nset, numstates, accset, 
						nacc, hashval, &newds ) ) 
						{ 
						totnst = totnst + numstates; 
						++todo_next; 
						numas += nacc; 
 
						if ( 
					variable_trailing_context_rules && 
							nacc > 0 ) 
							check_trailing_context( 
								nset, numstates, 
								accset, nacc ); 
						} 
 
					state[sym] = newds; 
 
					if ( trace ) 
						fprintf( stderr, "\t%d\t%d\n", 
							sym, newds ); 
 
					targfreq[++targptr] = 1; 
					targstate[targptr] = newds; 
					++numuniq; 
					} 
 
				else 
					{ 
					/* sym's equivalence class has the same 
					 * transitions as duplist(sym)'s 
					 * equivalence class. 
					 */ 
					targ = state[duplist[sym]]; 
					state[sym] = targ; 
 
					if ( trace ) 
						fprintf( stderr, "\t%d\t%d\n", 
							sym, targ ); 
 
					/* Update frequency count for 
					 * destination state. 
					 */ 
 
					i = 0; 
					while ( targstate[++i] != targ ) 
						; 
 
					++targfreq[i]; 
					++numdup; 
					} 
 
				++totaltrans; 
				duplist[sym] = NIL; 
				} 
			} 
 
		if ( caseins && ! useecs ) 
			{ 
			register int j; 
 
			for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) 
				{ 
				if ( state[i] == 0 && state[j] != 0 ) 
					/* We're adding a transition. */ 
					++totaltrans; 
 
				else if ( state[i] != 0 && state[j] == 0 ) 
					/* We're taking away a transition. */ 
					--totaltrans; 
 
				state[i] = state[j]; 
				} 
			} 
 
		numsnpairs += totaltrans; 
 
		if ( ds > num_start_states ) 
			check_for_backing_up( ds, state ); 
 
		if ( nultrans ) 
			{ 
			nultrans[ds] = state[NUL_ec]; 
			state[NUL_ec] = 0;	/* remove transition */ 
			} 
 
		if ( fulltbl ) 
			{ 
			outn( "    {" ); 
 
			/* Supply array's 0-element. */ 
			if ( ds == end_of_buffer_state ) 
				mk2data( -end_of_buffer_state ); 
			else 
				mk2data( end_of_buffer_state ); 
 
			for ( i = 1; i < num_full_table_rows; ++i ) 
				/* Jams are marked by negative of state 
				 * number. 
				 */ 
				mk2data( state[i] ? state[i] : -ds ); 
 
			dataflush(); 
			outn( "    },\n" ); 
			} 
 
		else if ( fullspd ) 
			place_state( state, ds, totaltrans ); 
 
		else if ( ds == end_of_buffer_state ) 
			/* Special case this state to make sure it does what 
			 * it's supposed to, i.e., jam on end-of-buffer. 
			 */ 
			stack1( ds, 0, 0, JAMSTATE ); 
 
		else /* normal, compressed state */ 
			{ 
			/* Determine which destination state is the most 
			 * common, and how many transitions to it there are. 
			 */ 
 
			comfreq = 0; 
			comstate = 0; 
 
			for ( i = 1; i <= targptr; ++i ) 
				if ( targfreq[i] > comfreq ) 
					{ 
					comfreq = targfreq[i]; 
					comstate = targstate[i]; 
					} 
 
			bldtbl( state, ds, totaltrans, comstate, comfreq ); 
			} 
		} 
 
	if ( fulltbl ) 
		dataend(); 
 
	else if ( ! fullspd ) 
		{ 
		cmptmps();  /* create compressed template entries */ 
 
		/* Create tables for all the states with only one 
		 * out-transition. 
		 */ 
		while ( onesp > 0 ) 
			{ 
			mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], 
			onedef[onesp] ); 
			--onesp; 
			} 
 
		mkdeftbl(); 
		} 
 
	flex_free( (void *) accset ); 
	flex_free( (void *) nset ); 
	} 
 
 
/* snstods - converts a set of ndfa states into a dfa state 
 * 
 * synopsis 
 *    is_new_state = snstods( int sns[numstates], int numstates, 
 *				int accset[num_rules+1], int nacc, 
 *				int hashval, int *newds_addr ); 
 * 
 * On return, the dfa state number is in newds. 
 */ 
 
int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) 
int sns[], numstates, accset[], nacc, hashval, *newds_addr; 
	{ 
	int didsort = 0; 
	register int i, j; 
	int newds, *oldsns; 
 
	for ( i = 1; i <= lastdfa; ++i ) 
		if ( hashval == dhash[i] ) 
			{ 
			if ( numstates == dfasiz[i] ) 
				{ 
				oldsns = dss[i]; 
 
				if ( ! didsort ) 
					{ 
					/* We sort the states in sns so we 
					 * can compare it to oldsns quickly. 
					 * We use bubble because there probably 
					 * aren't very many states. 
					 */ 
					bubble( sns, numstates ); 
					didsort = 1; 
					} 
 
				for ( j = 1; j <= numstates; ++j ) 
					if ( sns[j] != oldsns[j] ) 
						break; 
 
				if ( j > numstates ) 
					{ 
					++dfaeql; 
					*newds_addr = i; 
					return 0; 
					} 
 
				++hshcol; 
				} 
 
			else 
				++hshsave; 
			} 
 
	/* Make a new dfa. */ 
 
	if ( ++lastdfa >= current_max_dfas ) 
		increase_max_dfas(); 
 
	newds = lastdfa; 
 
	dss[newds] = allocate_integer_array( numstates + 1 ); 
 
	/* If we haven't already sorted the states in sns, we do so now, 
	 * so that future comparisons with it can be made quickly. 
	 */ 
 
	if ( ! didsort ) 
		bubble( sns, numstates ); 
 
	for ( i = 1; i <= numstates; ++i ) 
		dss[newds][i] = sns[i]; 
 
	dfasiz[newds] = numstates; 
	dhash[newds] = hashval; 
 
	if ( nacc == 0 ) 
		{ 
		if ( reject ) 
			dfaacc[newds].dfaacc_set = (int *) 0; 
		else 
			dfaacc[newds].dfaacc_state = 0; 
 
		accsiz[newds] = 0; 
		} 
 
	else if ( reject ) 
		{ 
		/* We sort the accepting set in increasing order so the 
		 * disambiguating rule that the first rule listed is considered 
		 * match in the event of ties will work.  We use a bubble 
		 * sort since the list is probably quite small. 
		 */ 
 
		bubble( accset, nacc ); 
 
		dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 ); 
 
		/* Save the accepting set for later */ 
		for ( i = 1; i <= nacc; ++i ) 
			{ 
			dfaacc[newds].dfaacc_set[i] = accset[i]; 
 
			if ( accset[i] <= num_rules ) 
				/* Who knows, perhaps a REJECT can yield 
				 * this rule. 
				 */ 
				rule_useful[accset[i]] = true; 
			} 
 
		accsiz[newds] = nacc; 
		} 
 
	else 
		{ 
		/* Find lowest numbered rule so the disambiguating rule 
		 * will work. 
		 */ 
		j = num_rules + 1; 
 
		for ( i = 1; i <= nacc; ++i ) 
			if ( accset[i] < j ) 
				j = accset[i]; 
 
		dfaacc[newds].dfaacc_state = j; 
 
		if ( j <= num_rules ) 
			rule_useful[j] = true; 
		} 
 
	*newds_addr = newds; 
 
	return 1; 
	} 
 
 
/* symfollowset - follow the symbol transitions one step 
 * 
 * synopsis 
 *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 
 *				int transsym, int nset[current_max_dfa_size] ); 
 */ 
 
int symfollowset( ds, dsize, transsym, nset ) 
int ds[], dsize, transsym, nset[]; 
	{ 
	int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 
 
	numstates = 0; 
 
	for ( i = 1; i <= dsize; ++i ) 
		{ /* for each nfa state ns in the state set of ds */ 
		ns = ds[i]; 
		sym = transchar[ns]; 
		tsp = trans1[ns]; 
 
		if ( sym < 0 ) 
			{ /* it's a character class */ 
			sym = -sym; 
			ccllist = cclmap[sym]; 
			lenccl = ccllen[sym]; 
 
			if ( cclng[sym] ) 
				{ 
				for ( j = 0; j < lenccl; ++j ) 
					{ 
					/* Loop through negated character 
					 * class. 
					 */ 
					ch = ccltbl[ccllist + j]; 
 
					if ( ch == 0 ) 
						ch = NUL_ec; 
 
					if ( ch > transsym ) 
						/* Transsym isn't in negated 
						 * ccl. 
						 */ 
						break; 
 
					else if ( ch == transsym ) 
						/* next 2 */ goto bottom; 
					} 
 
				/* Didn't find transsym in ccl. */ 
				nset[++numstates] = tsp; 
				} 
 
			else 
				for ( j = 0; j < lenccl; ++j ) 
					{ 
					ch = ccltbl[ccllist + j]; 
 
					if ( ch == 0 ) 
						ch = NUL_ec; 
 
					if ( ch > transsym ) 
						break; 
					else if ( ch == transsym ) 
						{ 
						nset[++numstates] = tsp; 
						break; 
						} 
					} 
			} 
 
		else if ( sym >= 'A' && sym <= 'Z' && caseins ) 
			flexfatal( 
			_( "consistency check failed in symfollowset" ) ); 
 
		else if ( sym == SYM_EPSILON ) 
			{ /* do nothing */ 
			} 
 
		else if ( ABS( ecgroup[sym] ) == transsym ) 
			nset[++numstates] = tsp; 
 
		bottom: ; 
		} 
 
	return numstates; 
	} 
 
 
/* sympartition - partition characters with same out-transitions 
 * 
 * synopsis 
 *    sympartition( int ds[current_max_dfa_size], int numstates, 
 *			int symlist[numecs], int duplist[numecs] ); 
 */ 
 
void sympartition( ds, numstates, symlist, duplist ) 
int ds[], numstates; 
int symlist[], duplist[]; 
	{ 
	int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 
 
	/* Partitioning is done by creating equivalence classes for those 
	 * characters which have out-transitions from the given state.  Thus 
	 * we are really creating equivalence classes of equivalence classes. 
	 */ 
 
	for ( i = 1; i <= numecs; ++i ) 
		{ /* initialize equivalence class list */ 
		duplist[i] = i - 1; 
		dupfwd[i] = i + 1; 
		} 
 
	duplist[1] = NIL; 
	dupfwd[numecs] = NIL; 
 
	for ( i = 1; i <= numstates; ++i ) 
		{ 
		ns = ds[i]; 
		tch = transchar[ns]; 
 
		if ( tch != SYM_EPSILON ) 
			{ 
			if ( tch < -lastccl || tch >= csize ) 
				{ 
				flexfatal( 
		_( "bad transition character detected in sympartition()" ) ); 
				} 
 
			if ( tch >= 0 ) 
				{ /* character transition */ 
				int ec = ecgroup[tch]; 
 
				mkechar( ec, dupfwd, duplist ); 
				symlist[ec] = 1; 
				} 
 
			else 
				{ /* character class */ 
				tch = -tch; 
 
				lenccl = ccllen[tch]; 
				cclp = cclmap[tch]; 
				mkeccl( ccltbl + cclp, lenccl, dupfwd, 
					duplist, numecs, NUL_ec ); 
 
				if ( cclng[tch] ) 
					{ 
					j = 0; 
 
					for ( k = 0; k < lenccl; ++k ) 
						{ 
						ich = ccltbl[cclp + k]; 
 
						if ( ich == 0 ) 
							ich = NUL_ec; 
 
						for ( ++j; j < ich; ++j ) 
							symlist[j] = 1; 
						} 
 
					for ( ++j; j <= numecs; ++j ) 
						symlist[j] = 1; 
					} 
 
				else 
					for ( k = 0; k < lenccl; ++k ) 
						{ 
						ich = ccltbl[cclp + k]; 
 
						if ( ich == 0 ) 
							ich = NUL_ec; 
 
						symlist[ich] = 1; 
						} 
				} 
			} 
		} 
	}