www.pudn.com > FP-GROWTH.rar > symtab.c
/*----------------------------------------------------------------------
File : symtab.c
Contents: symbol table management
Author : Christian Borgelt
History : 22.10.1995 file created
30.10.1995 functions made independent of symbol data
26.11.1995 symbol types and visibility levels added
04.01.1996 st_clear added
27.02.1996 st_insert modified
28.06.1996 dynamic bucket vector enlargement added
04.07.1996 bug in bucket reorganization removed
01.04.1997 functions st_clear and st_remove combined
29.07.1997 minor improvements
05.08.1997 minor improvements
16.11.1997 some comments improved
06.02.1998 default table sizes changed
31.05.1998 list of all symbols removed
20.06.1998 deletion function moved to st_create
14.07.1998 minor improvements
01.09.1998 bug in function _sort removed, assertions added
25.09.1998 hash function improved
28.09.1998 types ULONG and CCHAR removed, st_stats added
04.02.1999 long int changed to int
10.11.1999 name/identifier map management added
15.08.2003 renamed new to nel in st_insert (C++ compat.)
15.12.2004 function nim_trunc added
28.12.2004 bug in function nim_trunc fixed
----------------------------------------------------------------------*/
#include
#include
#include
#include
#include
#include "symtab.h"
#ifdef NIMAPFN
#include "vecops.h"
#endif
#ifdef STORAGE
#include "storage.h"
#endif
/*----------------------------------------------------------------------
Preprocessor Definitions
----------------------------------------------------------------------*/
#define DFLT_INIT 1023 /* default initial hash table size */
#if (INT_MAX > 32767)
#define DFLT_MAX 1048575 /* default maximal hash table size */
#else
#define DFLT_MAX 16383 /* default maximal hash table size */
#endif
#define BLKSIZE 256 /* block size for identifier vector */
/*----------------------------------------------------------------------
Default Hash Function
----------------------------------------------------------------------*/
static unsigned _hdflt (const char *name, int type)
{ /* --- default hash function */
register unsigned h = type; /* hash value */
while (*name) h ^= (h << 3) ^ (unsigned)(*name++);
return h; /* compute hash value */
} /* _hdflt() */
/*----------------------------------------------------------------------
Auxiliary Functions
----------------------------------------------------------------------*/
static void _delsym (SYMTAB *tab)
{ /* --- delete all symbols */
int i; /* loop variable */
STE *ste, *tmp; /* to traverse the symbol list */
for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
ste = tab->bvec[i]; /* get the next bucket list, */
tab->bvec[i] = NULL; /* clear the bucket vector entry, */
while (ste) { /* and traverse the bucket list */
tmp = ste; /* note the symbol to delete */
ste = ste->succ; /* and get the next symbol */
if (tab->delfn) tab->delfn(tmp +1);
free(tmp); /* if a deletion function is given, */
} /* call it and then deallocate */
} /* the symbol table element */
} /* _delsym() */
/*--------------------------------------------------------------------*/
static STE** _merge (STE *in[], int cnt[], STE **out)
{ /* --- merge two lists into one */
int k; /* index of input list */
do { /* compare and merge loop */
k = (in[0]->level > in[1]->level) ? 0 : 1;
*out = in[k]; /* append the element on the higher */
out = &(*out)->succ; /* level to the output list and */
in[k] = *out; /* remove it from the input list */
} while (--cnt[k] > 0); /* while both lists are not empty */
*out = in[k ^= 1]; /* append remaining elements */
while (--cnt[k] >= 0) /* while not at the end of the list */
out = &(*out)->succ; /* go to the successor element */
in[k] = *out; /* set new start of the input list */
*out = NULL; /* terminate the output list and */
return out; /* return new end of the output list */
} /* _merge() */
/*--------------------------------------------------------------------*/
static STE* _sort (STE *list)
{ /* --- sort a hash bucket list */
STE *ste; /* to traverse the list, buffer */
STE *in[2], *out[2]; /* input and output lists */
STE **end[2]; /* ends of output lists */
int cnt[2]; /* number of elements to merge */
int run; /* run length in input lists */
int rem; /* elements in remainder collection */
int oid; /* index of output list */
if (!list) return list; /* empty lists need not to be sorted */
oid = 0; out[0] = list; /* traverse list elements */
for (ste = list->succ; ste; ste = ste->succ)
if ((oid ^= 1) == 0) list = list->succ;
out[1] = list->succ; /* split list into two equal parts */
list = list->succ = NULL; /* initialize remainder collection */
run = 1; rem = 0; /* and run length */
while (out[1]) { /* while there are two lists */
in [0] = out[0]; in [1] = out[1]; /* move output list to input */
end[0] = out; end[1] = out+1; /* reinitialize end pointers */
out[1] = NULL; oid = 0; /* start with 1st output list */
do { /* merge loop */
cnt[0] = cnt[1] = run; /* merge run elements from the */
end[oid] = _merge(in, cnt, end[oid]); /* input lists */
oid ^= 1; /* toggle index of output list */
} while (in[1]); /* while both lists are not empty */
if (in[0]) { /* if there is one input list left */
if (!list) /* if there is no rem. collection, */
list = in[0]; /* just note the rem. input list */
else { /* if there is a rem. collection, */
cnt[0] = run; cnt[1] = rem; in[1] = list;
_merge(in, cnt, &list); /* merge it and the input list to */
} /* get the new renmainder collection */
rem += run; /* there are now run more elements */
} /* in the remainder collection */
run <<= 1; /* double run length */
} /* while (out[1]) .. */
if (rem > 0) { /* if there is a rem. collection */
in[0] = out[0]; cnt[0] = run;
in[1] = list; cnt[1] = rem;
_merge(in, cnt, out); /* merge it to the output list */
} /* and store the result in out[0] */
return out[0]; /* return the sorted list */
} /* _sort() */
/*--------------------------------------------------------------------*/
static void _reorg (SYMTAB *tab)
{ /* --- reorganize a hash table */
int i; /* loop variable */
int size; /* new bucket vector size */
STE **p; /* new bucket vector, buffer */
STE *ste; /* to traverse symbol table elements */
STE *list = NULL; /* list of all symbols */
size = (tab->size << 1) +1; /* calculate new vector size */
if (size > tab->max) /* if new size exceeds maximum, */
size = tab->max; /* set the maximal size */
for (p = &list, i = tab->size; --i >= 0; ) {
*p = tab->bvec[i]; /* traverse the bucket vector and */
while (*p) p = &(*p)->succ; /* link all bucket lists together */
} /* (collect symbols) */
p = (STE**)realloc(tab->bvec, size *sizeof(STE*));
if (!p) return; /* enlarge bucket vector */
tab->bvec = p; /* set new bucket vector */
tab->size = size; /* and its size */
for (p += i = size; --i >= 0; )
*--p = NULL; /* clear the hash buckets */
while (list) { /* traverse list of all symbols */
ste = list; list = list->succ; /* get next symbol */
i = tab->hash(ste->name, ste->type) %size;
ste->succ = tab->bvec[i]; /* compute the hash bucket index */
tab->bvec[i] = ste; /* and insert the symbol at */
} /* the head of the bucket list */
for (i = size; --i >= 0; ) /* sort bucket lists according to */
tab->bvec[i] = _sort(tab->bvec[i]); /* the visibility level */
} /* _reorg() */
/*----------------------------------------------------------------------
Symbol Table Functions
----------------------------------------------------------------------*/
SYMTAB* st_create (int init, int max, HASHFN hash, SYMFN delfn)
{ /* --- create a symbol table */
SYMTAB *tab; /* created symbol table */
if (init <= 0) init = DFLT_INIT; /* check and adapt initial */
if (max <= 0) max = DFLT_MAX; /* and maximal vector size */
tab = (SYMTAB*)malloc(sizeof(SYMTAB));
if (!tab) return NULL; /* allocate symbol table body */
tab->bvec = (STE**)calloc(init, sizeof(STE*));
if (!tab->bvec) { free(tab); return NULL; }
tab->level = tab->cnt = 0; /* allocate bucket vector */
tab->size = init; /* and initialize fields */
tab->max = max; /* of symbol table body */
tab->hash = (hash) ? hash : _hdflt;
tab->delfn = delfn;
tab->vsz = INT_MAX;
tab->ids = NULL;
return tab; /* return created symbol table */
} /* st_create() */
/*--------------------------------------------------------------------*/
void st_delete (SYMTAB *tab)
{ /* --- delete a symbol table */
assert(tab && tab->bvec); /* check argument */
_delsym(tab); /* delete all symbols, */
free(tab->bvec); /* the bucket vector, */
if (tab->ids) free(tab->ids); /* the identifier vector, */
free(tab); /* and the symbol table body */
} /* st_delete() */
/*--------------------------------------------------------------------*/
void* st_insert (SYMTAB *tab, const char *name, int type,
unsigned size)
{ /* --- insert a symbol */
unsigned h; /* hash value */
int i; /* index of hash bucket */
STE *ste; /* to traverse bucket list */
STE *nel; /* new symbol table element */
assert(tab && name /* check the function arguments */
&& ((size >= sizeof(int)) || (tab->vsz == INT_MAX)));
if ((tab->cnt /4 > tab->size) /* if buckets are rather full and */
&& (tab->size < tab->max)) /* table does not have maximal size, */
_reorg(tab); /* reorganize the hash table */
h = tab->hash(name, type); /* compute the hash value and */
i = h % tab->size; /* the index of the hash bucket */
ste = tab->bvec[i]; /* get first element in bucket */
while (ste) { /* traverse the bucket list */
if ((type == ste->type) /* if symbol found */
&& (strcmp(name, ste->name) == 0))
break; /* abort the loop */
ste = ste->succ; /* otherwise get the successor */
} /* element in the hash bucket */
if (ste /* if symbol found on current level */
&& (ste->level == tab->level))
return EXISTS; /* return 'symbol exists' */
#ifdef NIMAPFN /* if name/identifier map management */
if (tab->cnt >= tab->vsz) { /* if the identifier vector is full */
int vsz, **tmp; /* (new) id vector and its size */
vsz = tab->vsz +((tab->vsz > BLKSIZE) ? tab->vsz >> 1 : BLKSIZE);
tmp = (int**)realloc(tab->ids, vsz *sizeof(int*));
if (!tmp) return NULL; /* resize the identifier vector and */
tab->ids = tmp; tab->vsz = vsz; /* set new vector and its size */
} /* (no resizing for symbol tables */
#endif /* since then tab->vsz = MAX_INT) */
nel = (STE*)malloc(sizeof(STE) +size +strlen(name) +1);
if (!nel) return NULL; /* allocate memory for new symbol */
nel->name = (char*)(nel+1) +size; /* and organize it */
strcpy(nel->name, name); /* note the symbol name, */
nel->type = type; /* the symbol type, and the */
nel->level = tab->level; /* current visibility level */
nel->succ = tab->bvec[i]; /* insert new symbol at the head */
tab->bvec[i] = nel++; /* of the bucket list */
#ifdef NIMAPFN /* if name/identifier maps are */
if (tab->ids) { /* supported and this is such a map */
tab->ids[tab->cnt] = (int*)nel;
*(int*)nel = tab->cnt; /* store the new symbol */
} /* in the identifier vector */
#endif /* and set the symbol identifier */
tab->cnt++; /* increment the symbol counter */
return nel; /* return pointer to data field */
} /* st_insert() */
/*--------------------------------------------------------------------*/
int st_remove (SYMTAB *tab, const char *name, int type)
{ /* --- remove a symbol/all symbols */
int i; /* index of hash bucket */
STE **p, *ste; /* to traverse bucket list */
assert(tab); /* check for a valid symbol table */
/* --- remove all symbols --- */
if (!name) { /* if no symbol name given */
_delsym(tab); /* delete all symbols */
tab->cnt = tab->level = 0; /* reset visibility level */
return 0; /* and symbol counter */
} /* and return 'ok' */
/* --- remove one symbol --- */
i = tab->hash(name, type) % tab->size;
p = tab->bvec +i; /* compute index of hash bucket */
while (*p) { /* and traverse bucket list */
if (((*p)->type == type) /* if symbol found */
&& (strcmp(name, (*p)->name) == 0))
break; /* abort loop */
p = &(*p)->succ; /* otherwise get successor */
} /* in hash bucket */
ste = *p; /* if the symbol does not exist, */
if (!ste) return -1; /* abort the function */
*p = ste->succ; /* remove symbol from hash bucket */
if (tab->delfn) tab->delfn(ste +1); /* delete user data */
free(ste); /* and symbol table element */
tab->cnt--; /* decrement symbol counter */
return 0; /* return 'ok' */
} /* st_remove() */
/*--------------------------------------------------------------------*/
void* st_lookup (SYMTAB *tab, const char *name, int type)
{ /* --- look up a symbol */
int i; /* index of hash bucket */
STE *ste; /* to traverse bucket list */
assert(tab && name); /* check arguments */
i = tab->hash(name, type) % tab->size;
ste = tab->bvec[i]; /* compute index of hash bucket */
while (ste) { /* and traverse bucket list */
if ((ste->type == type) /* if symbol found */
&& (strcmp(name, ste->name) == 0))
return ste +1; /* return pointer to assoc. data */
ste = ste->succ; /* otherwise get successor */
} /* in hash bucket */
return NULL; /* return 'not found' */
} /* st_lookup() */
/*--------------------------------------------------------------------*/
void st_endblk (SYMTAB *tab)
{ /* --- remove one visibility level */
int i; /* loop variable */
STE *ste, *tmp; /* to traverse bucket lists */
assert(tab); /* check for a valid symbol table */
if (tab->level <= 0) return; /* if on level 0, abort */
for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
ste = tab->bvec[i]; /* get next bucket list */
while (ste /* and remove all symbols */
&& (ste->level >= tab->level)) { /* of higher level */
tmp = ste; /* note symbol and */
ste = ste->succ; /* get successor */
if (tab->delfn) tab->delfn(tmp +1);
free(tmp); /* delete user data and */
tab->cnt--; /* symbol table element */
} /* and decrement symbol counter */
tab->bvec[i] = ste; /* set new start of bucket list */
}
tab->level--; /* go up one level */
} /* st_endblk() */
/*--------------------------------------------------------------------*/
#ifndef NDEBUG
void st_stats (const SYMTAB *tab)
{ /* --- compute and print statistics */
const STE *ste; /* to traverse bucket lists */
int i; /* loop variable */
int used; /* number of used hash buckets */
int len; /* length of current bucket list */
int min, max; /* min. and max. bucket list length */
int cnts[10]; /* counter for bucket list lengths */
assert(tab); /* check for a valid symbol table */
min = INT_MAX; max = used = 0;/* initialize variables */
for (i = 10; --i >= 0; ) cnts[i] = 0;
for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
len = 0; /* determine bucket list length */
for (ste = tab->bvec[i]; ste; ste = ste->succ) len++;
if (len > 0) used++; /* count used hash buckets */
if (len < min) min = len; /* determine minimal and */
if (len > max) max = len; /* maximal list length */
cnts[(len >= 9) ? 9 : len]++;
} /* count list length */
printf("number of symbols : %d\n", tab->cnt);
printf("number of hash buckets: %d\n", tab->size);
printf("used hash buckets : %d\n", used);
printf("minimal list length : %d\n", min);
printf("maximal list length : %d\n", max);
printf("average list length : %g\n", (double)tab->cnt/tab->size);
printf("ditto, of used buckets: %g\n", (double)tab->cnt/used);
printf("length distribution :\n");
for (i = 0; i < 9; i++) printf("%3d ", i);
printf(" >8\n");
for (i = 0; i < 9; i++) printf("%3d ", cnts[i]);
printf("%3d\n", cnts[9]);
} /* st_stats() */
#endif
/*----------------------------------------------------------------------
Name/Identifier Map Functions
----------------------------------------------------------------------*/
#ifdef NIMAPFN
NIMAP* nim_create (int init, int max, HASHFN hash, SYMFN delfn)
{ /* --- create a name/identifier map */
NIMAP *nim; /* created name/identifier map */
nim = st_create(init, max, hash, delfn);
if (!nim) return NULL; /* create a name/identifier map */
nim->vsz = 0; /* and clear the id. vector size */
return nim; /* return created name/id map */
} /* nim_create() */
/*--------------------------------------------------------------------*/
void nim_sort (NIMAP *nim, SYMCMPFN cmpfn, void *data,
int *map, int dir)
{ /* --- sort name/identifier map */
int i; /* loop variable */
int **p; /* to traverse the value vector */
assert(nim && cmpfn); /* check the function arguments */
v_sort(nim->ids, nim->cnt, cmpfn, data);
if (!map) { /* if no conversion map is requested */
for (p = nim->ids +(i = nim->cnt); --i >= 0; )
**--p = i; } /* just set new identifiers */
else { /* if a conversion map is requested, */
p = nim->ids +(i = nim->cnt); /* traverse the sorted vector */
if (dir < 0) /* if backward map (i.e. new -> old) */
while (--i >= 0) { map[i] = **--p; **p = i; }
else /* if forward map (i.e. old -> new) */
while (--i >= 0) { map[**--p] = i; **p = i; }
} /* (build conversion map) */
} /* nim_sort() */
/*--------------------------------------------------------------------*/
void nim_trunc (NIMAP *nim, int n)
{ /* --- truncate name/identifier map */
int *id; /* to access the identifiers */
while (nim->cnt > n) { /* while to remove mappings */
id = nim->ids[nim->cnt -1]; /* get the identifier object */
st_remove(nim, st_name(id), 0);
} /* remove the symbol table element */
} /* nim_trunc() */
#endif