www.pudn.com > FP-GROWTH.rar > fptree.c
/*----------------------------------------------------------------------
File : fptree.c
Contents: frequent pattern tree management
Author : Christian Borgelt
History : 21.11.2004 file created
22.11.2004 second projection added, bonsai pruning added
09.12.2004 adapted to changed report function
10.12.2004 adapted to general memory management system
28.12.2004 bug in function fpt_delete fixed
06.12.2005 bug in function _project2 fixed
----------------------------------------------------------------------*/
#include
#include
#include
#include "fptree.h"
#ifdef STORAGE
#include "storage.h"
#endif
/*----------------------------------------------------------------------
Preprocessor Definition
----------------------------------------------------------------------*/
#define BLKSIZE 6553 /* block size for memory management */
/*----------------------------------------------------------------------
Type Definition
----------------------------------------------------------------------*/
typedef FPTREE* PROJFN (FPTREE *fpt, int item);
typedef struct { /* --- structure for rec. search */
int min; /* minimum number of items */
int max; /* maximum number of items */
int supp; /* minimum support (num. of trans.) */
int bonsai; /* flag for pruning to bonsai */
PROJFN *proj; /* projection function */
FPTREPFN *report; /* report function for results */
void *data; /* user data for report function */
int cnt; /* number of frequent item sets */
int items[1]; /* item vector for reporting */
} FPRS; /* (structure for rec. search) */
/*----------------------------------------------------------------------
Main Functions
----------------------------------------------------------------------*/
static FPTREE* _create (MEMSYS *mem, int cnt)
{ /* --- create a base f.p. tree */
FPTREE *fpt; /* created frequent pattern tree */
FPTLIST *list; /* to traverse the node lists */
assert(cnt > 0); /* check the function arguments */
fpt = (FPTREE*)malloc(sizeof(FPTREE) +(cnt-1) *sizeof(FPTLIST));
if (!fpt) return NULL; /* allocate the base structure */
fpt->cnt = cnt; /* and note the number of items */
if (mem) fpt->mem = mem; /* if a memory management is given, */
else { /* simply store it, otherwise */
fpt->mem = ms_create(sizeof(FPTNODE), BLKSIZE);
if (!fpt->mem) { free(fpt); return NULL; }
} /* allocate a memory system */
for (list = fpt->lists +cnt; --cnt >= 0; ) {
(--list)->cnt = 0; list->node = NULL; }
return fpt; /* initialize the node lists and */
} /* _create() */ /* return the created f.p. tree */
/*--------------------------------------------------------------------*/
static int _build (FPTREE *fpt, FPTNODE *parent,
TASET *taset, int lft, int rgt, int pos)
{ /* --- recursively build f.p. tree */
int i, k; /* loop variable, buffer */
int item; /* to traverse the items at pos */
FPTNODE *node; /* created freq. pattern tree node */
assert(fpt && taset && (pos >= 0)); /* check the function arguments */
while ((lft <= rgt) && (tas_tsize(taset, lft) <= pos))
lft++; /* skip trans. that are too short */
if (lft > rgt) return 0; /* check for an empty range */
item = k = tas_tract(taset, i = rgt)[pos]; /* get first item */
do { /* traverse the longer transactions */
while (--i >= lft) { /* while not at start of section */
k = tas_tract(taset, i)[pos];
if (k != item) break; /* try to find a transaction */
} /* with a different item */
node = ms_alloc(fpt->mem); /* create a new tree node */
if (!node) return -1; /* for the current item */
node->item = item; /* and store the item */
node->succ = fpt->lists[item].node;
fpt->lists[item].node = node;
node->parent = parent; /* insert the node into the item list */
node->copy = NULL; /* and compute and sum the support */
fpt->lists[item].cnt += node->cnt = rgt -i;
if (_build(fpt, node, taset, i+1, rgt, pos+1) != 0)
return -1; /* build the child node recursively */
item = k; rgt = i; /* remove processed transaction from */
} while (lft <= rgt); /* the interval and note next item */
return 0; /* return 'ok' */
} /* _build() */
/*--------------------------------------------------------------------*/
FPTREE* fpt_create (TASET *taset)
{ /* --- create a freq. pattern tree */
FPTREE *fpt; /* created frequent pattern tree */
assert(taset); /* check the function argument */
fpt = _create(NULL, is_cnt(tas_itemset(taset)));
if (!fpt) return NULL; /* allocate a base f.p. tree */
fpt->itemset = tas_itemset(taset);
fpt->tra = tas_cnt(taset);
if ((fpt->tra > 0) /* if there is at least one trans. */
&& (_build(fpt, NULL, taset, 0, fpt->tra -1, 0) != 0)) {
fpt_delete(fpt); return NULL; }
return fpt; /* recursively build the frequent */
} /* fpt_create() */ /* pattern tree and return it */
/*----------------------------------------------------------------------
The above function assumes that the items in each transaction in taset
are sorted and that the transactions are sorted accordingly.
----------------------------------------------------------------------*/
void fpt_delete (FPTREE *fpt)
{ /* --- delete a freq. pattern tree */
assert(fpt); /* check the function argument */
ms_delete(fpt->mem); /* delete the memory system */
free(fpt); /* and the base structure */
} /* fpt_delete() */
/*--------------------------------------------------------------------*/
static void _prune (FPTREE *fpt)
{ /* --- prune nodes for last item */
FPTNODE *node, *buf; /* to traverse the nodes to delete */
assert(fpt); /* check the function argument */
for (node = fpt->lists[--fpt->cnt].node; node; ) {
buf = node; /* while there is another node */
node = node->succ; /* note the current node and */
ms_free(fpt->mem, buf); /* remove it from the list, */
} /* and then delete the node */
} /* _prune() */
/*--------------------------------------------------------------------*/
static void _detach (FPTREE *fpt)
{ /* --- detach a projection */
FPTNODE *node, *anc; /* to traverse the ancestors */
assert(fpt); /* check the function argument */
node = fpt->lists[--fpt->cnt].node;
while (node) { /* while there is another node */
for (anc = node->parent; anc && anc->copy; anc = anc->parent)
anc->copy = NULL; /* clear the copy pointers */
anc = node; /* note the current node and */
node = node->succ; /* remove it from the list, */
ms_free(fpt->mem, anc); /* then delete the node */
} /* (prune deepest tree level) */
} /* _detach() */
/*--------------------------------------------------------------------*/
static void _cleanup (FPTREE *fpt, FPTREE *proj)
{ /* --- clean up after failure */
assert(fpt && proj); /* check the function argument */
_detach(fpt); /* detach projection from tree */
while (proj->cnt > 0) _prune(proj);
free(proj); /* delete the projection */
} /* _cleanup() */ /* (only called on failure) */
/*--------------------------------------------------------------------*/
static FPTREE* _project1 (FPTREE *fpt, int item)
{ /* --- project a freq. pattern tree */
int i; /* loop variable */
FPTREE *proj; /* projected frequent pattern tree */
FPTNODE *node, *anc, *copy; /* to traverse the tree nodes */
FPTNODE **prev; /* to link copies to their ancestors */
FPTLIST *lists; /* to access the node lists */
assert(fpt); /* check the function argument */
proj = _create(fpt->mem, item);
if (!proj) return NULL; /* create a base freq. pattern tree */
proj->itemset = fpt->itemset; /* note the underlying item set */
lists = proj->lists; /* get the node lists of the proj. */
for (node = fpt->lists[item].node; node; node = node->succ) {
prev = NULL; /* traverse the nodes for the item */
for (anc = node->parent; anc && !anc->copy; anc = anc->parent) {
anc->copy = /* traverse and copy all ancestors */
copy = ms_alloc(fpt->mem); /* that are not yet copied */
if (!copy) { _cleanup(fpt, proj); return NULL; }
if (prev) *prev = copy; /* set parent link from child */
copy->item = i = anc->item;
copy->succ = lists[i].node;
lists[i].node = copy; /* insert copy into corresp. list */
lists[i].cnt += copy->cnt = node->cnt;
copy->copy = NULL; /* set the support of the node */
prev = ©->parent; /* and note the parent pointer */
} /* for later linking */
if (prev) /* set last parent pointer */
*prev = (anc) ? anc->copy : NULL;
for ( ; anc; anc = anc->parent) {
anc->copy->cnt += node->cnt;
lists[anc->item].cnt += node->cnt;
} /* traverse ancestors up to the root */
} /* and sum the support values */
_detach(fpt); /* detach created tree projection, */
return proj; /* prune last level of orig. tree, */
} /* _project1() */ /* and return created projection */
/*--------------------------------------------------------------------*/
static FPTREE* _project2 (FPTREE *fpt, int item)
{ /* --- project a freq. pattern tree */
int i, k; /* loop variables */
FPTREE *proj; /* projected frequent pattern tree */
FPTNODE *node, *par, *copy; /* to traverse the tree nodes */
FPTLIST *lists; /* to access the node lists */
assert(fpt); /* check the function argument */
proj = _create(fpt->mem, item);
if (!proj) return NULL; /* create a base freq. pattern tree */
proj->itemset = fpt->itemset; /* note the underlying item set */
lists = proj->lists; /* get the node lists of the proj. */
for (node = fpt->lists[item].node; node; node = node->succ) {
par = node->parent; /* traverse the nodes with parents */
if (!par) continue; /* in the projected tree */
par->copy = /* create a copy of the parent node */
copy = ms_alloc(fpt->mem); /* (does not exist yet) */
if (!copy) { _cleanup(fpt, proj); return NULL; }
copy->item = i = par->item; /* copy the item of the node and */
copy->succ = lists[i].node; /* insert copy into corresp. list */
lists[i].node = copy; /* in the projected tree */
lists[i].cnt += copy->cnt = node->cnt;
copy->parent = par->parent;
copy->copy = NULL; /* set the support of the node */
}
for (k = item; --k > 0; ) { /* traverse the proj. tree levels */
for (node = lists[k].node; node; node = node->succ) {
par = node->parent; /* traverse the nodes with parents */
if (!par) continue; /* in the projected tree */
copy = par->copy; /* get the copy of the parent */
if (copy) { /* if a copy already exists, */
node->parent = copy; /* set the parent pointer */
copy->cnt += node->cnt;
lists[copy->item].cnt += node->cnt;
continue; /* sum the number of transactions */
} /* and go to the next node */
node->parent = /* create a copy of the parent node */
par->copy = copy = ms_alloc(fpt->mem);
if (!copy) { _cleanup(fpt, proj); return NULL; }
copy->item = i = par->item; /* copy the item of the node and */
copy->succ = lists[i].node; /* insert copy into corresp. list */
lists[i].node = copy; /* in the projected tree */
lists[i].cnt += copy->cnt = node->cnt;
copy->parent = par->parent;
copy->copy = NULL; /* set the support of the node */
}
}
_detach(fpt); /* detach created tree projection, */
return proj; /* prune last level of orig. tree, */
} /* _project2() */ /* and return created projection */
/*----------------------------------------------------------------------
Of the above projection methods method 1 generally seems to yield
better results, which is why it is the default.
----------------------------------------------------------------------*/
static void _bonsai (FPTREE *fpt, int supp)
{ /* --- prune projection to bonsai */
int i, k; /* loop variables, buffers */
FPTLIST *lists; /* to traverse the level lists */
FPTNODE *node, *anc, *prev; /* to traverse the tree nodes */
lists = fpt->lists; /* buffer the tree level vector */
for (i = fpt->cnt; (--i >= 0) && (lists[i].cnt < supp); )
_prune(fpt); /* prune deepest tree levels */
for (k = 0; k < fpt->cnt; k++)/* traverse the levels downwards */
if ((lists[k].cnt < supp) && (lists[k].cnt > 0))
break; /* find first infrequent item */
for (i = k; ++i < fpt->cnt;){ /* traverse the deeper tree levels */
if (lists[i].cnt < supp) /* skip tree levels */
continue; /* that refer to an infrequent item */
prev = NULL; /* init. the previous node */
for (node = lists[i].node; node; node = node->succ) {
for (anc = node->parent; anc && (lists[anc->item].cnt < supp); )
anc = anc->parent; /* skip infrequent ancestor items */
if (anc && anc->copy) /* if the parent has been merged, */
anc = anc->copy; /* go to the node it was merged to */
if (prev && (anc == prev->parent)) {
prev->cnt += node->cnt; /* if two consecutive nodes have the */
node->copy = prev; } /* same parent, merge the two nodes */
else { /* if there is no previous node */
node->parent = anc; /* or it has a different parent, */
prev = node; /* just set the new parent node */
} /* and note the current node */
} /* as the previous one */
} /* (for a possible later merger) */
for ( ; k < fpt->cnt; k++) { /* traverse the deeper tree levels */
node = lists[k].node; /* get the node list of the level */
if (!node) continue; /* skip empty tree levels */
if (lists[k].cnt < supp) { /* if a level is to be deleted */
do { /* delete the node list: */
anc = node; /* note the current node, */
node = node->succ; /* go to the successor node, */
ms_free(fpt->mem, anc); /* and delete the current node */
} while (node); /* while the list is not empty */
lists[k].node = NULL; lists[k].cnt = 0;
continue; /* clear the list variables */
}
while (node->succ) { /* while not at last list element */
if (!node->succ->copy) { /* if the successor was not merged, */
node = node->succ; continue; } /* keep the successor */
anc = node->succ; /* if the successor was merged, */
node->succ = anc->succ; /* note the successor node, */
ms_free(fpt->mem, anc); /* remove it from the list, */
} /* and then delete it */
}
} /* _bonsai() */
/*----------------------------------------------------------------------
For clarity and memory efficiency the above function already removes the
pruned tree levels. This is not really necessary as it would otherwise
be done in the search function, once the level to remove becomes the
deepest in the pruned tree.
----------------------------------------------------------------------*/
static int _search (FPTREE *fpt, int d, FPRS *rs)
{ /* --- rec. search frequent patterns */
int i, k; /* loop variable, result buffer */
FPTREE *proj; /* projected frequent pattern tree */
FPTLIST *list; /* node list for current item */
assert(fpt); /* check the function argument */
d++; /* increment the recursion depth */
for (i = fpt->cnt; --i >= 0; ) {
list = fpt->lists +i; /* traverse the items and their lists */
if (list->cnt < rs->supp) { /* skip infrequent items */
_prune(fpt); continue; } /* (just prune their node list) */
rs->items[d-1] = i; /* store the last item of the set */
if (d >= rs->min) /* report the found freq. item set */
rs->cnt += rs->report(rs->items, d, list->cnt, rs->data);
if (!fpt->lists[i].node /* if the node list is empty */
|| (d >= rs->max) /* or maximal size has been reached */
|| (i <= 0)) { /* or at first (and thus only) item, */
_prune(fpt); continue; } /* skip the recursion */
proj = rs->proj(fpt, i); /* project the freq. pattern tree, */
if (rs->bonsai) /* if the bonsai flag is set, */
_bonsai(proj, rs->supp); /* prune the projection to a bonsai */
k = _search(proj, d, rs); /* recursively search the projection, */
free(proj); /* and then delete the proj. again */
if (k) return -1; /* if an error occurred, abort */
}
return 0; /* return 'ok' */
} /* _search() */
/*--------------------------------------------------------------------*/
int fpt_search (FPTREE *fpt, int supp, int min, int max, int mode,
FPTREPFN report, void *data)
{ /* --- search frequent patterns */
int n; /* number of frequent item sets */
FPRS *rs; /* structure for recursive search */
assert(fpt /* check the function arguments */
&& (supp > 0) && (max >= min) && (min >= 0) && report);
rs = (FPRS*)malloc(sizeof(FPRS) +(fpt->cnt -1) *sizeof(int));
if (!rs) return -1; /* create recursive search structure */
rs->supp = (supp > 0) ? supp : 1;
rs->min = min; /* initialize the fields */
rs->max = max; /* with the given parameters */
rs->bonsai = (mode & FPT_BONSAI) ? 1 : 0;
rs->proj = (mode & FPT_ALTPROJ) ? _project2 : _project1;
rs->report = report;
rs->data = data;
rs->cnt = 0; /* initialize the item set counter */
if (_search(fpt, 0, rs) != 0) /* search the tree recursively */
return -1; /* for frquent patterns */
n = rs->cnt; free(rs); /* delete the search structure and */
return n; /* return number of freq. item sets */
} /* fpt_search() */
/*--------------------------------------------------------------------*/
#ifndef NDEBUG
void fpt_show (FPTREE *fpt, const char *title)
{ /* --- show a freq. pattern tree */
int i; /* loop variable */
FPTNODE *node; /* to traverse the node lists */
printf("\n%s\n", title); /* leave one line empty */
for (i = 0; i < fpt->cnt; i++) { /* traverse the items */
printf("%s ", is_name(fpt->itemset, i)); /* print the item */
printf("(%d):", fpt->lists[i].cnt); /* and its support */
for (node = fpt->lists[i].node; node; node = node->succ) {
printf(" %d", node->cnt); /* print the node support */
if (node->parent) /* if the parent exists */
printf("[%s]", is_name(fpt->itemset, node->parent->item));
} /* print the item in the parent */
printf("\n"); /* terminate the line */
} /* after each node list */
} /* fpt_show() */
#endif