/* www.pudn.com > bayes.rar > fbayes.c */


/*----------------------------------------------------------------------
  File    : fbayes.c
  Contents: Full Bayes classifier management
  Author  : Christian Borgelt
  History : 26.11.2000 file created
            29.11.2000 first version completed
            15.07.2001 parser improved (global variables removed)
            16.07.2001 adapted to modified module scan
            17.07.2001 parser improved (conditional look ahead)
            15.09.2001 '*' instead of list of attributes made possible
            26.04.2003 function fbc_rand added
            12.08.2004 adapted to new module parse
----------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <assert.h>
#include "fbayes.h"
#ifdef STORAGE
#include "storage.h"
#endif

/*----------------------------------------------------------------------
  Preprocessor Definitions
----------------------------------------------------------------------*/
#define EPSILON     1e-12       /* to handle roundoff errors */
#define BLKSIZE     16          /* block size for vectors */

/*----------------------------------------------------------------------
  Auxiliary Functions
----------------------------------------------------------------------*/
#ifdef FBC_INDUCE

/*----------------------------------------------------------------------
  _clsrsz: make room for (and create) the distributions of new classes

  fbc     classifier whose class dependent vectors are to be resized
  clscnt  new number of classes (the callers in this file pass cls+1)

  returns 0 on success, -1 if a memory (re)allocation or the creation
  of a multivariate normal distribution failed; in the latter case all
  distributions created by this call are deleted again and the number
  of classes of the classifier is left unchanged.
----------------------------------------------------------------------*/
static int _clsrsz (FBC *fbc, int clscnt)
{                               /* --- resize class dependent vectors */
  int    i, n;                  /* loop variable, number of new dists. */
  int    clsvsz;                /* size of the class dep. vectors */
  double *frq;                  /* to traverse the frequency vectors */
  MVNORM **mvn;                 /* to traverse the distributions */

  assert(fbc && (clscnt >= 0)); /* check the function arguments */

  /* --- resize the class dependent vectors --- */
  /* frqs, priors and posts live in one block of 3*clsvsz doubles.  */
  /* Only the frequencies keep their position when the block grows; */
  /* priors and posts merely receive their new offsets (their       */
  /* contents are rewritten by fbc_setup and fbc_exec).             */
  clsvsz = fbc->clsvsz;         /* get the class dep. vector size */
  if (clscnt >= clsvsz) {       /* if the vectors are too small */
    clsvsz += (clsvsz > BLKSIZE) ? clsvsz >> 1 : BLKSIZE;
    if (clscnt >= clsvsz) clsvsz = clscnt;
    frq = (double*)realloc(fbc->frqs, clsvsz *3 *sizeof(double));
    if (!frq) return -1;        /* resize the frequencies vector */
    fbc->frqs   = frq;          /* and set the new vector */
    fbc->priors = fbc->frqs   +clsvsz;  /* organize the rest of the */
    fbc->posts  = fbc->priors +clsvsz;  /* allocated memory block */
    for (frq += clsvsz, i = clsvsz -fbc->clsvsz; --i >= 0; )
      *--frq = 0;               /* clear the new vector fields */
    mvn = (MVNORM**)realloc(fbc->mvns, clsvsz *sizeof(MVNORM*));
    if (!mvn) return -1;        /* (fbc->clsvsz is still the old size, */
    fbc->mvns   = mvn;          /* so the state stays consistent) */
    fbc->clsvsz = clsvsz;       /* set the new size of the */
  }                             /* class dependent vectors */

  /* --- create new conditional distributions --- */
  n   = clscnt -fbc->clscnt;    /* get the number of new classes */
  mvn = fbc->mvns +clscnt;      /* traverse the normal distributions */
  for (i = n; --i >= 0; ) {     /* (from the last one downwards) */
    *--mvn = mvn_create(fbc->numcnt);
    if (!*mvn) break;           /* create new normal distributions */
  }
  if (i >= 0) {                 /* if an error occurred */
    /* mvn points to the slot whose creation failed (it holds NULL); */
    /* the n-1-i distributions created before it follow directly.    */
    for (i = n -1 -i; --i >= 0; )
      mvn_delete(*++mvn);       /* delete the newly created */
    return -1;                  /* multivariate normal distributions */
  }                             /* and abort the function */

  fbc->clscnt = clscnt;         /* set the new number of classes */
  return 0;                     /* return 'ok' */
}  /* _clsrsz() */

#endif
/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  _getvals: fill fbc->vals with the values of the numeric attributes

  fbc  classifier (supplies the numeric attribute ids. and the buffer)
  tpl  tuple to read the values from; if NULL, the values are read
       from the attribute instances (att_inst) instead

  Values at or below the UV_FLT / UV_INT threshold are stored as
  MVN_UNKNOWN, all others are converted to double.
----------------------------------------------------------------------*/
static void _getvals (FBC *fbc, const TUPLE *tpl)
{                               /* --- get the attribute values */
  int        k;                 /* index of the numeric attribute */
  FBCID      *nid;              /* identification of the attribute */
  const INST *val;              /* instance holding its value */

  assert(fbc);                  /* check the function argument */
  for (k = fbc->numcnt; --k >= 0; ) {
    nid = fbc->numids +k;       /* traverse the numeric attributes */
    if (tpl) val = tpl_colval(tpl, nid->id);
    else     val = att_inst(nid->att);
    if (nid->type == AT_FLT) {  /* if the attribute is real-valued */
      if (val->f <= UV_FLT) fbc->vals[k] = MVN_UNKNOWN;
      else                  fbc->vals[k] = (double)val->f; }
    else {                      /* if the attribute is integer-valued */
      if (val->i <= UV_INT) fbc->vals[k] = MVN_UNKNOWN;
      else                  fbc->vals[k] = (double)val->i;
    }                           /* (the k-th buffer entry belongs */
  }                             /* to the k-th numeric attribute) */
}  /* _getvals() */

/*----------------------------------------------------------------------
  Main Functions
----------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  fbc_create: create an empty full Bayes classifier

  attset  attribute set the classifier refers to (not copied)
  clsid   index of the class attribute (must be of type AT_SYM)

  returns the new classifier or NULL if memory is exhausted
----------------------------------------------------------------------*/
FBC* fbc_create (ATTSET *attset, int clsid)
{                               /* --- create a full Bayes classifier */
  int   k;                      /* loop variable */
  int   n;                      /* number of attributes */
  int   type;                   /* type of the current attribute */
  int   cnt;                    /* number of numeric attributes */
  FBC   *fbc;                   /* created classifier */
  ATT   *att;                   /* to traverse the attributes */
  FBCID *nid;                   /* to access a numeric attribute id. */

  assert(attset && (clsid >= 0) /* check the function arguments */
      && (clsid < as_attcnt(attset))
      && (att_type(as_att(attset, clsid)) == AT_SYM));

  /* --- create the classifier body --- */
  /* The body is allocated with room for one int per attribute      */
  /* (presumably the trailing 'flags' vector of FBC - confirm in    */
  /* fbayes.h). All pointers are cleared first, so that fbc_delete  */
  /* can be used for the cleanup on any later error.                */
  n   = as_attcnt(attset);      /* get the number of attributes */
  fbc = (FBC*)malloc(sizeof(FBC) +(n-1) *sizeof(int));
  if (!fbc) return NULL;        /* allocate the classifier body */
  fbc->attset = attset;
  fbc->attcnt = n;
  fbc->numcnt = 0;
  fbc->numids = NULL;
  fbc->clsid  = clsid;
  fbc->clscnt = fbc->clsvsz = att_valcnt(as_att(attset, clsid));
  fbc->total  = 0;
  fbc->lcorr  = 0;
  fbc->mode   = 0;
  fbc->frqs   = NULL;
  fbc->priors = NULL;
  fbc->posts  = NULL;
  fbc->vals   = NULL;
  fbc->mvns   = NULL;

  /* --- create the attribute information --- */
  fbc->numids = (FBCID*)malloc(fbc->attcnt *sizeof(FBCID));
  if (!fbc->numids) { fbc_delete(fbc, 0); return NULL; }
  cnt = 0;                      /* collect the numeric attributes */
  for (k = 0; k < fbc->attcnt; k++) {
    att  = as_att(attset, k);
    type = att_type(att);       /* only integer- and real-valued */
    if ((type == AT_INT) || (type == AT_FLT)) {   /* atts. count */
      nid = fbc->numids +cnt++;
      nid->id   = k;            /* note the identification, */
      nid->type = type;         /* the type, and the attribute */
      nid->att  = att;          /* object of each numeric attribute */
    }
  }
  fbc->numcnt = cnt;            /* set the number of numeric atts. */
  fbc->vals = (double*)malloc(fbc->attcnt *sizeof(double));
  if (!fbc->vals) { fbc_delete(fbc, 0); return NULL; }
                                /* create an attribute value buffer */

  /* --- initialize the distributions --- */
  if (fbc->clscnt <= 0)         /* if there are no classes (yet), */
    return fbc;                 /* the classifier is complete */
  fbc->frqs = (double*)malloc(fbc->clsvsz *3 *sizeof(double));
  if (!fbc->frqs) { fbc_delete(fbc, 0); return NULL; }
  fbc->priors = fbc->frqs   +fbc->clsvsz;   /* one block holds frqs., */
  fbc->posts  = fbc->priors +fbc->clsvsz;   /* priors and posteriors */
  for (k = fbc->clsvsz; --k >= 0; )
    fbc->frqs[k] = 0;           /* init. the class frequencies */
  fbc->mvns = (MVNORM**)calloc(fbc->clscnt, sizeof(MVNORM*));
  if (!fbc->mvns) { fbc_delete(fbc, 0); return NULL; }
  for (k = fbc->clscnt; --k >= 0; ) {   /* create one multivariate */
    fbc->mvns[k] = mvn_create(fbc->numcnt);   /* normal per class */
    if (!fbc->mvns[k]) { fbc_delete(fbc, 0); return NULL; }
  }                             /* (vector is zeroed for the cleanup) */

  return fbc;                   /* return the created classifier */
}  /* fbc_create() */

/*--------------------------------------------------------------------*/

FBC* fbc_dup (const FBC *fbc, int dupas)
{                               /* --- duplicate a full Bayes class. */
  int    i;                     /* loop variable */
  FBC    *dup;                  /* created classifier duplicate */
  ATTSET *attset;               /* duplicate of attribute set */
  double *df; const double *sf; /* to traverse the frequency vectors */
  FBCID  *di; const FBCID  *si; /* to traverse the attribute ids. */
  MVNORM **mvn;                 /* to traverse the distributions */

  assert(fbc);                  /* check the function argument */

  /* --- copy the classifier body --- */
  attset = fbc->attset;         /* get the attribute set */
  if (dupas) {                  /* if the corresp. flag is set, */
    attset = as_dup(attset);    /* duplicate the attribute set */
    if (!attset) return NULL;   /* of the original classifier, */
  }                             /* and then create a classifier */
  dup = (FBC*)malloc(sizeof(FBC) +(fbc->attcnt -1) *sizeof(int));
  if (!dup) { if (dupas) as_delete(attset); return NULL; }
  dup->attset = attset;         /* allocate a classifier body */
  dup->attcnt = fbc->attcnt;    /* and copy the fields */
  dup->numcnt = fbc->numcnt;
  dup->numids = NULL;
  dup->clsid  = fbc->clsid;
  dup->clsvsz = fbc->clscnt;
  dup->clscnt = fbc->clscnt;
  dup->total  = fbc->total;
  dup->lcorr  = fbc->lcorr;
  dup->mode   = fbc->mode;
  dup->frqs   = dup->priors = dup->posts = NULL;
  dup->vals   = NULL;           /* clear pointers for a */
  dup->mvns   = NULL;           /* proper cleanup on error */

  /* --- copy the attribute information --- */
  dup->numids = di = (FBCID*)malloc(dup->attcnt *sizeof(FBCID));
  if (!di) { fbc_delete(dup, dupas); return NULL; }
  si = fbc->numids +dup->numcnt;
  for (di += i = dup->numcnt; --i >= 0; )
    *--di = *--si;              /* copy the attribute identifications */
  dup->vals = (double*)malloc(dup->attcnt *sizeof(double));
  if (!dup->vals) { fbc_delete(dup, dupas); return NULL; }
                                /* create an attribute value buffer */

  /* --- copy the distributions --- */
  if (dup->clscnt > 0) {        /* if there are classes, */
    dup->frqs =                 /* allocate class vectors */
    df = (double*)malloc(dup->clsvsz *3 *sizeof(double));
    if (!df) { fbc_delete(dup, dupas); return NULL; }
    dup->priors = dup->frqs   +dup->clsvsz;
    dup->posts  = dup->priors +dup->clsvsz;
    sf = fbc->frqs +2 *dup->clscnt;
    for (df += i = 2 *dup->clscnt; --i >= 0; )
      *--df = *--sf;            /* copy the class frequencies */
    dup->mvns = mvn = (MVNORM**)calloc(dup->clscnt, sizeof(MVNORM*));
    if (!mvn) { fbc_delete(dup, dupas); return NULL; }
    for (mvn += i = dup->clscnt; --i >= 0; ) {
      *--mvn = mvn_dup(fbc->mvns[i]);
      if (!*mvn) { fbc_delete(dup, dupas); return NULL; }
    }                           /* allocate a distribution vector and */
  }                             /* copy multivariate normal distribs. */

  return dup;                   /* return the created duplicate */
}  /* fbc_dup() */

/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  fbc_delete: delete a full Bayes classifier

  fbc    classifier to delete (may be only partially constructed:
         NULL vectors and NULL distribution entries are skipped)
  delas  if nonzero, the attribute set is deleted as well
----------------------------------------------------------------------*/
void fbc_delete (FBC *fbc, int delas)
{                               /* --- delete a full Bayes classifier */
  int k;                        /* index of the class */

  assert(fbc);                  /* check the function argument */
  if (fbc->mvns) {              /* if there is a distribution vector */
    for (k = fbc->clscnt; --k >= 0; ) {
      if (fbc->mvns[k])         /* delete all existing multivariate */
        mvn_delete(fbc->mvns[k]);     /* normal distributions */
    }
    free(fbc->mvns);            /* and the vector itself */
  }
  /* the NULL checks are kept on purpose: with STORAGE defined, */
  /* storage.h may substitute its own version of free           */
  if (fbc->frqs)   { free(fbc->frqs);   }   /* frqs./priors/posts */
  if (fbc->vals)   { free(fbc->vals);   }   /* value buffer */
  if (fbc->numids) { free(fbc->numids); }   /* numeric att. ids. */
  if (delas)       { as_delete(fbc->attset); }
  free(fbc);                    /* delete the classifier body */
}  /* fbc_delete() */

/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  fbc_clear: remove all training information from a classifier

  Resets the total case count and all class frequencies to zero and
  clears the multivariate normal distribution of every class.
----------------------------------------------------------------------*/
void fbc_clear (FBC *fbc)
{                               /* --- clear a full Bayes classifier */
  int k;                        /* index of the class */

  assert(fbc);                  /* check the function argument */
  fbc->total = 0;               /* clear the total number of cases */
  for (k = fbc->clscnt; --k >= 0; ) {
    fbc->frqs[k] = 0;           /* clear the class frequency */
    mvn_clear(fbc->mvns[k]);    /* and the conditional distribution */
  }
}  /* fbc_clear() */

/*--------------------------------------------------------------------*/
#ifdef FBC_INDUCE

/*----------------------------------------------------------------------
  fbc_add: add one training case to a classifier

  fbc  classifier to update
  tpl  tuple to add; if NULL, the current instantiation of the
       attribute set (and its weight) is added instead

  returns 0 on success (a case with a negative, i.e. unknown, class
  is silently ignored) and -1 if the class dependent vectors could
  not be enlarged for a new class
----------------------------------------------------------------------*/
int fbc_add (FBC *fbc, const TUPLE *tpl)
{                               /* --- add an instantiation */
  int   c;                      /* class of the case */
  float w;                      /* weight of the case */

  assert(fbc);                  /* check the function argument */

  /* --- get class and weight --- */
  if (!tpl) {                   /* without a tuple use the attribute */
    c = att_inst(as_att(fbc->attset, fbc->clsid))->i;   /* set */
    w = as_getwgt(fbc->attset); }
  else {                        /* otherwise read the tuple */
    c = tpl_colval(tpl, fbc->clsid)->i;
    w = tpl_getwgt(tpl);
  }
  if (c < 0) return 0;          /* skip cases with an unknown class */
  assert(w >= 0.0F);            /* check the case weight */

  /* --- update the class distribution --- */
  if (c >= fbc->clscnt) {       /* a class not seen before needs */
    if (_clsrsz(fbc, c+1) != 0) /* larger class dependent vectors */
      return -1;                /* (frequencies and distributions) */
  }
  fbc->frqs[c] += w;            /* count the case for its class */
  fbc->total   += w;            /* and for the total */

  /* --- update the conditional distribution --- */
  _getvals(fbc, tpl);           /* collect the numeric values in */
  mvn_add(fbc->mvns[c], fbc->vals, w);  /* fbc->vals and add them */
  return 0;                     /* return 'ok' */
}  /* fbc_add() */

/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  fbc_induce: induce a full Bayes classifier from a data table

  table  table with the training cases
  clsid  index of the class column (must be of type AT_SYM)
  mode   induction mode; FBC_DUPAS makes the classifier work on a
         duplicate of the table's attribute set, the remaining flags
         are passed on to fbc_setup
  lcorr  Laplace correction (passed on to fbc_setup)

  returns the induced classifier or NULL if memory is exhausted; in
  the latter case the partially built classifier (and a duplicated
  attribute set) is released again
----------------------------------------------------------------------*/
FBC* fbc_induce (TABLE *table, int clsid, int mode, double lcorr)
{                               /* --- create a full Bayes classifier */
  int    i;                     /* loop variable */
  FBC    *fbc;                  /* created full Bayes classifier */
  ATTSET *attset;               /* attribute set of the classifier */

  assert(table                  /* check the function arguments */
      && (clsid >= 0) && (clsid < tab_colcnt(table))
      && (att_type(as_att(tab_attset(table), clsid)) == AT_SYM));

  /* --- create a classifier --- */
  attset = tab_attset(table);   /* get the attribute set of the table */
  if (mode & FBC_DUPAS) {       /* if the corresp. flag is set, */
    attset = as_dup(attset);    /* duplicate the attribute set */
    if (!attset) return NULL;   /* of the given data table, */
  }                             /* then create a classifier */
  fbc = fbc_create(attset, clsid);
  if (!fbc) { if (mode & FBC_DUPAS) as_delete(attset); return NULL; }

  /* --- build the classifier --- */
  for (i = tab_tplcnt(table); --i >= 0; ) {
    if (fbc_add(fbc, tab_tpl(table, i)) != 0) {
      /* a failed fbc_add must not be ignored, otherwise an      */
      /* incompletely trained classifier would be returned; the  */
      /* attribute set is deleted only if it is our own copy     */
      fbc_delete(fbc, mode & FBC_DUPAS);
      return NULL;              /* on error delete the classifier */
    }                           /* and abort the function */
  }                             /* (add all tuples) */
  fbc_setup(fbc, mode, lcorr);  /* set up the classifier */

  return fbc;                   /* return the created classifier */
}  /* fbc_induce() */

/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  fbc_mark: mark the attributes used by a classifier

  Sets the mark of every attribute to -1, then the mark of every
  numeric attribute to 1 and the mark of the class attribute to 0.

  returns the number of marked attributes (numeric atts. plus class)
----------------------------------------------------------------------*/
int fbc_mark (FBC *fbc)
{                               /* --- mark selected attributes */
  int k;                        /* loop variable, attribute index */

  assert(fbc);                  /* check the function argument */
  for (k = fbc->attcnt; --k >= 0; ) {
    att_setmark(as_att(fbc->attset, k), -1);
  }                             /* first unmark all attributes */
  for (k = fbc->numcnt; --k >= 0; ) {
    att_setmark(fbc->numids[k].att, 1);
  }                             /* then mark the numeric attributes */
  att_setmark(as_att(fbc->attset, fbc->clsid), 0);
                                /* and finally the class attribute */
  return fbc->numcnt +1;        /* return the number of marked atts. */
}  /* fbc_mark() */

#endif
/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  fbc_setup: compute the probabilities from the collected counts

  fbc    classifier to set up
  mode   estimation mode (only the FBC_MAXLLH flag is evaluated)
  lcorr  Laplace correction (must not be negative); it is added to
         every class frequency before the normalization

  The mode and the Laplace correction are stored in the classifier.
----------------------------------------------------------------------*/
void fbc_setup (FBC *fbc, int mode, double lcorr)
{                               /* --- set up a full Bayes classifier */
  int    k;                     /* index of the class */
  double denom;                 /* normalization factor for priors */

  assert(fbc && (lcorr >= 0));  /* check the function arguments */
  mode &= FBC_MAXLLH;           /* keep only the estimation flag */
  fbc->mode  = mode;            /* and note the estimation */
  fbc->lcorr = lcorr;           /* parameters in the classifier */

  /* --- estimate class probabilities --- */
  denom = fbc->total +lcorr *fbc->clscnt;
  if (denom <= 0) {             /* if the denominator is invalid, */
    for (k = fbc->clscnt; --k >= 0; )
      fbc->priors[k] = 0; }     /* clear all probabilities */
  else {                        /* if the denominator is valid, */
    for (k = fbc->clscnt; --k >= 0; )
      fbc->priors[k] = (fbc->frqs[k] +lcorr) /denom;
  }                             /* estimate the class probabilities */

  /* --- estimate conditional probabilities --- */
  mode |= MVN_EXPVAR|MVN_COVAR|MVN_INVERSE|MVN_DECOM;
  for (k = fbc->clscnt; --k >= 0; ) {
    mvn_calc(fbc->mvns[k], mode);
  }                             /* calculate all parameters */
}  /* fbc_setup() */

/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  fbc_exec: classify a case with a full Bayes classifier

  fbc   classifier to execute (must have been set up)
  tpl   tuple to classify; if NULL, the current instantiation of
        the attribute set is classified
  conf  if not NULL, receives the normalized posterior probability
        of the returned class (0 if all posteriors vanish)

  returns the index of the most probable class (the lowest index if
  there is a tie) or -1 if the classifier does not have any classes
  (negative class indices denote 'unknown' in fbc_add as well)

  The unnormalized posteriors are left in fbc->posts.
----------------------------------------------------------------------*/
int fbc_exec (FBC *fbc, const TUPLE *tpl, double *conf)
{                               /* --- execute a full Bayes class. */
  int    i;                     /* loop variable */
  int    best;                  /* index of the most probable class */
  double sum;                   /* sum of class probabilities */

  assert(fbc);                  /* check the function argument */
  _getvals(fbc, tpl);           /* get the attribute value vector */
  if (fbc->clscnt <= 0) {       /* a classifier without classes has */
    if (conf) *conf = 0;        /* no posterior vector to evaluate */
    return -1;                  /* (fbc_create leaves it NULL), so */
  }                             /* report an unknown class */
  for (i = fbc->clscnt; --i >= 0; )   /* traverse the classes */
    fbc->posts[i] = fbc->priors[i] *mvn_eval(fbc->mvns[i], fbc->vals);
                                /* compute the posterior probability */
  best = 0;                     /* start with the first class */
  sum  = fbc->posts[0];
  for (i = 1; i < fbc->clscnt; i++) {
    if (fbc->posts[i] > fbc->posts[best])
      best = i;                 /* find the most probable class */
    sum += fbc->posts[i];       /* and sum all probabilities */
  }                             /* (for the later normalization) */
  if (conf) *conf = (sum > 0) ? fbc->posts[best] /sum : 0;
  return best;                  /* compute a confidence value and */
}  /* fbc_exec() */             /* return the classification result */

/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  fbc_rand: generate a random case from a full Bayes classifier

  fbc    classifier to sample from (must have been set up)
  drand  random number generator; its result is compared to the
         cumulated class probabilities (presumably it is expected
         to lie in [0,1) - confirm with the callers)

  A class is chosen according to the class probabilities, then a
  point is drawn from the multivariate normal distribution of this
  class. Class and point are stored in the attribute instances.

  returns the generated point as delivered by mvn_rand (one value per
  numeric attribute) or NULL if the classifier has no classes
----------------------------------------------------------------------*/
double* fbc_rand (FBC *fbc, double drand (void))
{                               /* --- generate a random tuple */
  int    i;                     /* loop variable */
  int    cls;                   /* index of the chosen class */
  double t, sum;                /* random number, sum of probs. */
  double *pnt;                  /* generated random point */
  FBCID  *q;                    /* to traverse the attributes */

  assert(fbc && drand);         /* check the function arguments */
  if (fbc->clscnt <= 0)         /* without classes there is no */
    return NULL;                /* distribution to sample from */

  t = drand();                  /* generate a random number */
  for (sum = 0, cls = 0; cls < fbc->clscnt; cls++) {
    sum += fbc->priors[cls]; if (sum >= t) break; }
  if (cls >= fbc->clscnt)       /* find the class that corresponds */
    cls = fbc->clscnt -1;       /* to the generated random number */
  att_inst(as_att(fbc->attset, fbc->clsid))->i = cls;
  pnt = mvn_rand(fbc->mvns[cls], drand);  /* generate a random point */
  for (i = fbc->numcnt; --i >= 0; ) {
    q = fbc->numids +i;         /* copy the point to the att. set */
    if (q->type == AT_FLT)      /* if the attribute is real-valued */
      att_inst(q->att)->f = (float)pnt[i];
    else                        /* integer-valued attributes are read */
      att_inst(q->att)->i       /* through '->i' (see _getvals), so */
        = (int)((pnt[i] < 0) ? pnt[i] -0.5 : pnt[i] +0.5);
  }                             /* store the rounded value there */
  return pnt;                   /* return the generated point */
}  /* fbc_rand() */

/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  fbc_desc: write a textual description of a classifier to a file

  fbc     classifier to describe
  file    file to write to
  mode    description mode: FBC_TITLE prints a title comment,
          FBC_REL adds the class probabilities in percent
  maxlen  maximal length of an output line (<= 0: no limit)

  returns 0 if no write error is pending on the file, -1 otherwise
----------------------------------------------------------------------*/
int fbc_desc (FBC *fbc, FILE *file, int mode, int maxlen)
{                               /* --- describe a full Bayes class. */
  int   c, k;                   /* class index, loop variable */
  int   col, indent;            /* current column and indentation */
  int   width;                  /* width of a formatted name/ruler */
  ATT   *clsatt;                /* the class attribute */
  FBCID *nid;                   /* to access the numeric att. ids. */
  char  name[4*AS_MAXLEN+4];    /* buffer for formatted names */

  assert(fbc && file);          /* check the function arguments */

  /* --- print a header (as a comment) --- */
  if (mode & FBC_TITLE) {       /* if the title flag is set */
    width = (maxlen > 0) ? maxlen -2 : 70;
    fputs("/*", file);          /* print the upper ruler, */
    for (k = 0; k < width; k++) fputc('-', file);
    fputs("\n  full Bayes classifier\n", file);
    for (k = 0; k < width; k++) fputc('-', file);
    fputs("*/\n", file);        /* the title and the lower ruler */
  }
  if (maxlen <= 0) maxlen = INT_MAX;   /* no limit requested */

  /* --- start description --- */
  clsatt = as_att(fbc->attset, fbc->clsid);
  sc_format(name, att_name(clsatt), 0);
  fputs("fbc(", file);          /* print the class attribute name */
  fputs(name, file);            /* as the argument of 'fbc' */
  fputs(") = {\n", file);       /* and open the classifier */
  if ((fbc->lcorr > 0) || fbc->mode) {
    /* parameters are printed only if they differ from the defaults */
    fprintf(file, "  params = %g", fbc->lcorr);
    if (fbc->mode & FBC_MAXLLH) fputs(", maxllh", file);
    fputs(";\n", file);
  }

  /* --- print class distribution --- */
  fputs("  prob(", file);       /* 'name' still holds the */
  fputs(name, file);            /* class attribute name */
  fputs(") = {\n    ", file);
  indent = att_valwd(clsatt, 0) +4;    /* column of the ':' */
  for (c = 0; c < fbc->clscnt; c++) {  /* traverse the classes */
    if (c > 0) fputs(",\n    ", file); /* one class per line */
    width = sc_format(name, att_valname(clsatt, c), 0);
    fputs(name, file);          /* print the class name */
    for (col = width+4; col < indent; col++)
      putc(' ', file);          /* and pad it to equal width */
    fprintf(file, ": %g", fbc->frqs[c]);        /* abs. frequency */
    if (mode & FBC_REL)         /* and (optionally) the probability */
      fprintf(file, " (%.1f%%)", fbc->priors[c] *100);
  }
  fputs(" };\n", file);         /* terminate the class distribution */

  /* --- print conditional distributions --- */
  if (fbc->numcnt > 0) {        /* if there are numeric attributes */
    fputs("  prob(", file);     /* (this prefix is 7 chars. wide) */
    indent = 7; col = indent;   /* list the numeric attributes */
    for (k = 0; k < fbc->numcnt; k++) {
      nid = fbc->numids +k;     /* get the next numeric attribute */
      if (k > 0) {              /* separate it from its predecessor */
        fputc(',', file); col++; }
      width = sc_format(name, att_name(nid->att), 0);
      if ((col > indent)        /* if the line is not empty and */
      &&  (col +width > maxlen-1)) {   /* would get too long, */
        fputc('\n', file);      /* start a new line and indent */
        for (col = 0; col < indent; col++) fputc(' ', file);
      }                         /* to the opening parenthesis */
      fputs(name, file);        /* print the attribute name */
      col += width;             /* and advance the position */
    }
    fputc('|', file);           /* print condition indicator */
    clsatt = as_att(fbc->attset, fbc->clsid);
    sc_format(name, att_name(clsatt), 0);   /* (buffer was reused) */
    fputs(name, file);          /* print the class attribute name */
    fputs(") = {\n    ", file); /* and start the distribution */
    indent = att_valwd(clsatt, 0) +4;  /* column of the ':' */
    for (c = 0; c < fbc->clscnt; c++) {     /* traverse the classes */
      if (c > 0) fputs(",\n    ", file);    /* one class per line */
      width = sc_format(name, att_valname(clsatt, c), 0);
      fputs(name, file);        /* print the class name */
      for (col = width+4; col < indent; col++)
        putc(' ', file);        /* and pad it to equal width */
      fputs(": N(", file);      /* start a normal distribution */
      /* NOTE(review): the meaning of the negative indentation is */
      /* defined by mvn_desc - confirm there                      */
      mvn_desc(fbc->mvns[c], file, -(indent+4), maxlen);
      fputc(')', file);         /* terminate the normal distrib. */
    }
    fputs(" };\n", file);       /* terminate the cond. distribution */
  }
  fputs("};\n", file);          /* terminate the classifier */

  if (ferror(file)) return -1;  /* return the write status */
  return 0;
}  /* fbc_desc() */

/*--------------------------------------------------------------------*/
#ifdef FBC_PARSE

static int _parse (ATTSET *attset, SCAN *scan, FBC **pfbc)
{                               /* --- parse a full Bayes classifier */
  int    i = -1, t;             /* loop variable, buffer */
  int    clsid, attid;          /* (class) attribute index */
  ATT    *att;                  /* class attribute */
  FBC    *fbc;                  /* created full Bayes classifier */
  double *p, f;                 /* to traverse the frequencies */
  int    *flags;                /* to traverse the attribute flags */
  FBCID  *ni;                   /* to traverse the numeric att. ids. */

  /* --- read start of description --- */
  if ((sc_token(scan) != T_ID)
  ||  (strcmp(sc_value(scan), "fbc") != 0))
    ERR_STR("fbc");             /* check for 'fbc' */
  GET_TOK();                    /* consume 'fbc' */
  GET_CHR('(');                 /* consume '(' */
  t = sc_token(scan);           /* check for a name */
  if ((t != T_ID) && (t != T_NUM)) ERROR(E_ATTEXP);
  clsid = as_attid(attset, sc_value(scan));
  if (clsid < 0)                   ERROR(E_UNKATT);
  att = as_att(attset, clsid);  /* get and check the class attribute */
  if (att_type(att) != AT_SYM)     ERROR(E_CLSTYPE);
  if (att_valcnt(att) < 1)         ERROR(E_CLSCNT);
  *pfbc = fbc = fbc_create(attset, clsid);
  if (!fbc) ERROR(E_NOMEM);     /* create a full Bayes classifier */
  GET_TOK();                    /* consume the class name */
  GET_CHR(')');                 /* consume ')' */
  GET_CHR('=');                 /* consume '=' */
  GET_CHR('{');                 /* consume '{' */

  /* --- read parameters --- */
  if ((sc_token(scan) == T_ID)  /* if 'params' follows */
  &&  (strcmp(sc_value(scan), "params") == 0)) {
    GET_TOK();                  /* consume 'params' */
    GET_CHR('=');               /* consume '=' */
    if (sc_token(scan) != T_NUM)  ERROR(E_NUMEXP);
    fbc->lcorr = atof(sc_value(scan));
    if (fbc->lcorr < 0)           ERROR(E_ILLNUM);
    GET_TOK();                  /* get Laplace correction */
    while (sc_token(scan) == ',') {
      GET_TOK();                /* read list of parameters */
      if (sc_token(scan) != T_ID) ERROR(E_PAREXP);
      if (strcmp(sc_value(scan), "maxllh") == 0)
        fbc->mode |= FBC_MAXLLH;/* use max. likelihood estimate */
      else ERROR(E_PAREXP);     /* abort on all other values */
      GET_TOK();                /* consume the estimator flag */
    }
    GET_CHR(';');               /* consume ';' */
  }

  /* --- read class distribution --- */
  if ((sc_token(scan) != T_ID)
  ||  ((strcmp(sc_value(scan), "prob") != 0)
  &&   (strcmp(sc_value(scan), "P")    != 0)))
    ERR_STR("prob");            /* check for 'prob' or 'P' */
  GET_TOK();                    /* consume   'prob' or 'P' */
  GET_CHR('(');                 /* consume '(' */
  t = sc_token(scan);           /* get the next token */
  if (((t != T_ID) && (t != T_NUM))
  ||  (strcmp(sc_value(scan), att_name(att)) != 0))
    ERROR(E_ATTEXP);            /* check for the class att. name */
  GET_TOK();                    /* consume the class att. name */
  GET_CHR(')');                 /* consume ')' */
  GET_CHR('=');                 /* consume '=' */
  GET_CHR('{');                 /* consume '{' */
  for (p = fbc->frqs +(i = fbc->clscnt); --i >= 0; )
    *--p = -1;                  /* clear the class frequencies */
  while (1) {                   /* class value read loop */
    t = sc_token(scan);         /* check for the class att. name */
    if ((t != T_ID) && (t != T_NUM)) ERROR(E_CLSEXP);
    if (t != T_NUM) t = ':';    /* if the token is no number, */
    else {                      /* the token must be a class, */
      GET_TOK();                /* otherwise consume the token, */
      t = sc_token(scan);       /* note the next token, and */
      sc_back(scan);            /* go back to the previous one */
    }                           /* (look ahead one token) */
    if (t != ':')               /* if no ':' follows, */
      i = (i+1) % fbc->clscnt;  /* get the cyclic successor id */
    else {                      /* if a  ':' follows */
      i = att_valid(att, sc_value(scan));
      if (i < 0) ERROR(E_UNKCLS);
      GET_TOK();                /* get and consume the class value */
      GET_CHR(':');             /* consume ':' */
    }
    if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP);
    f = atof(sc_value(scan));   /* get and check */
    if (f < 0) ERROR(E_ILLNUM); /* the class frequency */
    if (fbc->frqs[i] >= 0)      /* check whether frequency is set */
      XERROR(E_DUPCLS, att_valname(att, i));
    fbc->frqs[i] = f;           /* set the class frequency */
    GET_TOK();                  /* consume the class frequency */
    if (sc_token(scan) == '('){ /* if a relative number follows, */
      GET_TOK();                /* consume '(' */
      if (sc_token(scan) != T_NUM)  ERROR(E_NUMEXP);
      if (atof(sc_value(scan)) < 0) ERROR(E_ILLNUM);
      GET_TOK();                /* consume the relative number */
      GET_CHR('%');             /* consume '%' */
      GET_CHR(')');             /* consume ')' */
    }
    if (sc_token(scan) != ',') break;
    GET_TOK();                  /* if at end of list, abort loop, */
  }                             /* otherwise consume ',' */
  GET_CHR('}');                 /* consume '}' (end of distribution) */
  for (f = 0, p = fbc->frqs +(i = fbc->clscnt); --i >= 0; ) {
    if (*--p < 0) *p = 0;       /* clear the unset frequencies */
    else          f += *p;      /* and sum all other frequencies */
  }                             /* to obtain the total frequency */
  fbc->total = f;               /* set the sum of the frequencies */
  GET_CHR(';');                 /* consume ';' */

  /* --- read conditional distributions --- */
  if (fbc->numcnt <= 0) {       /* if there are no numeric attributes */
    GET_CHR('}');               /* consume '}' */
    GET_CHR(';');               /* consume ';' (end of classifier) */
    return 0;                   /* return 'ok' */
  }
  if ((sc_token(scan) != T_ID)
  ||  ((strcmp(sc_value(scan), "prob") != 0)
  &&   (strcmp(sc_value(scan), "P")    != 0)))
    ERR_STR("prob");            /* check for 'prob' or 'P' */
  GET_TOK();                    /* consume   'prob' or 'P' */
  GET_CHR('(');                 /* consume '(' */
  if (sc_token(scan) == '*') {  /* if a star follows, */
    GET_TOK(); }                /* simply consume it */
  else {                        /* if a list of attributes follows */
    for (flags = fbc->flags +(i = fbc->attcnt); --i >= 0; )
      *--flags = 0;             /* clear all attribute flags */
    for (ni = fbc->numids +(i = fbc->numcnt); --i >= 0; )
      flags[(--ni)->id] = -1;   /* set flags of numeric attributes */
    while (1) {                 /* attribute read loop */
      t = sc_token(scan);       /* check for a name */
      if ((t != T_ID) && (t != T_NUM)) ERROR(E_ATTEXP);
      attid = as_attid(attset, sc_value(scan));
      if (attid < 0)         ERROR(E_UNKATT);
      if (flags[attid] == 0) ERROR(E_DUPATT);
      flags[attid] = 0;         /* check and clear the attribute flag */
      GET_TOK();                /* consume the attribute name */
      if (sc_token(scan) != ',') break; 
      GET_TOK();                /* if at end of the list, abort loop, */
    }                           /* otherwise consume ',' */
    for (i = fbc->attcnt; --i >= 0; )
      if (flags[i]) XERROR(E_MISATT, att_name(as_att(attset, i)));
  }                             /* check the attribute flags */
  GET_CHR('|');                 /* consume '|' (condition indicator) */
  t = sc_token(scan);           /* get the next token */
  if (((t != T_ID) && (t != T_NUM))
  ||  (strcmp(sc_value(scan), att_name(att)) != 0))
    ERROR(E_CLSEXP);            /* check for a class name */
  GET_TOK();                    /* consume the class name */
  GET_CHR(')');                 /* consume ')' */
  GET_CHR('=');                 /* consume '=' */
  GET_CHR('{');                 /* consume '{' */

  for (p = fbc->posts +(i = fbc->clscnt); --i >= 0; )
    *--p = -1;                  /* mark all classes as unread */
  while (1) {                   /* class value read loop */
    t = sc_token(scan);         /* check for name, number, or 'N' */
    if ((t != T_ID) && (t != T_NUM)) ERROR(E_CLSEXP);
    if (t == T_NUM) t = ':';    /* if the token is a number, */
    else {                      /* the token must be a class, */
      GET_TOK();                /* otherwise consume the token, */
      t = sc_token(scan);       /* note the next token, and */
      sc_back(scan);            /* go back to the previous one */
    }                           /* (look ahead one token) */
    if (t != ':')               /* if no ':' follows, */
      i = (i+1) % fbc->clscnt;  /* get the cyclic successor id */
    else {                      /* if a  ':' follows */
      i = att_valid(att, sc_value(scan));
      if (i < 0) ERROR(E_UNKCLS);
      GET_TOK();                /* get and consume class value */
      GET_CHR(':');             /* consume ':' */
    }
    if (fbc->posts[i] >= 0) ERROR(E_DUPCLS);
    fbc->posts[i] = 1;          /* check and set the read marker */
    if ((sc_token(scan) != T_ID)
    ||  (strcmp(sc_value(scan), "N") != 0))
      ERR_STR("N");             /* check for an 'N' */
    GET_TOK();                  /* consume 'N' */
    GET_CHR('(');               /* consume '(' */
    i = mvn_parse(fbc->mvns[i], scan, fbc->frqs[i]);
    if (i != 0) return i;       /* parse a multivariate normal dist. */
    GET_CHR(')');               /* consume ')' */
    if (sc_token(scan) != ',') break;
    GET_TOK();                  /* if at end of list, abort loop, */
  }                             /* otherwise consume ',' */
  for (p = fbc->posts, i = 0; i < fbc->clscnt; p++, i++) {
    if ((*p < 0) && (fbc->frqs[i] > 0))
      XERROR(E_MISCLS, att_valname(att, i));
  }                             /* check for a complete classifier */
  GET_CHR('}');                 /* consume '}' */
  GET_CHR(';');                 /* consume ';' (end of distribution) */

  GET_CHR('}');                 /* consume '}' */
  GET_CHR(';');                 /* consume ';' (end of classifier) */
  return 0;                     /* return 'ok' */
}  /* _parse() */

/*--------------------------------------------------------------------*/

/* --- parse a full Bayes classifier ---
   Reads a classifier description from 'scan', using 'attset' as the
   attribute set to resolve names against (both must be non-NULL).
   Returns the created and set-up classifier on success, or NULL if
   _parse() reports an error; in that case any classifier that was
   already created is deleted before returning. */
FBC* fbc_parse (ATTSET *attset, SCAN *scan)
{
  FBC *clsfr = NULL;            /* classifier built by the parser */
  int status;                   /* result code of the parser */

  assert(attset && scan);       /* both arguments are mandatory */
  pa_init(scan);                /* prepare the scanner for parsing */
  status = _parse(attset, scan, &clsfr);
  if (status == 0) {            /* on success set up the classifier */
    fbc_setup(clsfr, clsfr->mode, clsfr->lcorr);
    return clsfr;               /* with its own mode/lcorr values */
  }                             /* and hand it to the caller */
  if (clsfr != NULL)            /* on failure discard whatever part */
    fbc_delete(clsfr, 0);       /* of the classifier was created */
  return NULL;                  /* and signal the error */
}  /* fbc_parse() */

#endif