www.pudn.com > bayes.rar > nbayes.h


/*----------------------------------------------------------------------
  File    : nbayes.h
  Contents: Naive Bayes classifier management
  Author  : Christian Borgelt
  History : 07.12.1998 file created
            16.12.1998 definition of type DVEC changed
            13.02.1999 tuple parameters added to nbc_add and nbc_exec
            10.03.1999 definition of NBC_MARKED added
            25.03.1999 definition of NBC_DISTUV added
            27.03.1999 some enquiry functions added
            05.04.1999 Laplace correction parameter added
            23.04.1999 parameter 'mode' added to function nbc_parse
            15.05.1999 function nbc_mark added
            13.11.2000 parameter 'dupas' added to function nbc_dup
            18.11.2000 function nbc_setup added, nbc_exec adapted
            21.11.2000 functions nbc_lcorr and nbc_mode added
            16.07.2001 adapted to modified module scan
            26.04.2003 function nbc_rand added
            12.08.2004 adapted to new module parse
----------------------------------------------------------------------*/
#ifndef __NBAYES__
#define __NBAYES__
#ifdef NBC_PARSE
#include "parse.h"
#endif
#include "table.h"

/*----------------------------------------------------------------------
  Preprocessor Definitions
----------------------------------------------------------------------*/
/* --- induction modes --- */
#define NBC_DUPAS   0x0001      /* duplicate attribute set */
#define NBC_ADD     0x0002      /* greedily add attributes */
#define NBC_REMOVE  0x0004      /* greedily remove attributes */

/* --- setup/induction modes --- */
#define NBC_ALL     0x0010      /* set up for all attributes */
#define NBC_MARKED  0x0020      /* set up only for marked attributes */
#define NBC_DISTUV  0x0040      /* distrib. weight for unknown values */
#define NBC_MAXLLH  0x0080      /* max. likelihood estim. of variance */

/* --- description modes --- */
#define NBC_TITLE   0x0001      /* print a title (as a comment) */
#define NBC_REL     0x0002      /* print relative numbers */

/*----------------------------------------------------------------------
  Type Definitions
----------------------------------------------------------------------*/
typedef struct {                /* --- discrete distribution --- */
  double cnt;                   /* number of cases (total frequency) */
  double *frqs;                 /* value frequency   vector */
  double *probs;                /* value probability vector */
} DISCD;                        /* (discrete distribution) */

typedef struct {                /* --- normal distribution --- */
  double cnt;                   /* number of cases (total frequency) */
  double sv;                    /* sum of values */
  double sv2;                   /* sum of squared values */
  double exp;                   /* expected value */
  double var;                   /* variance */
} NORMD;                        /* (normal distribution) */

typedef struct {                /* --- distribution vector --- */
  int    mark;                  /* whether read or to be processed */
  int    type;                  /* attribute type (0: class) */
  int    valvsz;                /* size of value frequency vectors */
  int    valcnt;                /* number of attribute values */
  DISCD  *discds;               /* vector of discrete distributions */
  NORMD  *normds;               /* vector of normal distributions */
} DVEC;                         /* (distribution vector) */

typedef struct {                /* --- naive Bayes classifier --- */
  ATTSET *attset;               /* underlying attribute set */
  int    attcnt;                /* number of attributes */
  int    clsid;                 /* identifier of class attribute */
  int    clsvsz;                /* size of class dependent vectors */
  int    clscnt;                /* number of classes */
  int    mode;                  /* estimation mode (e.g. NBC_MAXLLH) */
  double lcorr;                 /* Laplace correction */
  double total;                 /* total number of cases */
  double *frqs;                 /* class frequencies */
  double *priors;               /* prior     class probabilities */
  double *posts;                /* posterior class probabilities */
  double *cond;                 /* buffer for conditional probs. */
  DVEC   dvecs[1];              /* vector of distribution vectors */
} NBC;                          /* (naive Bayes classifier) */

/*----------------------------------------------------------------------
  Functions
----------------------------------------------------------------------*/
extern NBC*    nbc_create (ATTSET *attset, int clsid);
extern NBC*    nbc_dup    (NBC *nbc, int dupas);
extern void    nbc_delete (NBC *nbc, int delas);
extern void    nbc_clear  (NBC *nbc);

extern ATTSET* nbc_attset (const NBC *nbc);
extern int     nbc_attcnt (const NBC *nbc);
extern int     nbc_valcnt (const NBC *nbc, int attid);
extern int     nbc_clsid  (const NBC *nbc);
extern int     nbc_clscnt (const NBC *nbc);
extern double  nbc_total  (const NBC *nbc);

#ifdef NBC_INDUCE
extern int     nbc_add    (NBC *nbc, const TUPLE *tpl);
extern NBC*    nbc_induce (TABLE *table, int clsid,
                           int mode, double lcorr);
extern int     nbc_mark   (NBC *nbc);
#endif

extern void    nbc_setup  (NBC *nbc, int mode, double lcorr);
extern double  nbc_lcorr  (const NBC *nbc);
extern int     nbc_mode   (const NBC *nbc);

extern double  nbc_prior  (const NBC *nbc, int clsid);
extern double  nbc_prob   (const NBC *nbc, int clsid, int attid,
                           int valid);
extern double  nbc_exp    (const NBC *nbc, int clsid, int attid);
extern double  nbc_var    (const NBC *nbc, int clsid, int attid);
extern int     nbc_exec   (NBC *nbc, const TUPLE *tpl, double *conf);
extern void    nbc_rand   (NBC *nbc, double drand (void));

extern int     nbc_desc   (NBC *nbc, FILE *file, int mode, int maxlen);
#ifdef NBC_PARSE
extern NBC*    nbc_parse  (ATTSET *attset, SCAN *scan);
#endif

/*----------------------------------------------------------------------
  Preprocessor Definitions
----------------------------------------------------------------------*/
#define nbc_attset(b)       ((b)->attset)
#define nbc_attcnt(b)       ((b)->attcnt)
#define nbc_valcnt(b,a)     ((b)->dvecs[a].valcnt)
#define nbc_clsid(b)        ((b)->clsid)
#define nbc_clscnt(b)       ((b)->clscnt)
#define nbc_total(b)        ((b)->total)

#define nbc_lcorr(b)        ((b)->lcorr)
#define nbc_mode(b)         ((b)->mode)

#define nbc_prior(b,c)      ((b)->priors[c])
#define nbc_prob(b,c,a,v)   ((b)->dvecs[a].discds[c].probs[v])
#define nbc_exp(b,c,a)      ((b)->dvecs[a].normds[c].exp)
#define nbc_var(b,c,a)      ((b)->dvecs[a].normds[c].var)

#endif