/* Source listing: www.pudn.com > bayes.rar > t1inn.c */


/*----------------------------------------------------------------------
  File    : t1inn.c
  Contents: program to convert symbolic attributes to 1-in-n coding
  Author  : Christian Borgelt
  History : 11.08.2003 file created
----------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <assert.h>
#ifndef AS_RDWR
#define AS_RDWR
#endif
#ifndef AS_PARSE
#define AS_PARSE
#endif
#include "io.h"
#include "attmap.h"
#ifdef STORAGE
#include "storage.h"
#endif

/*----------------------------------------------------------------------
  Preprocessor Definitions
----------------------------------------------------------------------*/
/* program identification strings; DESCRIPTION and VERSION are */
/* printed by main() in the startup and in the usage message   */
#define PRGNAME     "t1inn"
#define DESCRIPTION "convert symbolic attributes to 1-in-n coding"
/* VERSION consists of two adjacent string literals (version and */
/* copyright part); it does not end with a newline character     */
#define VERSION     "version 1.1 (2004.08.11)         " \
                    "(c) 2003   Christian Borgelt"

/* --- error codes --- */
/* All codes are <= 0. error() uses the negated code as an index  */
/* into the errmsgs table (errmsgs[-code]) and passes the code to */
/* exit(). Codes below E_UNKNOWN are mapped to E_UNKNOWN there,   */
/* so E_UNKNOWN must stay the smallest code and errmsgs must have */
/* one entry per code. OK and E_NONE are two names for the same   */
/* value 0.                                                       */
#define OK            0         /* no error */
#define E_NONE        0         /* no error */
#define E_NOMEM     (-1)        /* not enough memory */
#define E_FOPEN     (-2)        /* file open failed */
#define E_FREAD     (-3)        /* file read failed */
#define E_FWRITE    (-4)        /* file write failed */
#define E_OPTION    (-5)        /* unknown option */
#define E_OPTARG    (-6)        /* missing option argument */
#define E_ARGCNT    (-7)        /* wrong number of arguments */
#define E_STDIN     (-8)        /* double assignment of stdin */
#define E_PARSE     (-9)        /* parse error(s) */
#define E_UNKNOWN  (-10)        /* unknown error */

/*----------------------------------------------------------------------
  Constants
----------------------------------------------------------------------*/
/* Message templates printed by error(). The entry for an error code  */
/* is stored at index -code (the negated code). A template that       */
/* contains a conversion specification (%s, %c) consumes one extra    */
/* argument passed to error() after the code.                         */
static const char *errmsgs[] = {
  [ 0] = "no error\n",                              /* E_NONE    */
  [ 1] = "not enough memory\n",                     /* E_NOMEM   */
  [ 2] = "cannot open file %s\n",                   /* E_FOPEN   */
  [ 3] = "read error on file %s\n",                 /* E_FREAD   */
  [ 4] = "write error on file %s\n",                /* E_FWRITE  */
  [ 5] = "unknown option -%c\n",                    /* E_OPTION  */
  [ 6] = "missing option argument\n",               /* E_OPTARG  */
  [ 7] = "wrong number of arguments\n",             /* E_ARGCNT  */
  [ 8] = "double assignment of standard input\n",   /* E_STDIN   */
  [ 9] = "parse error(s) on file %s\n",             /* E_PARSE   */
  [10] = "unknown error\n"                          /* E_UNKNOWN */
};

/*----------------------------------------------------------------------
  Global Variables
----------------------------------------------------------------------*/
/* Program state shared between main() and error(). A NULL pointer    */
/* means "not created / not open"; error() relies on this when it     */
/* releases the resources (debug builds only) before exiting.         */

/* program name, prefix of all error messages (external linkage) */
const char *prgname = NULL;

/* scanner used to read the domains file */
static SCAN *scan = NULL;

/* attribute set parsed from the domains file */
static ATTSET *attset = NULL;

/* attribute map created from the attribute set */
static ATTMAP *attmap = NULL;

/* output vector that receives the mapped values of one tuple */
static double *vec = NULL;

/* table file that is read */
static FILE *in = NULL;

/* table file that is written */
static FILE *out = NULL;

/*----------------------------------------------------------------------
  Main Functions
----------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  error: print an error message (if any) and terminate the program.
    code  exit status of the program. If it is negative, the message
          errmsgs[-code] is printed to stderr, prefixed with the
          program name; codes below E_UNKNOWN are treated as
          E_UNKNOWN. For code >= 0 nothing is printed.
    ...   arguments for the conversion specification (if any) in the
          selected message, e.g. a file name or an option character.
  In debug builds (NDEBUG not defined) the global resources are
  released first. The function does not return; it calls exit(code).
----------------------------------------------------------------------*/
static void error (int code, ...)
{                               /* --- print error message */
  va_list    ap;                /* variable argument list */
  const char *text;             /* message template to print */

  assert(prgname);              /* the program name must be set */
  if (code < 0) {               /* negative codes carry a message */
    if (code < E_UNKNOWN)       /* codes beyond the message table */
      code = E_UNKNOWN;         /* are reported as unknown errors */
    text = errmsgs[-code];      /* look up the message template */
    if (text == NULL)           /* fall back if the entry is empty */
      text = errmsgs[-E_UNKNOWN];
    fprintf(stderr, "\n%s: ", prgname);
    va_start(ap, code);         /* format the message with the */
    vfprintf(stderr, text, ap); /* additional arguments and */
    va_end(ap);                 /* write it to stderr */
  }
  #ifndef NDEBUG                /* in debug builds release all */
  if (scan != NULL)   { sc_delete(scan); }    /* resources that */
  if (attset != NULL) { as_delete(attset); }  /* are still held */
  if (attmap != NULL) { am_delete(attmap); }
  free(vec);                    /* (free(NULL) does nothing) */
  if ((in  != NULL) && (in  != stdin))  { fclose(in);  }
  if ((out != NULL) && (out != stdout)) { fclose(out); }
  #endif
  #ifdef STORAGE
  showmem("at end of program"); /* check memory usage */
  #endif
  exit(code);                   /* terminate with the given code */
}  /* error() */

/*--------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  main: convert the symbolic attributes of a table to 1-in-n coding.
    argc  number of command line arguments
    argv  command line arguments: options, the domains file, the
          input table file and the output table file (see the usage
          message printed when no arguments are given)
  Processing steps:
    1. evaluate the command line,
    2. parse the domain descriptions and create an attribute map
       plus an output vector of matching size,
    3. read the table header,
    4. write the output header (unless -w is given) and, for each
       input record, the vector computed by the attribute map.
  returns 0 on success (also after printing the usage message).
  All errors terminate the program through error(), i.e. the
  function does not return in that case.
----------------------------------------------------------------------*/
int main (int argc, char *argv[])
{                               /* --- main function */
  int    i, k = 0, n, f;        /* loop variables, counter, flags */
  char   *s;                    /* to traverse the options */
  char   **optarg = NULL;       /* option argument */
  char   *fn_dom  = NULL;       /* name of domains file */
  char   *fn_hdr  = NULL;       /* name of table header file */
  char   *fn_in   = NULL;       /* name of input  file */
  char   *fn_out  = NULL;       /* name of output file */
  char   *blanks  = NULL;       /* blanks characters */
  char   *fldseps = NULL;       /* field  separators */
  char   *recseps = NULL;       /* record separators */
  char   *uvchars = NULL;       /* unknown value characters */
  int    inflags  = AS_NOXATT;  /* table file read flags */
  int    outflags = AS_ATT;     /* table file write flags */
  int    tplcnt   = 0;          /* number of tuples */
  double tplwgt   = 0.0;        /* weight of tuples */
  char   *fmt     = "%g";       /* output format for numbers */
  float  wgt;                   /* tuple/instantiation weight */
  CCHAR  *seps;                 /* separator characters */
  CCHAR  *name;                 /* attribute name */
  TFSERR *err;                  /* error information */

  prgname = argv[0];            /* get program name for error msgs. */

  /* --- print startup/usage message --- */
  if (argc > 1) {               /* if arguments are given */
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fputs(VERSION, stderr); }   /* print a startup message */
  else {                        /* if no argument is given */
    printf("usage: %s [options] domfile "
                     "[-d|-h hdrfile] infile outfile\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-o#      number output format (default: \"%s\")\n", fmt);
    printf("-w       do not write field names to output file\n");
    printf("-b/f/r#  blank characters, field and record separators\n"
           "         (default: \" \\t\\r\", \" \\t\", \"\\n\")\n");
    printf("-u#      unknown value characters (default: \"?\")\n");
    printf("-n       number of tuple occurrences in last field\n");
    printf("-d       use default header "
                    "(field names = field numbers)\n");
    printf("domfile  file containing domain descriptions\n");
    printf("-h       read table header (field names) from hdrfile\n");
    printf("hdrfile  file containing table header (field names)\n");
    printf("infile   table file to read "
                    "(field names in first record)\n");
    printf("outfile  table file to write\n");
    return 0;                   /* print a usage message */
  }                             /* and abort the program */

  /* --- evaluate arguments --- */
  /* 'optarg' points to the variable that is to receive the argument */
  /* of the option seen last; the argument is either the rest of the */
  /* option string or, if that is empty, the next program argument.  */
  for (i = 1; i < argc; i++) {  /* traverse the arguments */
    s = argv[i];                /* get option argument */
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  /* -- if argument is an option */
      while (1) {               /* traverse characters */
        switch (*s++) {         /* evaluate option */
          case 'o': optarg    = &fmt;      break;
          case 'w': outflags &= ~AS_ATT;   break;
          case 'b': optarg    = &blanks;   break;
          case 'f': optarg    = &fldseps;  break;
          case 'r': optarg    = &recseps;  break;
          case 'u': optarg    = &uvchars;  break;
          case 'n': outflags |= AS_WEIGHT;
                    inflags  |= AS_WEIGHT; break;
          case 'd': inflags  |= AS_DFLT;   break;
          case 'h': optarg    = &fn_hdr;   break;
          default : error(E_OPTION, *--s); break;
        }                       /* set option variables */
        if (!*s) break;         /* if at end of string, abort loop */
        if (optarg) { *optarg = s; optarg = NULL; break; }
      } }                       /* get option argument */
    else {                      /* -- if argument is no option */
      switch (k++) {            /* evaluate non-option */
        case  0: fn_dom = s;      break;
        case  1: fn_in  = s;      break;
        case  2: fn_out = s;      break;
        default: error(E_ARGCNT); break;
      }                         /* note filenames */
    }                           /* (a single "-" is a non-option */
  }                             /* with an empty file name) */
  if (optarg) error(E_OPTARG);  /* check option argument */
  if (k != 3) error(E_ARGCNT);  /* check number of arguments */
  if (fn_hdr && (strcmp(fn_hdr, "-") == 0))
    fn_hdr = "";                /* convert "-" to "" */
  i = (!fn_dom || !*fn_dom) ? 1 : 0;
  if (fn_in  && !*fn_in)  i++;  /* an empty name means stdin */
  if (fn_hdr && !*fn_hdr) i++;  /* check assignments of stdin: */
  if (i > 1) error(E_STDIN);    /* stdin must not be used twice */
  if (fn_hdr)                   /* set the header file flag */
    inflags = AS_ATT | (inflags & ~AS_DFLT);
  if ((outflags & AS_ATT) && (outflags & AS_ALIGN))
    outflags |= AS_ALNHDR;      /* set align to header flag */

  /* --- read attribute set --- */
  scan = sc_create(fn_dom);     /* create a scanner */
  if (!scan) error((!fn_dom || !*fn_dom) ? E_NOMEM : E_FOPEN, fn_dom);
  attset = as_create("domains", att_delete);
  if (!attset) error(E_NOMEM);  /* create an attribute set */
  fprintf(stderr, "\nreading %s ... ", sc_fname(scan));
  if ((sc_nexter(scan)   <  0)  /* start scanning (get first token) */
  ||  (as_parse(attset, scan, AT_ALL) != 0)
  ||  (as_attcnt(attset) <= 0)  /* parse attribute set and */
  ||  !sc_eof(scan))            /* check for end of file */
    error(E_PARSE, sc_fname(scan));
  sc_delete(scan); scan = NULL; /* delete the scanner */
  attmap = am_create(attset, 0, -1);
  if (!attmap) error(E_NOMEM);  /* create an attribute map */
  /* (the result of am_create() must be checked here, not attset, */
  /* because the map is dereferenced by am_dim() directly below)  */
  vec = (double*)malloc(am_dim(attmap) *sizeof(double));
  if (!vec)    error(E_NOMEM);  /* create an output vector */
  fprintf(stderr, "[%d attribute(s)] done.\n", as_attcnt(attset));

  /* --- read table header --- */
  seps = as_chars(attset, blanks, fldseps, recseps, uvchars);
  in = io_hdr(attset, fn_hdr, fn_in, inflags|AS_MARKED, 1);
  if (!in) error(1);            /* read the table header */
  /* (no message is printed for code 1; presumably io_hdr() has */
  /* already reported the problem -- to be confirmed in io.c)   */
  if ((outflags & AS_ALIGN)     /* if to align output file */
  &&  (in != stdin)) {          /* and not to read from stdin */
    i = AS_INST | (inflags & ~(AS_ATT|AS_DFLT));
    while (as_read(attset, in, i) == 0);
    fclose(in);                 /* determine the column widths */
    in = io_hdr(attset, fn_hdr, fn_in, inflags|AS_MARKED, 1);
    if (!in) error(1);          /* reread the table header */
  }                             /* (necessary because of first tuple) */

  /* --- write output file --- */
  if (fn_out && *fn_out)        /* if an output file name is given, */
    out = fopen(fn_out, "w");   /* open output file for writing */
  else {                        /* if no output file name is given, */
    out = stdout; fn_out = ""; }         /* write to stdout */
  if (!out) error(E_FOPEN, fn_out);
  if (outflags & AS_ATT) {      /* if to write table header */
    for (i = 0; i < am_attcnt(attmap); i++) {
      if (i > 0) fputc(seps[1], out);  /* print a separator */
      name = att_name(as_att(attset, i));
      n = am_cnt(attmap, i);    /* get name and column counter */
      if (n <= 1) {             /* single column: print only name */
        fputs(name, out); continue; }
      for (k = 1; k <= n; k++){ /* multiple column: */
        if (k > 1) fputc(seps[1], out);
        fprintf(out, "%s_%d", name, k);
      }                         /* print a separator and */
    }                           /* name and column counter */
    if (outflags & AS_WEIGHT) { /* print a weight indicator */
      fputc(seps[1], out); fputc('#', out); }
    fputc(seps[2], out);        /* terminate the output line */
  }
  n = am_dim(attmap);           /* initialize the read flags */
  f = AS_INST|(inflags & ~(AS_ATT|AS_DFLT));
  /* with a default header and no header file the first record is */
  /* treated as already read (i = 0); otherwise read it now       */
  i = ((inflags & AS_DFLT) && !(inflags & AS_ATT))
    ? 0 : as_read(attset, in, f);
  while (i == 0) {              /* record read loop */
    wgt = as_getwgt(attset);    /* get the tuple weight, count */
    tplwgt += wgt; tplcnt++;    /* the tuple, and sum its weight */
    am_exec(attmap, NULL, AM_INPUTS, vec);
    /* NOTE(review): fmt may come from the -o option and is used */
    /* as a printf format with one double argument; it has to    */
    /* contain exactly one floating-point conversion.            */
    for (k = 0; k < n; k++) {   /* execute the attribute map */
      if (k > 0) fputc(seps[1], out);
      fprintf(out, fmt,vec[k]); /* print a field separator */
    }                           /* and the vector element */
    if (outflags & AS_WEIGHT) { /* if a weight column is announced */
      fputc(seps[1], out);      /* in the header ('#'), write the */
      fprintf(out, "%g", wgt);  /* tuple weight as the last field, */
    }                           /* so header and records agree */
    fputc(seps[2], out);        /* terminate the output line */
    i = as_read(attset, in, f); /* try to read the next record */
  }
  if (i < 0) {                  /* if an error occurred, */
    err = as_err(attset);       /* get the error information */
    tplcnt += (inflags & (AS_ATT|AS_DFLT)) ? 1 : 2;
    io_error(i, fn_in, tplcnt, err->s, err->fld, err->exp);
    error(1);                   /* print an error message */
  }                             /* and abort the program */
  if (in != stdin) fclose(in);  /* close the table file and */
  in = NULL;                    /* clear the file variable */
  if (out != stdout) {          /* if not written to stdout, */
    i = fclose(out); out = NULL;/* close the output file */
    if (i != 0) error(E_FWRITE, fn_out);
  }
  fprintf(stderr, "[%d/%g tuple(s)] done.\n", tplcnt, tplwgt);

  /* --- clean up --- */
  #ifndef NDEBUG
  as_delete(attset);            /* delete attribute set, */
  am_delete(attmap);            /* attribute map, */
  free(vec);                    /* and output vector */
  #endif
  #ifdef STORAGE
  showmem("at end of program"); /* check memory usage */
  #endif
  return 0;                     /* return 'ok' */
}  /* main() */