www.pudn.com > bayes.rar > tnorm.c
/*----------------------------------------------------------------------
File : tnorm.c
Contents: program to normalize numeric table columns
Author : Christian Borgelt
History : 22.07.2003 file created from file tbal.c
16.08.2003 slight changes in error message output
----------------------------------------------------------------------*/
#include
#include
#include
#include
#include
#ifndef AS_RDWR
#define AS_RDWR
#endif
#ifndef TAB_RDWR
#define TAB_RDWR
#endif
#include "io.h"
#ifdef STORAGE
#include "storage.h"
#endif
/*----------------------------------------------------------------------
Preprocessor Definitions
----------------------------------------------------------------------*/
#define PRGNAME "tnorm"
#define DESCRIPTION "normalize numeric table columns"
#define VERSION "version 1.1 (2003.08.16) " \
"(c) 2003 Christian Borgelt"
/* --- error codes --- */
#define OK 0 /* no error */
#define E_NONE 0 /* no error */
#define E_NOMEM (-1) /* not enough memory */
#define E_FOPEN (-2) /* file open failed */
#define E_FREAD (-3) /* file read failed */
#define E_FWRITE (-4) /* file write failed */
#define E_OPTION (-5) /* unknown option */
#define E_OPTARG (-6) /* missing option argument */
#define E_ARGCNT (-7) /* wrong number of arguments */
#define E_TYPE (-8) /* wrong field type */
#define E_UNKNOWN (-9) /* unknown error */
/*----------------------------------------------------------------------
Constants
----------------------------------------------------------------------*/
static const char *errmsgs[] = { /* error messages */
/* E_NONE 0 */ "no error\n",
/* E_NOMEM -1 */ "not enough memory\n",
/* E_FOPEN -2 */ "cannot open file %s\n",
/* E_FREAD -3 */ "read error on file %s\n",
/* E_FWRITE -4 */ "write error on file %s\n",
/* E_OPTION -5 */ "unknown option -%c\n",
/* E_OPTARG -6 */ "missing option argument\n",
/* E_ARGCNT -7 */ "wrong number of arguments\n",
/* E_TYPE -8 */ "wrong type (field must be numeric)\n",
/* E_UNKNOWN -9 */ "unknown error\n"
};
/*----------------------------------------------------------------------
Global Variables
----------------------------------------------------------------------*/
const char *prgname = NULL; /* program name for error messages */
static ATTSET *attset = NULL; /* attribute set */
static TABLE *table = NULL; /* table */
static FILE *in = NULL; /* input file */
/*----------------------------------------------------------------------
Functions
----------------------------------------------------------------------*/
static void error (int code, ...)
{ /* --- print error message */
va_list args; /* list of variable arguments */
const char *msg; /* error message */
assert(prgname); /* check the program name */
if (code < E_UNKNOWN) code = E_UNKNOWN;
if (code < 0) { /* if to report an error, */
msg = errmsgs[-code]; /* get the error message */
if (!msg) msg = errmsgs[-E_UNKNOWN];
fprintf(stderr, "\n%s: ", prgname);
va_start(args, code); /* get variable arguments */
vfprintf(stderr, msg, args);/* print the error message */
va_end(args); /* end argument evaluation */
}
#ifndef NDEBUG /* clean up memory */
if (table) tab_delete(table, 0); /* and close files */
if (attset) as_delete(attset);
if (in && (in != stdin)) fclose(in);
#endif
#ifdef STORAGE
showmem("at end of program"); /* check memory usage */
#endif
exit(code); /* abort the program */
} /* error() */
/*--------------------------------------------------------------------*/
int main (int argc, char *argv[])
{ /* --- main function */
int i, k = 0; /* loop variables, counters */
char *s; /* to traverse options */
char **optarg = NULL; /* option argument */
char *fn_hdr = NULL; /* name of table header file */
char *fn_tab = NULL; /* name of table file */
char *fn_out = NULL; /* name of output file */
char *blanks = NULL; /* blank characters */
char *fldseps = NULL; /* field separators */
char *recseps = NULL; /* record separators */
char *uvchars = NULL; /* unknown value characters */
char *nrmcol = NULL; /* name of column to normalize */
int inflags = 0; /* table file read flags */
int outflags = AS_ATT; /* table file write flags */
int nrmid = -1; /* id of column to normalize */
double exp = 0; /* desired expected value */
double sdev = 1; /* desired standard deviation */
prgname = argv[0]; /* get program name for error msgs. */
/* --- print startup/usage message --- */
if (argc > 1) { /* if arguments are given */
fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
fprintf(stderr, VERSION); } /* print a startup message */
else { /* if no argument is given */
printf("usage: %s [options] "
"[-d|-h hdrfile] tabfile outfile\n", argv[0]);
printf("%s\n", DESCRIPTION);
printf("%s\n", VERSION);
printf("-c# name of field to normalize "
"(default: all numeric)\n");
printf("-e# desired expected value or minimum (default: 0)\n");
printf("-s# desired standard deviation (> 0) "
"or range (< 0) (default: 1)\n");
printf("-a align fields of output table "
"(default: do not align)\n");
printf("-w do not write field names to output file\n");
printf("-b/f/r# blank characters, field and record separators\n"
" (default: \" \\t\\r\", \" \\t\", \"\\n\")\n");
printf("-u# unknown value characters (default: \"?\")\n");
printf("-n number of tuple occurrences in last field\n");
printf("-d use default header "
"(field names = field numbers)\n");
printf("-h read table header (field names) from hdrfile\n");
printf("hdrfile file containing table header (field names)\n");
printf("infile table file to read "
"(field names in first record)\n");
printf("outfile file to write output table to\n");
return 0; /* print a usage message */
} /* and abort the program */
/* --- evaluate arguments --- */
for (i = 1; i < argc; i++) { /* traverse arguments */
s = argv[i]; /* get option argument */
if (optarg) { *optarg = s; optarg = NULL; continue; }
if ((*s == '-') && *++s) { /* -- if argument is an option */
while (1) { /* traverse characters */
switch (*s++) { /* evaluate option */
case 'c': optarg = &nrmcol; break;
case 'e': exp = strtod(s, &s); break;
case 's': sdev = strtod(s, &s); break;
case 'a': outflags |= AS_ALIGN; break;
case 'w': outflags &= ~AS_ATT; break;
case 'b': optarg = &blanks; break;
case 'f': optarg = &fldseps; break;
case 'r': optarg = &recseps; break;
case 'u': optarg = &uvchars; break;
case 'n': inflags |= AS_WEIGHT; break;
case 'd': inflags |= AS_DFLT; break;
case 'h': optarg = &fn_hdr; break;
default : error(E_OPTION, *--s); break;
} /* set option variables */
if (!*s) break; /* if at end of string, abort loop */
if (optarg) { *optarg = s; optarg = NULL; break; }
} } /* get option argument */
else { /* -- if argument is no option */
switch (k++) { /* evaluate non-option */
case 0: fn_tab = s; break;
case 1: fn_out = s; break;
default: error(E_ARGCNT); break;
} /* note filenames */
}
}
if (optarg) error(E_OPTARG); /* check option argument */
if (k != 2) error(E_ARGCNT); /* check number of arguments */
if (fn_hdr) { /* set header flags */
inflags = AS_ATT | (inflags & ~AS_DFLT);
if (strcmp(fn_hdr, "-") == 0) fn_hdr = "";
} /* convert "-" to "" */
/* --- read table header --- */
attset = as_create("domains", att_delete);
if (!attset) error(E_NOMEM); /* create an attribute set */
as_chars(attset, blanks, fldseps, recseps, uvchars);
fprintf(stderr, "\n"); /* set delimiter characters */
in = io_hdr(attset, fn_hdr, fn_tab, inflags, 1);
if (!in) error(1); /* read the table header */
/* --- determine id of column to normalize --- */
if (nrmcol) { /* if a column name is given */
if ((nrmid = as_attid(attset, nrmcol)) < 0) {
s = (inflags & AS_ATT) ? fn_hdr : fn_tab;
io_error(E_MISFLD, s, 1, nrmcol); error(1);
} /* check whether class exists */
} /* and abort on error */
/* --- read table --- */
table = io_bodyin(attset, in, fn_tab, inflags, "table", 1);
in = NULL; /* read the table and */
if (!table) error(1); /* check for an error */
/* --- normalize columns --- */
if (nrmid >= 0) { /* if a specific column is given */
tab_colconv(table, nrmid, AT_AUTO); /* determine the type */
k = att_type(tab_col(table, nrmid)); /* and get it */
if (k == AT_INT) tab_colconv(table, nrmid, AT_FLT);
else if (k != AT_FLT) error(E_TYPE); /* convert to float */
tab_colnorm(table, nrmid, exp, sdev); } /* and normalize */
else{ /* if to convert all numeric columns */
for (i = tab_colcnt(table); --i >= 0; ) {
tab_colconv(table, i, AT_AUTO); /* determine the type */
k = att_type(tab_col(table, i)); /* and get it */
if (k == AT_INT) tab_colconv(table, i, AT_FLT);
else if (k != AT_FLT) continue; /* convert to float */
tab_colnorm(table, i, exp, sdev);
} /* normalize the numeric columns */
}
/* --- write output table --- */
if (io_tabout(table, fn_out, outflags, 1) != 0)
error(1); /* write the balanced table */
/* --- clean up --- */
#ifndef NDEBUG
tab_delete(table, 1); /* delete table and attribute set */
#endif
#ifdef STORAGE
showmem("at end of program"); /* check memory usage */
#endif
return 0; /* return 'ok' */
} /* main() */