/* www.pudn.com > bayes.rar > dom.c */
/*----------------------------------------------------------------------
File : dom.c
Contents: program to determine attribute domains
Author : Christian Borgelt
History : 24.11.1995 file created
26.11.1995 adapted to modified attset functions
08.12.1995 variables in and out made global
21.12.1995 sort of domains added
17.01.1996 adapted to modified as_read
23.02.1996 adapted to modified attset functions
27.06.1996 minor improvements
22.11.1996 options -b, -f, and -r added
26.02.1997 tuple weights (option -n) added
08.09.1997 minor improvements
11.01.1998 unknown value characters (option -u) added
12.09.1998 numerical/alphabetical sorting (option -S) added
25.09.1998 table reading simplified
07.02.1999 input from stdin, output to stdout added
12.02.1999 default header handling improved
17.04.1999 simplified using the new module 'io'
14.07.2001 adapted to modified module tfscan
18.06.2002 meaning of option -i (intervals) inverted
16.08.2003 slight changes in error message output
----------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <assert.h>
#include "io.h"
#ifdef STORAGE
#include "storage.h"
#endif
/*----------------------------------------------------------------------
Preprocessor Definitions
----------------------------------------------------------------------*/
/* --- program identification ---
   DESCRIPTION and VERSION are printed by main() in the startup
   message (stderr) and in the usage message (stdout). */
#define PRGNAME "dom"
#define DESCRIPTION "determine attribute domains"
#define VERSION "version 1.9 (2003.08.16) " \
"(c) 1995-2003 Christian Borgelt"
/* --- error codes --- */
/* All real error codes are negative: error() uses the negated code
   as index into errmsgs[], so the codes must stay consecutive and
   in the same order as the message table. E_UNKNOWN has to remain
   the smallest code, because error() maps every code below it to
   E_UNKNOWN. */
#define OK 0 /* no error */
#define E_NONE 0 /* no error */
#define E_NOMEM (-1) /* not enough memory */
#define E_FOPEN (-2) /* file open failed */
#define E_FREAD (-3) /* file read failed */
#define E_FWRITE (-4) /* file write failed */
#define E_OPTION (-5) /* unknown option */
#define E_OPTARG (-6) /* missing option argument */
#define E_ARGCNT (-7) /* wrong number of arguments */
#define E_UNKNOWN (-8) /* unknown error */
/*----------------------------------------------------------------------
Constants
----------------------------------------------------------------------*/
/* Message texts for the error codes, indexed by the negated code
   (error() looks up errmsgs[-code]). Each entry is passed to
   vfprintf as the format string, so an entry containing a
   conversion (%s, %c) needs a matching additional argument in the
   corresponding error() call. The order of the entries must agree
   with the values of the E_... codes noted in front of them. */
static const char *errmsgs[] = { /* error messages */
/* E_NONE 0 */ "no error\n",
/* E_NOMEM -1 */ "not enough memory\n",
/* E_FOPEN -2 */ "cannot open file %s\n",
/* E_FREAD -3 */ "read error on file %s\n",
/* E_FWRITE -4 */ "write error on file %s\n",
/* E_OPTION -5 */ "unknown option -%c\n",
/* E_OPTARG -6 */ "missing option argument\n",
/* E_ARGCNT -7 */ "wrong number of arguments\n",
/* E_UNKNOWN -8 */ "unknown error\n"
};
/*----------------------------------------------------------------------
Global Variables
----------------------------------------------------------------------*/
/* prgname is set from argv[0] at the start of main() and is printed
   by error() as the prefix of every message. attset and out live at
   file scope so that error() can delete the attribute set and close
   the output file (in builds without NDEBUG) before it exits.
   NOTE(review): prgname is the only one without 'static' - presumably
   it is referenced from another module; verify before changing its
   linkage. */
const char *prgname = NULL; /* program name for error messages */
static ATTSET *attset = NULL; /* attribute set */
static FILE *out = NULL; /* output file */
/*----------------------------------------------------------------------
Functions
----------------------------------------------------------------------*/
static void error (int code, ...)
{                               /* --- report an error and terminate */
  /* Prints the message that belongs to a negative error code to
     stderr (prefixed with the program name), releases the global
     resources and exits. The function never returns.
       code  error code; codes below E_UNKNOWN are treated as
             E_UNKNOWN; for a code >= 0 no message is printed
       ...   arguments for the conversions in the message text
     The exit status is the code itself if it is negative and the
     negated code otherwise. */
  va_list    ap;                /* variable argument list */
  const char *fmt;              /* format string of the message */
  int        status;            /* exit status of the program */

  assert(prgname);              /* the program name must be set */
  if (code < E_UNKNOWN)         /* map codes without a message */
    code = E_UNKNOWN;           /* to the generic error */
  if (code < 0) {               /* only negative codes are reported */
    fmt = errmsgs[-code];       /* the table is indexed by -code */
    if (fmt == NULL)
      fmt = errmsgs[-E_UNKNOWN];
    fprintf(stderr, "\n%s: ", prgname);
    va_start(ap, code);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
  }
  #ifndef NDEBUG                /* release memory and files */
  if (attset != NULL)
    as_delete(attset);
  if ((out != NULL) && (out != stdout))
    fclose(out);
  #endif
  #ifdef STORAGE
  showmem("at end of program"); /* check memory usage */
  #endif
  status = (code > 0) ? -code : code;
  exit(status);                 /* abort the program */
}  /* error() */
/*--------------------------------------------------------------------*/
static int numcmp (const char *name1, const char *name2)
{                               /* --- numerical comparison of names */
  /* Orders two value names by the decimal integer they start with;
     if the numbers are equal (which includes names without any
     leading number, for which strtol yields 0) the names are
     compared as plain strings.
       name1, name2  the names to compare (must not be NULL)
     Returns a value < 0, == 0 or > 0 like strcmp.
     The numbers are deliberately kept as 'long': narrowing the
     strtol result to 'int' mangles values outside the int range
     and thus produces a wrong order for large numbers. */
  long n1, n2;                  /* results of conversion */

  n1 = strtol(name1, NULL, 10); /* convert the names */
  n2 = strtol(name2, NULL, 10); /* to integer numbers */
  if (n1 < n2) return -1;       /* compare the numbers and */
  if (n1 > n2) return  1;       /* only if they are equal */
  return strcmp(name1, name2);  /* compare the names directly */
}  /* numcmp() */
/*--------------------------------------------------------------------*/
int main (int argc, char *argv[])
{ /* --- main function */
int i, k = 0; /* loop variables, counter */
char *s; /* to traverse the options */
char **optarg = NULL; /* option argument */
char *fn_hdr = NULL; /* name of table header file */
char *fn_tab = NULL; /* name of table file */
char *fn_dom = NULL; /* name of domains file */
char *blanks = NULL; /* blanks */
char *fldseps = NULL; /* field separators */
char *recseps = NULL; /* record separators */
char *uvchars = NULL; /* unknown value characters */
int flags = 0; /* table file read flags */
int sort = 0; /* flag for domain sorting */
int atdet = 0; /* flag for automatic type determ. */
int ivals = AS_IVALS; /* flag for numeric intervals */
int maxlen = 0; /* maximal output line length */
int attid; /* loop variable for attributes */
ATT *att; /* to traverse attributes */
prgname = argv[0]; /* get program name for error msgs. */
/* --- print startup/usage message --- */
if (argc > 1) { /* if arguments are given */
fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
fprintf(stderr, VERSION); } /* print a startup message */
else { /* if no argument is given */
printf("usage: %s [options] "
"[-d|-h hdrfile] tabfile domfile\n", argv[0]);
printf("%s\n", DESCRIPTION);
printf("%s\n", VERSION);
printf("-s sort domains alphabetically "
"(default: order of appearance)\n");
printf("-S sort domains numerically/alphabetically\n");
printf("-a automatic type determination "
"(default: all symbolic)\n");
printf("-i do not print intervals for numeric attributes\n");
printf("-l# output line length (default: no limit)\n");
printf("-b/f/r# blank characters, field and record separators\n"
" (default: \" \\t\\r\", \" \\t\", \"\\n\")\n");
printf("-u# unknown value characters (default: \"?\")\n");
printf("-n number of tuple occurrences in last field\n");
printf("-d use default header "
"(field names = field numbers)\n");
printf("-h read table header (field names) from hdrfile\n");
printf("hdrfile file containing table header (field names)\n");
printf("tabfile table file to read "
"(field names in first record)\n");
printf("domfile file to write domain descriptions to\n");
return 0; /* print a usage message */
} /* and abort the program */
/* --- evaluate arguments --- */
for (i = 1; i < argc; i++) { /* traverse the arguments */
s = argv[i]; /* get option argument */
if (optarg) { *optarg = s; optarg = NULL; continue; }
if ((*s == '-') && *++s) { /* -- if argument is an option */
while (1) { /* traverse characters */
switch (*s++) { /* evaluate option */
case 's': sort = 1; break;
case 'S': sort = 2; break;
case 'a': atdet = 1; break;
case 'i': ivals = 0; break;
case 'l': maxlen = (int)strtol(s, &s, 0); break;
case 'b': optarg = &blanks; break;
case 'f': optarg = &fldseps; break;
case 'r': optarg = &recseps; break;
case 'u': optarg = &uvchars; break;
case 'n': flags |= AS_WEIGHT; break;
case 'd': flags |= AS_DFLT; break;
case 'h': optarg = &fn_hdr; break;
default : error(E_OPTION, *--s); break;
} /* set option variables */
if (!*s) break; /* if at end of string, abort loop */
if (optarg) { *optarg = s; optarg = NULL; break; }
} } /* get option argument */
else { /* -- if argument is no option */
switch (k++) { /* evaluate non-option */
case 0: fn_tab = s; break;
case 1: fn_dom = s; break;
default: error(E_ARGCNT); break;
} /* note filenames */
}
}
if (optarg) error(E_OPTARG); /* check option argument */
if (k != 2) error(E_ARGCNT); /* check number of arguments */
if (fn_hdr && (strcmp(fn_hdr, "-") == 0))
fn_hdr = ""; /* convert "-" to "" */
if (fn_hdr) /* set header flags */
flags = AS_ATT | (flags & ~AS_DFLT);
/* --- determine attributes and domains --- */
attset = as_create("domains", att_delete);
if (!attset) error(E_NOMEM); /* create an attribute set */
as_chars(attset, blanks, fldseps, recseps, uvchars);
fprintf(stderr, "\n"); /* set delimiter characters */
i = io_tab(attset, fn_hdr, fn_tab, flags, 1);
if (i != 0) error(-i); /* read the table */
/* --- convert/sort domains --- */
if (atdet) { /* if automatic type determination */
for (attid = as_attcnt(attset); --attid >= 0; )
att_conv(as_att(attset, attid), AT_AUTO, NULL);
} /* try to convert attributes */
if (sort) { /* if to sort domains (values) */
for (attid = as_attcnt(attset); --attid >= 0; ) {
att = as_att(attset, attid);
if (att_type(att) != AT_SYM) continue;
att_valsort(att, (sort > 1) ? numcmp : strcmp, NULL, 0);
} /* traverse symbolic attributes */
} /* and sort their domains */
/* --- write output file --- */
if (fn_dom && *fn_dom) /* if a domain file name is given, */
out = fopen(fn_dom, "w"); /* open domain file for writing */
else { /* if no domain file name is given, */
out = stdout; fn_dom = ""; } /* write to stdout */
fprintf(stderr, "writing %s ... ", fn_dom);
if (!out) error(E_FOPEN, fn_dom);
if (as_desc(attset, out, AS_TITLE|ivals, maxlen) != 0)
error(E_FWRITE, fn_dom); /* write domain descriptions */
if (out != stdout) { /* if not written to stdout, */
i = fclose(out); out = NULL;/* close the output file */
if (i != 0) error(E_FWRITE, fn_dom);
} /* print a success message */
fprintf(stderr, "[%d attribute(s)] done.\n", as_attcnt(attset));
/* --- clean up --- */
#ifndef NDEBUG
as_delete(attset); /* delete attribute set */
#endif
#ifdef STORAGE
showmem("at end of program"); /* check memory usage */
#endif
return 0; /* return 'ok' */
} /* main() */