www.pudn.com > FP-GROWTH.rar > tfscan.c
/*----------------------------------------------------------------------
File : tfscan.c
Contents: table file scanner management
Author : Christian Borgelt
History : 04.01.1998 file created
11.03.1998 additional character flags enabled
12.08.1998 function tfs_copy added
01.09.1998 several assertions added
27.09.1998 function tfs_getfld improved
21.10.1998 bug in tfs_sgetc removed
26.11.1998 some function parameters changed to const
04.02.1999 long int changed to int
16.11.1999 number of characters cleared for an empty field
01.12.2000 '\r' made a default blank character
14.07.2001 tfs_sgetc modified, tfs_buf and tfs_err added
19.08.2001 last delimiter stored in TFSCAN structure
11.02.2002 tfs_skip, tfs_reccnt, and tfs_reset added
----------------------------------------------------------------------*/
#include
#include
#include
#include "tfscan.h"
#ifdef STORAGE
#include "storage.h"
#endif
/*----------------------------------------------------------------------
Preprocessor Definitions
----------------------------------------------------------------------*/
/* --- functions --- */
#define isblank(c) tfs_istype(tfs, TFS_BLANK, c)
#define isfldsep(c) tfs_istype(tfs, TFS_FLDSEP, c)
#define isrecsep(c) tfs_istype(tfs, TFS_RECSEP, c)
#define issep(c) tfs_istype(tfs, TFS_FLDSEP|TFS_RECSEP, c)
#define iscomment(c) tfs_istype(tfs, TFS_COMMENT, c)
/*----------------------------------------------------------------------
Functions
----------------------------------------------------------------------*/
TFSCAN* tfs_create (void)
{ /* --- create a table file scanner */
TFSCAN *tfs; /* created table file scanner */
int i; /* loop variable */
char *p; /* to traverse character flags */
tfs = (TFSCAN*)malloc(sizeof(TFSCAN));
if (!tfs) return NULL; /* allocate memory and */
tfs->reccnt = 0; /* initialize the fields */
tfs->delim = TFS_EOF;
for (p = tfs->cflags +256, i = 256; --i >= 0; )
*--p = '\0'; /* initialize the character flags */
tfs->cflags['\n'] = TFS_RECSEP;
tfs->cflags['\t'] = tfs->cflags[' '] = TFS_BLANK|TFS_FLDSEP;
tfs->cflags['\r'] = TFS_BLANK;
return tfs; /* return created table file scanner */
} /* tfs_create() */
/*--------------------------------------------------------------------*/
TFSCAN* tfs_dup (const TFSCAN *tfs)
{ /* --- duplicate a table file scanner */
TFSCAN *dup; /* created duplicate */
dup = (TFSCAN*)malloc(sizeof(TFSCAN));
if (!dup) return NULL; /* create a new table file scanner */
tfs_copy(dup, tfs); /* and copy source into it */
return dup; /* return created duplicate */
} /* tfs_dup() */
/*--------------------------------------------------------------------*/
void tfs_copy (TFSCAN *dst, const TFSCAN *src)
{ /* --- copy a table file scanner */
int i; /* loop variable */
char *d; const char *s; /* to traverse the character flags */
assert(src && dst); /* check arguments */
s = src->cflags +256; d = dst->cflags +256;
for (i = 256; --i >= 0; ) *--d = *--s;
} /* tfs_copy() */ /* copy character flags */
/*--------------------------------------------------------------------*/
int tfs_sgetc (TFSCAN *tfs, const char *s)
{ /* --- get character from string */
int c, code; /* character and character code */
if (s) tfs->s = s; /* if a new string is given, note it */
if (*tfs->s == '\0') /* if at the end of the old string, */
return -1; /* abort the function */
c = (unsigned char)*tfs->s++; /* get the next character */
if (c != '\\') /* if no quoted character, */
return c; /* simply return the character */
c = (unsigned char)*tfs->s++; /* get the next character */
switch (c) { /* and evaluate it */
case 'a': return '\a'; /* 0x07 (BEL) */
case 'b': return '\b'; /* 0x08 (BS) */
case 'f': return '\f'; /* 0x0c (FF) */
case 'n': return '\n'; /* 0x0a (NL) */
case 'r': return '\r'; /* 0x0d (CR) */
case 't': return '\t'; /* 0x09 (HT) */
case 'v': return '\v'; /* 0x0b (VT) */
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
code = c -'0'; /* --- octal character code */
c = *tfs->s; /* get the next character */
if ((c >= '0') && (c <= '7')) code = (code << 3) +c -'0';
else return code; /* decode second digit */
c = *++tfs->s; /* get the next character */
if ((c >= '0') && (c <= '7')) code = (code << 3) +c -'0';
else return c; /* decode third digit */
tfs->s++; /* consume the decoded character */
return code & 0xff; /* and return the character code */
case 'x': /* --- hexadecimal character code */
c = *tfs->s; /* get the next character */
if ((c >= '0') && (c <= '9')) code = c -'0';
else if ((c >= 'a') && (c <= 'f')) code = c -'a' +10;
else if ((c >= 'A') && (c <= 'F')) code = c -'A' +10;
else return 'x'; /* decode first digit */
c = *++tfs->s; /* get the next character */
if ((c >= '0') && (c <= '9')) code = (code << 4) +c -'0';
else if ((c >= 'a') && (c <= 'f')) code = (code << 4) +c -'a' +10;
else if ((c >= 'A') && (c <= 'F')) code = (code << 4) +c -'A' +10;
else return code; /* decode second digit */
tfs->s++; /* consume the decoded character */
return code; /* and return the character code */
default: /* non-function characters */
if (*tfs->s == '\0') return '\\';
else return (unsigned char)*tfs->s++;
} /* return character or backslash */
} /* tfs_sgetc() */
/*--------------------------------------------------------------------*/
int tfs_chars (TFSCAN *tfs, int type, const char *chars)
{ /* --- set characters */
int i, c, d; /* loop variable, characters */
char *p; /* to traverse character flags */
assert(tfs); /* check argument */
if (!chars) return -1; /* if no characters given, abort */
p = tfs->cflags +256; /* clear character flags in type */
for (i = 256; --i >= 0; ) *--p &= (char)~type;
for (c = d = tfs_sgetc(tfs, chars); c >= 0; c = tfs_sgetc(tfs, NULL))
tfs->cflags[c] |= (char)type; /* set character flags */
return (d >= 0) ? d : 0; /* return first character */
} /* tfs_chars() */
/*--------------------------------------------------------------------*/
int tfs_getfld (TFSCAN *tfs, FILE *file, char *buf, int len)
{ /* --- read a table field */
int c; /* character read */
int d; /* delimiter type */
char *p; /* to traverse the buffer */
assert(tfs && file && (!buf || (len >= 0)));
if (!buf) { /* if no buffer given, use internal */
buf = tfs->buf; len = TFS_SIZE; }
p = buf; *p = '\0'; /* clear the read buffer and */
tfs->cnt = 0; /* the number of characters read */
do { /* --- skip leading blanks */
c = getc(file); /* get the next character */
if (c == EOF) return tfs->delim = (ferror(file)) ? -1 : TFS_EOF;
} while (isblank(c)); /* while the character is blank */
if (issep(c)) { /* check for field/record separator */
if (isfldsep(c)) return tfs->delim = TFS_FLD;
tfs->reccnt++; return tfs->delim = TFS_REC;
} /* if at end of record, count reocrd */
while (1) { /* --- read value */
if (len > 0) { /* if the buffer is not full, */
len--; *p++ = (char)c; } /* store the character in the buffer */
c = getc(file); /* get the next character */
if (issep(c)) { d = (isfldsep(c)) ? TFS_FLD : TFS_REC; break; }
if (c == EOF) { d = (ferror(file)) ? -1 : TFS_EOF; break; }
} /* while character is no separator */
while (isblank(*--p)); /* --- remove trailing blanks */
*++p = '\0'; /* terminate string in buffer */
tfs->cnt = (int)(p -buf); /* store number of characters read */
if (d != TFS_FLD) { /* if not at a field separator */
if (d == TFS_REC) tfs->reccnt++;
return tfs->delim = d; /* if at end of record, count record, */
} /* and then abort the function */
while (isblank(c)) { /* --- skip trailing blanks */
c = getc(file); /* get the next character */
if (c == EOF) return tfs->delim = ferror(file) ? -1 : TFS_EOF;
} /* check for end of file */
if (isrecsep(c)) { /* check for a record separator */
tfs->reccnt++; return tfs->delim = TFS_REC; }
if (!isfldsep(c)) /* put back character (may be */
ungetc(c, file); /* necessary if blank = field sep.) */
return tfs->delim = TFS_FLD; /* return the delimiter type */
} /* tfs_getfld() */
/*--------------------------------------------------------------------*/
int tfs_skip (TFSCAN *tfs, FILE *file)
{ /* --- skip comment records */
int c; /* character read */
assert(tfs && file); /* check the function arguments */
while (1) { /* comment read loop */
c = getc(file); /* read the next character */
if (c == EOF) return tfs->delim = ferror(file) ? -1 : TFS_EOF;
if (!iscomment(c)) { /* if the next char. is no comment, */
ungetc(c, file); return 0; } /* put it back and abort */
while (!isrecsep(c)) { /* while not at end of record */
c = fgetc(file); /* get and check the next character */
if (c == EOF) return tfs->delim = ferror(file) ? -1 : TFS_EOF;
} /* consume/skip all characters */
tfs->reccnt++; /* up to the end of the record */
} /* and count the record read */
return tfs->delim = TFS_REC; /* return the delimiter type */
} /* tfs_skip() */