www.pudn.com > Îı¾ÅÅÐò.rar > demo_c.c


/* demo.c -- Implementations of multikey quicksort and ternary search trees 
   Usage 
    demo                  Run basic timings on /usr/dict/words 
    demo            Run basic timings on  
    demo  trysearch Interactive pm and nn search on  
    demo  nncost    Run near neigbhor expers on  
    demo  pmcost    Interactive partial match expers on  
 */ 
 
#include  
#include  
#include  
#include  
 
// MULTIKEY QUICKSORT 
 
#ifndef min 
#define min(a, b) ((a)<=(b) ? (a) : (b)) 
#endif 
 
#define swap(a, b) { char *t=x[a]; \ 
                     x[a]=x[b]; x[b]=t; } 
#define i2c(i) x[i][depth] 
 
void vecswap(int i, int j, int n, char *x[]) 
{   while (n-- > 0) { 
        swap(i, j); 
        i++; 
        j++; 
    } 
} 
 
void ssort1(char *x[], int n, int depth) 
{   int    a, b, c, d, r, v; 
    if (n <= 1) 
        return; 
    a = rand() % n; 
    swap(0, a); 
    v = i2c(0); 
    a = b = 1; 
    c = d = n-1; 
    for (;;) { 
        while (b <= c && (r = i2c(b)-v) <= 0) { 
            if (r == 0) { swap(a, b); a++; } 
            b++; 
        } 
        while (b <= c && (r = i2c(c)-v) >= 0) { 
            if (r == 0) { swap(c, d); d--; } 
            c--; 
        } 
        if (b > c) break; 
        swap(b, c); 
        b++; 
        c--; 
    } 
    r = min(a, b-a);     vecswap(0, b-r, r, x); 
    r = min(d-c, n-d-1); vecswap(b, n-r, r, x); 
    r = b-a; ssort1(x, r, depth); 
    if (i2c(r) != 0) 
        ssort1(x + r, a + n-d-1, depth+1); 
    r = d-c; ssort1(x + n-r, r, depth); 
} 
 
void ssort1main(char *x[], int n) 
{ ssort1(x, n, 0); } 
 
 
// ssort2 -- Faster Version of Multikey Quicksort 
 
void vecswap2(char **a, char **b, int n) 
{   while (n-- > 0) { 
        char *t = *a; 
        *a++ = *b; 
        *b++ = t; 
    } 
} 
 
#define swap2(a, b) { t = *(a); *(a) = *(b); *(b) = t; } 
#define ptr2char(i) (*(*(i) + depth)) 
 
char **med3func(char **a, char **b, char **c, int depth) 
{   int va, vb, vc; 
    if ((va=ptr2char(a)) == (vb=ptr2char(b))) 
        return a; 
    if ((vc=ptr2char(c)) == va || vc == vb) 
        return c;        
    return va < vb ? 
          (vb < vc ? b : (va < vc ? c : a ) ) 
        : (vb > vc ? b : (va < vc ? a : c ) ); 
} 
#define med3(a, b, c) med3func(a, b, c, depth) 
 
void inssort(char **a, int n, int d) 
{   char **pi, **pj, *s, *t; 
    for (pi = a + 1; --n > 0; pi++) 
        for (pj = pi; pj > a; pj--) { 
            // Inline strcmp: break if *(pj-1) <= *pj 
            for (s=*(pj-1)+d, t=*pj+d; *s==*t && *s!=0; s++, t++) 
                ; 
            if (*s <= *t) 
                break; 
            swap2(pj, pj-1); 
    } 
} 
 
void ssort2(char **a, int n, int depth) 
{   int d, r, partval; 
    char **pa, **pb, **pc, **pd, **pl, **pm, **pn, *t; 
    if (n < 10) { 
        inssort(a, n, depth); 
        return; 
    } 
    pl = a; 
    pm = a + (n/2); 
    pn = a + (n-1); 
    if (n > 30) { // On big arrays, pseudomedian of 9 
        d = (n/8); 
        pl = med3(pl, pl+d, pl+2*d); 
        pm = med3(pm-d, pm, pm+d); 
        pn = med3(pn-2*d, pn-d, pn); 
    } 
    pm = med3(pl, pm, pn); 
    swap2(a, pm); 
    partval = ptr2char(a); 
    pa = pb = a + 1; 
    pc = pd = a + n-1; 
    for (;;) { 
        while (pb <= pc && (r = ptr2char(pb)-partval) <= 0) { 
            if (r == 0) { swap2(pa, pb); pa++; } 
            pb++; 
        } 
        while (pb <= pc && (r = ptr2char(pc)-partval) >= 0) { 
            if (r == 0) { swap2(pc, pd); pd--; } 
            pc--; 
        } 
        if (pb > pc) break; 
        swap2(pb, pc); 
        pb++; 
        pc--; 
    } 
    pn = a + n; 
    r = min(pa-a, pb-pa);    vecswap2(a,  pb-r, r); 
    r = min(pd-pc, pn-pd-1); vecswap2(pb, pn-r, r); 
    if ((r = pb-pa) > 1) 
        ssort2(a, r, depth); 
    if (ptr2char(a + r) != 0) 
        ssort2(a + r, pa-a + pn-pd-1, depth+1); 
    if ((r = pd-pc) > 1) 
        ssort2(a + n-r, r, depth); 
} 
 
void ssort2main(char **a, int n) { ssort2(a, n, 0); } 
 
 
// TERNARY SEARCH TREE ALGS 
 
typedef struct tnode *Tptr; 
typedef struct tnode { 
    char splitchar; 
    Tptr lokid, eqkid, hikid; 
} Tnode; 
Tptr root; 
 
// Insert 1 -- Simple Insertion Algorithm 
 
Tptr insert1(Tptr p, char *s) 
{   if (p == 0) { 
        p = (Tptr) malloc(sizeof(Tnode)); 
        p->splitchar = *s; 
        p->lokid = p->eqkid = p->hikid = 0; 
    } 
    if (*s < p->splitchar) 
        p->lokid = insert1(p->lokid, s); 
    else if (*s == p->splitchar) { 
        if (*s != 0) 
            p->eqkid = insert1(p->eqkid, ++s); 
    } else 
        p->hikid = insert1(p->hikid, s); 
    return p; 
} 
 
void cleanup1(Tptr p) 
{   if (p) { 
        cleanup1(p->lokid); 
        cleanup1(p->eqkid); 
        cleanup1(p->hikid); 
        free(p); 
    } 
} 
 
 
// Insert 2 -- Faster version of Insert 
 
#define BUFSIZE 1000 
Tptr buf; 
int bufn, freen; 
void * freearr[10000]; 
int storestring = 0; 
 
void insert2(char *s) 
{   int d; 
    char *instr = s; 
 
    Tptr pp, *p; 
    p = &root; 
    while (pp = *p) { 
        if ((d = *s - pp->splitchar) == 0) { 
            if (*s++ == 0) return; 
            p = &(pp->eqkid); 
        } else if (d < 0) 
            p = &(pp->lokid); 
        else 
            p = &(pp->hikid); 
    } 
    for (;;) { 
        // *p = (Tptr) malloc(sizeof(Tnode)); 
          if (bufn-- == 0) { 
              buf = (Tptr) malloc(BUFSIZE * 
                              sizeof(Tnode)); 
              freearr[freen++] = (void *) buf; 
              bufn = BUFSIZE-1; 
          } 
          *p = buf++; 
        pp = *p; 
        pp->splitchar = *s; 
        pp->lokid = pp->eqkid = pp->hikid = 0; 
        if (*s++ == 0) { 
            if (storestring) 
                pp->eqkid = (Tptr) instr; 
            return; 
        } 
        p = &(pp->eqkid); 
    } 
} 
void cleanup2() 
{   int i; 
    for (i = 0; i < freen; i++) 
        free(freearr[i]); 
} 
 
// Search Algorithms 
 
int search1(char *s) 
{   Tptr p; 
    p = root; 
    while (p) { 
        if (*s < p->splitchar) 
            p = p->lokid; 
        else if (*s == p->splitchar)  { 
            if (*s++ == 0) 
                return 1; 
            p = p->eqkid; 
        } else 
            p = p->hikid; 
    } 
    return 0; 
} 
 
int search2(char *s) 
{   int d, sc; 
    Tptr p; 
    sc = *s; 
    p = root; 
    while (p) { 
        if ((d = sc - p->splitchar) == 0) { 
            if (sc == 0) 
                return 1; 
            sc = *++s; 
            p = p->eqkid; 
        } else if (d < 0) 
            p = p->lokid; 
        else  
            p = p->hikid; 
    } 
    return 0; 
} 
 
// Advanced searching: Partial match, near words 
 
int nodecnt; 
char *srcharr[100000]; 
int    srchtop; 
 
void pmsearch(Tptr p, char *s) 
{   if (!p) return; 
    nodecnt++; 
    if (*s == '.' || *s < p->splitchar) 
        pmsearch(p->lokid, s); 
    if (*s == '.' || *s == p->splitchar) 
        if (p->splitchar && *s) 
            pmsearch(p->eqkid, s+1); 
    if (*s == 0 && p->splitchar == 0) 
        srcharr[srchtop++] = 
            (char *) p->eqkid; 
    if (*s == '.' || *s > p->splitchar) 
        pmsearch(p->hikid, s); 
} 
 
void nearsearch(Tptr p, char *s, int d) 
{   if (!p || d < 0) return; 
    nodecnt++; 
    if (d > 0 || *s < p->splitchar) 
        nearsearch(p->lokid, s, d); 
    if (p->splitchar == 0) { 
        if ((int) strlen(s) <= d) 
            srcharr[srchtop++] = 
                (char *) p->eqkid; 
    } else 
        nearsearch(p->eqkid, *s ? s+1:s, 
            (*s==p->splitchar) ? d:d-1); 
    if (d > 0 || *s > p->splitchar) 
        nearsearch(p->hikid, s, d); 
} 
 
// OTHER SET ALGORITHMS AND DATA STRUCTURES 
 
// Private scmp to avoid tuned library function 
int scmp(char *s, char *t) 
{   for ( ; *s == *t; s++, t++) 
        if (*s == 0) 
            return 0; 
    return *s - *t; 
} 
 
// Binary search 
 
int sbsearch(char *key, char **a, int n) 
{   int    m; 
    char    *s, *t; 
    while (n >= 1) { 
        m = n/2; 
        for (s = key, t = a[m]; *s == *t; s++, t++) 
            if (*s == 0) 
                return 1; 
        if (*s < *t) { 
            n = m; 
        } else { 
            a = a + m+1; n = n - m - 1; 
        } 
    } 
    return 0; 
} 
 
// Hashing 
 
typedef struct hnode *Hptr; 
typedef struct hnode { 
    char    *str; 
    Hptr    next; 
} Hnode; 
Hptr *hashtab; 
int    tabsize; 
 
int hashfunc(char *s) 
{   unsigned n = 0; 
    for ( ; *s; s++) 
        n = 31 * n + *s; 
    return n % tabsize; 
} 
 
void hbuild(char **a, int n) 
{   int    i, j; 
    Hptr    p; 
    tabsize = n; 
    hashtab = (Hptr *) malloc(tabsize * sizeof(Hptr)); 
    for (i = 0; i < tabsize; i++) 
        hashtab[i] = 0; 
    for (i = 0; i < tabsize; i++) { 
        j = hashfunc(a[i]); 
        p = (Hptr) malloc(sizeof(Hnode)); 
        p->str = a[i]; 
        p->next = hashtab[j]; 
        hashtab[j] = p; 
    } 
} 
 
int hsearch(char *s) 
{   Hptr p; 
    for (p = hashtab[hashfunc(s)]; p; p = p->next) 
        if (scmp(s, p->str) == 0) 
            return 1; 
    return 0; 
} 
 
int hsearch2(char *ins) 
{   Hptr p; 
    char *s, *t; 
    for (p = hashtab[hashfunc(ins)]; p; p = p->next) { 
        // if (scmp(ins, p->str) == 0) 
        //     return 1; 
        for (s = ins, t = p->str; *s == *t; s++, t++) 
            if (*s == 0) 
                return 1; 
    } 
    return 0; 
} 
 
int hsearch3(char *s) 
{   Hptr p; 
    for (p = hashtab[hashfunc(s)]; p; p = p->next) 
        if (strcmp(s, p->str) == 0) 
            return 1; 
    return 0; 
} 
 
// Radix Sort -- McIlroy, Bostic and McIlroy 
 
#define SIZE 510 
#define THRESHOLD 16 
 
typedef char *string; 
typedef struct { string *sa; int sn, si; } rstack_t; 
 
#define rswap(p, q, r)    r = p, p = q, q = r 
 
void simplesort(string *a, int n, int b)    /* insertion sort */ 
{     string *ak, *ai, s, t; 
 
    for (ak = a+1; --n >= 1; ak++) { 
        for (ai = ak; ai > a; ai--) { 
            for (s = ai[0]+b, t = ai[-1]+b; *s; s++, t++) 
                if (*s != *t) 
                    break; 
            if (*s >= *t) 
                break; 
            rswap(ai[0], ai[-1], s); 
        } 
    } 
} 
 
#define push(a, n, i)    sp->sa = a, sp->sn = n, (sp++)->si = i 
#define pop(a, n, i)    a = (--sp)->sa, n = sp->sn, i = sp->si 
#define stackempty()    (sp <= stack) 
 
void rsorta(string *a, int n, int b) 
{   rstack_t        stack[SIZE], stmp, *oldsp, *bigsp, *sp = stack; 
    string        *pile[256], *ak, *an, r, t; 
    static int    count[256], cmin, nc; 
    int        c, *cp; 
 
    push(a, n, b); 
    while (!stackempty()) { 
        pop(a, n, b); 
        if(n < THRESHOLD) {        // divert 
            simplesort(a, n, b); 
            continue; 
        } 
        an = a + n; 
        if(nc == 0) {            // nonrecursive? 
            cmin = 255;        // tally 
            for (ak = a; ak < an; ) { 
                c = (*ak++)[b]; 
                if(++count[c] == 1 && c) { 
                    if(c < cmin) cmin = c; 
                    nc++; 
            }    } 
            if(sp+nc > stack+SIZE) { // stack overflow 
                rsorta(a, n, b); 
                continue; 
        }    } 
        oldsp = bigsp = sp, c = 2;    // logarithmic stack 
        pile[0] = ak = a + count[0];    // find places 
        for(cp = count+cmin; nc > 0; cp++, nc--) { 
            while(*cp == 0) cp++; 
            if(*cp > 1) { 
                if(*cp > c) c = *cp, bigsp = sp; 
                push(ak, *cp, b+1); 
            } 
            pile[cp-count] = ak += *cp; 
        } 
        rswap(*oldsp, *bigsp, stmp); 
                        // permute home 
        for(ak = a; ak < an; ak += count[c], count[c] = 0) { 
            r = *ak; 
            while(--pile[c = r[b]] > ak) 
                rswap(*pile[c], r, t); 
            *ak = r; 
        }        // here nc = count[...] = 0 
}    } 
 
void rsort(string *a, int n) { rsorta(a, n, 0); } 
 
 
 
// TIMING 
 
// Sort support functions 
 
void scramble(char *x[], int n) 
{   int i; 
    for ( ; n > 0; n--) { 
        i = rand() % (n+1); 
        swap(n, i); 
    } 
} 
 
int qscomp(const void *p, const void *q ) 
{   return strcmp(* (char**) p, * (char**) q ); 
} 
#define DOQSORT qsort((void *) a, (size_t) n, sizeof(char *), qscomp) 
 
 
// Insertion support functions 
 
int instype; 
 
void rinsall(char **a, int n) 
{   int m; 
    if (n < 1) return; 
    m = n/2; 
    switch (instype) { 
    case 1: root = insert1(root,a[m]); break; 
    case 2: insert2(a[m]); break; 
    case 9: break; 
    } 
    rinsall(a, m); 
    rinsall(a + m+1, n-m-1); 
} 
 
void insall(char **a, int n) 
{   switch (instype) { 
    case 1: root = 0; rinsall(a, n); break; 
    case 2: root = 0; bufn = freen = 0; rinsall(a, n); break; 
    case 9: rinsall(a, n); break; 
    } 
} 
 
// Search support functions 
 
int searchtype; 
 
void searchall(char **a, int n) 
{   int i, j; 
    char s[100];  
    for (i = 0; i < n; i++) { 
        strcpy(s, a[i]); 
        // s[0]++;  // Uncomment for unsuccessful searches 
        switch(searchtype) { 
        case 1: j = search1(s); break; 
        case 2: j = search2(s); break; 
        case 3: srchtop = 0; pmsearch(root, s); j = srchtop; break; 
        case 4: srchtop = 0; nearsearch(root, s, 0); j = srchtop; break; 
        case 5: j = sbsearch(s, a, n); break; 
        case 6: j = hsearch(s); break; 
        case 7: j = hsearch2(s); break; 
        case 8: j = hsearch3(s); break; 
        case 9: j = 1; break; 
        } 
        if (j != 1)  // Comment for unsuccessful searches 
           printf("search bug at %d, val %d\n", i, j); 
    }     
} 
 
 
// Main timing 
 
int    n = 0, starttime,  rptctr; 
 
#define TASK(s)    printf("%s", s); 
#define CIN        starttime = clock(); 
#define COUT    printf("\t%d", clock()-starttime); 
#define NL    printf("\n") 
#define REPEAT(s)    for (rptctr=0; rptctr<5; rptctr++) { s }; NL; 
 
char space[1000000], *a[100000]; 
 
void timeall() 
{   TASK("System Qsort") 
        REPEAT(scramble(a, n); CIN; DOQSORT; COUT;) 
    TASK("Simple MKQSort"); 
        REPEAT(scramble(a, n); CIN; ssort1main(a, n); COUT;) 
    TASK("Faster MKQSort"); 
        REPEAT(scramble(a, n); CIN; ssort2main(a, n); COUT;) 
    TASK("Radix Sort"); 
        REPEAT(scramble(a, n); CIN; rsort(a, n); COUT;) 
    TASK("Null Insert"); 
        instype = 9; 
        REPEAT(CIN; insall(a, n); COUT;) 
    TASK("Simp TST Insert"); 
        instype = 1; 
        REPEAT(CIN; insall(a, n); COUT; cleanup1(root);) 
    TASK("Fast TST Insert"); 
        instype = 2; 
        REPEAT(CIN; insall(a, n); COUT; cleanup2();) 
    TASK("Null Search"); 
        searchtype = 9; 
        REPEAT(CIN; searchall(a, n); COUT;) 
instype = 2; insall(a, n);    // Build TST for searching 
    TASK("Simp TST Search"); 
        searchtype = 1; 
        REPEAT(CIN; searchall(a, n); COUT;) 
    TASK("Fast TST Search"); 
        searchtype = 2; 
        REPEAT(CIN; searchall(a, n); COUT;) 
    TASK("PM TST Search"); 
        searchtype = 3; 
        REPEAT(CIN; searchall(a, n); COUT;) 
    TASK("Near TST Search"); 
        searchtype = 4; 
        REPEAT(CIN; searchall(a, n); COUT;) 
cleanup2();                   // Remove TST 
    TASK("Binary Search"); 
        searchtype = 5; 
        REPEAT(CIN; searchall(a, n); COUT;) 
    TASK("Build Hash"); 
        CIN; 
        hbuild(a, n); // Build once -- no cleanup 
        COUT; NL; 
    TASK("Hash Search"); 
        searchtype = 6; 
        REPEAT(CIN; searchall(a, n); COUT;) 
    TASK("Hash, Inln Cmp"); 
        searchtype = 7; 
        REPEAT(CIN; searchall(a, n); COUT;) 
    TASK("Hash, Sys Cmp"); 
        searchtype = 8; 
        REPEAT(CIN; searchall(a, n); COUT;) 
// Hash table is still using space 
} 
 
void buildptrtree() 
{   TASK("Building TST"); 
        CIN; 
        storestring = 1;    // Sleazy hack: store strings in eqkid 
        ssort2main(a, n); 
        instype = 2; 
        insall(a, n); 
        COUT; NL; 
} 
 
void trysearch() 
{   char s[100]; 
    int i, d; 
 
    buildptrtree(); 
    printf("Enter searches:    (dist=-1 for pm search)\n"); 
    while (scanf("%d %s", &d, s) != EOF) { 
        srchtop = 0; 
        nodecnt = 0; 
        CIN; 
        if (d < 0) 
            pmsearch(root, s); 
        else 
            nearsearch(root, s, d); 
        printf(" matches=%d nodes=%d clicks=", srchtop, nodecnt); 
        COUT; NL; 
        for (i = 0; i < min(srchtop, 40); i++) 
            printf(" %s", srcharr[i]); 
        NL; 
    } 
} 
 
void nncost() 
{   int i, d, mincost, minind, maxcost, maxind;  
 	double sum; 
    buildptrtree(); 
    for (d = 0; d <= 4; d++) { 
        CIN; 
        mincost = 999999; maxcost = -1; sum = 0.0; 
        for (i = 0; i < n; i++) { 
            srchtop = 0; nodecnt = 0; 
            nearsearch(root, a[i], d); 
			sum += nodecnt; 
            if (nodecnt <= mincost) { mincost = nodecnt; minind = i; } 
            if (nodecnt >= maxcost) { maxcost = nodecnt; maxind = i; } 
        } 
        printf("Dist %d\t%d %s\t%d %s\t%g", d, 
            mincost, a[minind], 
            maxcost, a[maxind], 
			sum/n); 
        COUT; NL; 
    }     
} 
 
void pmcost() 
{   int i, j, l, u, mincost, minind, maxcost, maxind; 
	double sum; 
	char buf[100];  
    buildptrtree(); 
    printf("Enter l, u pair\n"); 
	while (scanf("%d %d", &l, &u) != EOF) { 
        CIN; 
        mincost = 999999; maxcost = -1; sum = 0.0; 
        for (i = 0; i < n; i++) { 
            strcpy(buf, a[i]); 
            for (j = l; j <= u; j++) 
				buf[j] = '.'; 
            srchtop = 0; nodecnt = 0; 
            pmsearch(root, buf); 
			sum += nodecnt; 
            if (nodecnt <= mincost) { mincost = nodecnt; minind = i; } 
            if (nodecnt >= maxcost) { maxcost = nodecnt; maxind = i; } 
        } 
        printf("Range: %d,%d\t%d %s\t%d %s\t%g", l, u, 
            mincost, a[minind], 
            maxcost, a[maxind], 
			sum/n); 
        COUT; NL; 
	} 
} 
 
 
int main(int argc, char *argv[]) 
{   int    i, globalstarttime; 
    char *s = space, *t, *fname; 
    FILE *fp; 
 
    if (argc == 1) // no args 
        fname = "/usr/dict/words"; 
    else // one arg: file name 
        fname = argv[1]; 
    setbuf(stdout, 0); 
    if ((fp = fopen(fname, "r")) == NULL) { 
        fprintf(stderr, "  Can't open file\n"); 
        exit(1); 
    } 
    globalstarttime = clock(); 
    TASK("Big Malloc"); 
        CIN; 
        t = (char *) malloc(8000000*sizeof(char)); 
        free(t); 
        COUT; NL; 
    TASK("Reading Input"); 
        CIN; 
        a[0] = s; 
        while ((i = getc(fp)) != EOF) { 
            if (i == '\n') { 
                *s++ = 0; 
                a[++n] = s; 
            } else 
                *s++ = i; 
        } 
        COUT; NL; 
    if (argc < 3) { // one arg: file name 
        timeall(); 
    } else { 
        if (strcmp(argv[2], "trysearch") == 0) { 
            trysearch(); 
        } else if (strcmp(argv[2], "nncost") == 0) { 
            nncost(); 
        } else if (strcmp(argv[2], "pmcost") == 0) { 
			pmcost(); 
		} else 
            printf("Unrecognized option\n"); 
    } 
    i = clock() - globalstarttime; 
    printf("Total clicks\t%d\nTotal secs\t%4.3f\n", 
        i, (double) i / CLOCKS_PER_SEC); 
    return 0; 
}