www.pudn.com > ids_snort.zip > mstring.c


/* $Id: mstring.c,v 1.5 2001/01/02 08:06:00 roesch Exp $ */ 
/* 
** Copyright (C) 1998,1999,2000,2001 Martin Roesch  
** 
** This program is free software; you can redistribute it and/or modify 
** it under the terms of the GNU General Public License as published by 
** the Free Software Foundation; either version 2 of the License, or 
** (at your option) any later version. 
** 
** This program is distributed in the hope that it will be useful, 
** but WITHOUT ANY WARRANTY; without even the implied warranty of 
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
** GNU General Public License for more details. 
** 
** You should have received a copy of the GNU General Public License 
** along with this program; if not, write to the Free Software 
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ 
 
/*************************************************************************** 
 * 
 * File: MSTRING.C 
 * 
 * Purpose: Provide a variety of string functions not included in libc.  Makes 
 *          up for the fact that the libstdc++ is hard to get reference 
 *          material on and I don't want to write any more non-portable c++ 
 *          code until I have solid references and libraries to use. 
 * 
 * History: 
 * 
 * Date:      Author:  Notes: 
 * ---------- ------- ---------------------------------------------- 
 *  08/19/98    MFR    Initial coding begun 
 *  03/06/99    MFR    Added Boyer-Moore pattern match routine, don't use 
 *                     mContainsSubstr() any more if you don't have to 
 *  12/31/99	JGW    Added a full Boyer-Moore implementation to increase 
 *                     performance. Added a case insensitive version of mSearch 
 * 
 **************************************************************************/ 
#include "mstring.h" 
 
#ifdef TEST_MSTRING 
 
int main() 
{ 
    char test[] = "\0\0\0\0\0\0\0\0\0CKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\0\0"; 
    char find[] = "CKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\0\0"; 
 
/*   char test[] = "\x90\x90\x90\x90\x90\x90\xe8\xc0\xff\xff\xff/bin/sh\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90"; 
     char find[] = "\xe8\xc0\xff\xff\xff/bin/sh";  */ 
    int i; 
    int toks; 
    int *shift; 
    int *skip; 
 
/*   shift=make_shift(find,sizeof(find)-1); 
     skip=make_skip(find,sizeof(find)-1); */ 
 
    printf("%d\n", 
           mSarch(test, sizeof(test) - 1, find, sizeof(find) - 1, shift, skip)); 
 
    return 0; 
} 
 
#endif 
 
/**************************************************************** 
 * 
 *  Function: mSplit() 
 * 
 *  Purpose: Splits a string into tokens non-destructively. 
 * 
 *  Parameters: 
 *      char *str => the string to be split 
 *      char *sep => a string of token seperaters 
 *      int max_strs => how many tokens should be returned 
 *      int *toks => place to store the number of tokens found in str 
 *      char meta => the "escape metacharacter", treat the character 
 *                   after this character as a literal and "escape" a 
 *                   seperator 
 * 
 *  Returns: 
 *      2D char array with one token per "row" of the returned 
 *      array. 
 * 
 ****************************************************************/ 
char **mSplit(char *str, char *sep, int max_strs, int *toks, char meta) 
{ 
    char **retstr;      /* 2D array which is returned to caller */ 
    char *idx;          /* index pointer into str */ 
    char *end;          /* ptr to end of str */ 
    char *sep_end;      /* ptr to end of seperator string */ 
    char *sep_idx;      /* index ptr into seperator string */ 
    int len = 0;        /* length of current token string */ 
    int curr_str = 0;       /* current index into the 2D return array */ 
    int last_char = 0xFF; 
 
#ifdef DEBUG 
    printf("[*] Splitting string: %s\n", str); 
    printf("curr_str = %d\n", curr_str); 
#endif 
 
    /* 
     * find the ends of the respective passed strings so our while() loops 
     * know where to stop 
     */ 
    sep_end = sep + strlen(sep); 
    end = str + strlen(str); 
 
    /* remove trailing whitespace */ 
    while(isspace((int) *(end - 1)) && ((end - 1) >= str)) 
        *(--end) = '\0';    /* -1 because of NULL */ 
 
    /* set our indexing pointers */ 
    sep_idx = sep; 
    idx = str; 
 
    /* 
     * alloc space for the return string, this is where the pointers to the 
     * tokens will be stored 
     */ 
    retstr = (char **) malloc((sizeof(char **) * max_strs)); 
 
    max_strs--; 
 
#ifdef DEBUG 
    printf("max_strs = %d  curr_str = %d\n", max_strs, curr_str); 
#endif 
 
    /* loop thru each letter in the string being tokenized */ 
    while(idx < end) 
    { 
        /* loop thru each seperator string char */ 
        while(sep_idx < sep_end) 
        { 
            /* 
             * if the current string-indexed char matches the current 
             * seperator char... 
             */ 
            if((*idx == *sep_idx) && (last_char != meta)) 
            { 
                /* if there's something to store... */ 
                if(len > 0) 
                { 
#ifdef DEBUG 
                    printf("Allocating %d bytes for token ", len + 1); 
                    fflush(stdout); 
#endif 
                    if(curr_str <= max_strs) 
                    { 
                        /* allocate space for the new token */ 
                        retstr[curr_str] = (char *) malloc((sizeof(char) * len) + 1); 
 
                        /* make sure we got a good allocation */ 
                        if(retstr[curr_str] == NULL) 
                        { 
                            fprintf(stderr, "msplit() got NULL substring malloc!\n"); 
                            exit(1); 
                        } 
                        /* copy the token into the return string array */ 
                        memcpy(retstr[curr_str], (idx - len), len); 
                        retstr[curr_str][len] = 0; 
#ifdef DEBUG 
                        printf("tok[%d]: %s\n", curr_str, retstr[curr_str]); 
                        fflush(stdout); 
#endif 
                        /* twiddle the necessary pointers and vars */ 
                        len = 0; 
                        curr_str++; 
#ifdef DEBUG 
                        printf("curr_str = %d\n", curr_str); 
                        printf("max_strs = %d  curr_str = %d\n", max_strs, curr_str); 
#endif 
                        last_char = *idx; 
                        idx++; 
                    } 
#ifdef DEBUG 
                    printf("Checking if curr_str (%d) >= max_strs (%d)\n", curr_str, max_strs); 
#endif 
 
                    /* 
                     * if we've gotten all the tokens requested, return the 
                     * list 
                     */ 
                    if(curr_str >= max_strs) 
                    { 
                        while(isspace((int) *idx)) 
                            idx++; 
 
                        len = end - idx; 
#ifdef DEBUG 
                        printf("Finishing up...\n"); 
                        printf("Allocating %d bytes for last token ", len + 1); 
                        fflush(stdout); 
#endif 
                        retstr[curr_str] = (char *) malloc((sizeof(char) * len) + 1); 
 
                        if(retstr[curr_str] == NULL) 
                            printf("Got NULL back from substr malloc\n"); 
 
                        memcpy(retstr[curr_str], idx, len); 
                        retstr[curr_str][len] = 0; 
 
#ifdef DEBUG 
                        printf("tok[%d]: %s\n", curr_str, retstr[curr_str]); 
                        fflush(stdout); 
#endif 
 
                        *toks = curr_str + 1; 
#ifdef DEBUG 
                        printf("max_strs = %d  curr_str = %d\n", max_strs, curr_str); 
                        printf("mSplit got %d tokens!\n", *toks); 
                        fflush(stdout); 
#endif 
                        return retstr; 
                    } 
                } 
                else 
                /* 
                 * otherwise, the previous char was a seperator as well, 
                 * and we should just continue 
                 */ 
                { 
                    last_char = *idx; 
                    idx++; 
                    /* make sure to reset this so we test all the sep. chars */ 
                    sep_idx = sep; 
                    len = 0; 
                } 
            } 
            else 
            { 
                /* go to the next seperator */ 
                sep_idx++; 
            } 
        } 
 
        sep_idx = sep; 
        len++; 
        last_char = *idx; 
        idx++; 
    } 
 
    /* put the last string into the list */ 
 
    if(len > 0) 
    { 
#ifdef DEBUG 
        printf("Allocating %d bytes for last token ", len + 1); 
        fflush(stdout); 
#endif 
 
        retstr[curr_str] = (char *) malloc((sizeof(char) * len) + 1); 
 
        if(retstr[curr_str] == NULL) 
            printf("Got NULL back from substr malloc\n"); 
 
        memcpy(retstr[curr_str], (idx - len), len); 
        retstr[curr_str][len] = 0; 
 
#ifdef DEBUG 
        printf("tok[%d]: %s\n", curr_str, retstr[curr_str]); 
        fflush(stdout); 
#endif 
        *toks = curr_str + 1; 
    } 
#ifdef DEBUG 
    printf("mSplit got %d tokens!\n", *toks); 
    fflush(stdout); 
#endif 
 
    /* return the token list */ 
    return retstr; 
} 
 
 
 
 
/**************************************************************** 
 * 
 *  Function: mContainsSubstr(char *, int, char *, int) 
 * 
 *  Purpose: Determines if a string contains a (non-regex) 
 *           substring. 
 * 
 *  Parameters: 
 *      buf => data buffer we want to find the data in 
 *      b_len => data buffer length 
 *      pat => pattern to find 
 *      p_len => length of the data in the pattern buffer 
 * 
 *  Returns: 
 *      Integer value, 1 on success (str constains substr), 0 on 
 *      failure (substr not in str) 
 * 
 ****************************************************************/ 
int mContainsSubstr(char *buf, int b_len, char *pat, int p_len) 
{ 
    char *b_idx;        /* index ptr into the data buffer */ 
    char *p_idx;        /* index ptr into the pattern buffer */ 
    char *b_end;        /* ptr to the end of the data buffer */ 
    int m_cnt = 0;      /* number of pattern matches so far... */ 
 
#ifdef DEBUG 
    unsigned long loopcnt = 0; 
 
#endif 
 
 
    /* mark the end of the strs */ 
    b_end = (char *) (buf + b_len); 
 
    /* init the index ptrs */ 
    b_idx = buf; 
    p_idx = pat; 
 
    do 
    { 
#ifdef DEBUG 
        loopcnt++; 
#endif 
 
        if(*p_idx == *b_idx) 
        { 
 
            if(m_cnt == (p_len - 1)) 
            { 
#ifdef DEBUG 
                printf("\n%ld compares for match\n", loopcnt); 
#endif 
                return 1; 
            } 
            m_cnt++; 
            b_idx++; 
            p_idx++; 
        } 
        else 
        { 
            if(m_cnt == 0) 
            { 
                b_idx++; 
            } 
            else 
            { 
                b_idx = b_idx - (m_cnt - 1); 
            } 
 
            p_idx = pat; 
 
            m_cnt = 0; 
        } 
 
    } while(b_idx < b_end); 
 
 
    /* if we make it here we didn't find what we were looking for */ 
    return 0; 
} 
 
 
 
 
/**************************************************************** 
 * 
 *  Function: make_skip(char *, int) 
 * 
 *  Purpose: Create a Boyer-Moore skip table for a given pattern 
 * 
 *  Parameters: 
 *      ptrn => pattern 
 *      plen => length of the data in the pattern buffer 
 * 
 *  Returns: 
 *      int * - the skip table 
 * 
 ****************************************************************/ 
int *make_skip(char *ptrn, int plen) 
{ 
    int *skip = (int *) malloc(256 * sizeof(int)); 
    int *sptr = &skip[256]; 
 
    while(sptr-- != skip) 
        *sptr = plen + 1; 
 
    while(plen != 0) 
        skip[(unsigned char) *ptrn++] = plen--; 
 
    return skip; 
} 
 
 
 
/**************************************************************** 
 * 
 *  Function: make_shift(char *, int) 
 * 
 *  Purpose: Create a Boyer-Moore shift table for a given pattern 
 * 
 *  Parameters: 
 *      ptrn => pattern 
 *      plen => length of the data in the pattern buffer 
 * 
 *  Returns: 
 *      int * - the shift table 
 * 
 ****************************************************************/ 
int *make_shift(char *ptrn, int plen) 
{ 
    int *shift = (int *) malloc(plen * sizeof(int)); 
    int *sptr = shift + plen - 1; 
    char *pptr = ptrn + plen - 1; 
    char c = ptrn[plen - 1]; 
 
    *sptr = 1; 
 
    while(sptr-- != shift) 
    { 
        char *p1 = ptrn + plen - 2, *p2, *p3; 
 
        do 
        { 
            while(p1 >= ptrn && *p1-- != c); 
 
            p2 = ptrn + plen - 2; 
            p3 = p1; 
 
            while(p3 >= ptrn && *p3-- == *p2-- && p2 >= pptr); 
        } 
        while(p3 >= ptrn && p2 >= pptr); 
 
        *sptr = shift + plen - sptr + p2 - p3; 
 
        pptr--; 
    } 
 
    return shift; 
} 
 
 
 
/**************************************************************** 
 * 
 *  Function: mSearch(char *, int, char *, int) 
 * 
 *  Purpose: Determines if a string contains a (non-regex) 
 *           substring. 
 * 
 *  Parameters: 
 *      buf => data buffer we want to find the data in 
 *      blen => data buffer length 
 *      ptrn => pattern to find 
 *      plen => length of the data in the pattern buffer 
 *      skip => the B-M skip array 
 *      shift => the B-M shift array 
 * 
 *  Returns: 
 *      Integer value, 1 on success (str constains substr), 0 on 
 *      failure (substr not in str) 
 * 
 ****************************************************************/ 
int mSearch(char *buf, int blen, char *ptrn, int plen, int *skip, int *shift) 
{ 
    int b_idx = plen; 
 
#ifdef  DEBUG 
    int cmpcnt = 0; 
 
#endif 
 
    if(plen == 0) 
        return 1; 
 
    while(b_idx <= blen) 
    { 
        int p_idx = plen, skip_stride, shift_stride; 
 
        while(buf[--b_idx] == ptrn[--p_idx]) 
        { 
#ifdef	DEBUG 
            cmpcnt++; 
#endif 
            if(p_idx == 0) 
            { 
#ifdef	DEBUG 
                fprintf(stdout, "match: compares = %d.\n", cmpcnt); 
#endif 
                return 1; 
            } 
        } 
 
        skip_stride = skip[(unsigned char) buf[b_idx]]; 
        shift_stride = shift[p_idx]; 
 
        b_idx += (skip_stride > shift_stride) ? skip_stride : shift_stride; 
    } 
 
#ifdef	DEBUG 
    fprintf(stdout, "no match: compares = %d.\n", cmpcnt); 
#endif 
 
    return 0; 
} 
 
 
 
/**************************************************************** 
 * 
 *  Function: mSearchCI(char *, int, char *, int) 
 * 
 *  Purpose: Determines if a string contains a (non-regex) 
 *           substring matching is case insensitive 
 * 
 *  Parameters: 
 *      buf => data buffer we want to find the data in 
 *      blen => data buffer length 
 *      ptrn => pattern to find 
 *      plen => length of the data in the pattern buffer 
 *      skip => the B-M skip array 
 *      shift => the B-M shift array 
 * 
 *  Returns: 
 *      Integer value, 1 on success (str constains substr), 0 on 
 *      failure (substr not in str) 
 * 
 ****************************************************************/ 
int mSearchCI(char *buf, int blen, char *ptrn, int plen, int *skip, int *shift) 
{ 
    int b_idx = plen; 
 
#ifdef  DEBUG 
    int cmpcnt = 0; 
 
#endif 
 
    if(plen == 0) 
        return 1; 
 
    while(b_idx <= blen) 
    { 
        int p_idx = plen, skip_stride, shift_stride; 
 
        while((unsigned char) ptrn[--p_idx] == toupper((unsigned char) buf[--b_idx])) 
        { 
#ifdef	DEBUG 
            cmpcnt++; 
#endif 
            if(p_idx == 0) 
            { 
#ifdef	DEBUG 
                fprintf(stdout, "match: compares = %d.\n", cmpcnt); 
#endif 
                return 1; 
            } 
        } 
 
        skip_stride = skip[toupper((unsigned char) buf[b_idx])]; 
        shift_stride = shift[p_idx]; 
 
        b_idx += (skip_stride > shift_stride) ? skip_stride : shift_stride; 
    } 
 
#ifdef	DEBUG 
    fprintf(stdout, "no match: compares = %d.\n", cmpcnt); 
#endif 
 
    return 0; 
} 
 
 
/**************************************************************** 
 * 
 *  Function: mSearchREG(char *, int, char *, int) 
 * 
 *  Purpose: Determines if a string contains a (regex) 
 *           substring. 
 * 
 *  Parameters: 
 *      buf => data buffer we want to find the data in 
 *      blen => data buffer length 
 *      ptrn => pattern to find 
 *      plen => length of the data in the pattern buffer 
 *      skip => the B-M skip array 
 *      shift => the B-M shift array 
 * 
 *  Returns: 
 *      Integer value, 1 on success (str constains substr), 0 on 
 *      failure (substr not in str) 
 * 
 ****************************************************************/ 
int mSearchREG(char *buf, int blen, char *ptrn, int plen, int *skip, int *shift) 
{ 
    int b_idx = plen; 
    int literal = 0; 
 
#ifdef  DEBUG 
    int cmpcnt = 0; 
 
#endif 
 
    if(plen == 0) 
        return 1; 
 
    while(b_idx <= blen) 
    { 
        int p_idx = plen, skip_stride, shift_stride; 
 
 
        while(buf[--b_idx] == ptrn[--p_idx] 
              || (ptrn[p_idx] == '?' && !literal) 
              || (ptrn[p_idx] == '*' && !literal) 
              || (ptrn[p_idx] == '\\' && !literal)) 
        { 
#ifdef	DEBUG 
            cmpcnt++; 
#endif 
 
            if(literal) 
                literal = 0; 
            if(!literal && ptrn[p_idx] == '\\') 
                literal = 1; 
            if(ptrn[p_idx] == '*') 
            { 
                while(p_idx != 0 && ptrn[--p_idx] == '*'); /* fool-proof */ 
                if(p_idx == 0) 
                { 
#ifdef DEBUG 
                    fprintf(stdout, "match: compares = %d.\n", cmpcnt); 
#endif 
                    return 1; 
                } 
                while(b_idx != 0) 
                { 
                    int ret = 0; 
 
                    /* 
                     * *ooooooch* here we go recurent.. I smell lots of 
                     * headaches here. The algorytm is simple though: if 
                     * substring which we haven't matched matches 
                     * subsctring-to-match before asterick we return `match' 
                     * otherwise we consider non-matched symbol a part of 
                     * `*', move one symbol left and try to match it again. 
                     */ 
                    if(ret == (mSearchREG(&buf[b_idx--], b_idx, 
                                          &ptrn[p_idx], p_idx, 
                                          skip, &shift[p_idx])) != 0) 
                    { 
                        printf("recurs: %i\n", ret); 
                        return ret; 
                    } 
                } 
            } 
            if(p_idx == 0) 
            { 
#ifdef	DEBUG 
                fprintf(stdout, "match: compares = %d.\n", cmpcnt); 
#endif 
                return 1; 
            } 
            if(b_idx == 0) 
                break; 
        } 
 
        skip_stride = skip[(unsigned char) buf[b_idx]]; 
        shift_stride = shift[p_idx]; 
 
        b_idx += (skip_stride > shift_stride) ? skip_stride : shift_stride; 
    } 
 
#ifdef	DEBUG 
    fprintf(stdout, "no match: compares = %d.\n", cmpcnt); 
#endif 
 
    return 0; 
}