/* www.pudn.com > unmime.rar > UNMIME.C */


/* 
 * MIME mail decoding. 
 * 
 * This module contains decoding routines for converting 
 * quoted-printable data into pure 8-bit data, in MIME 
 * formatted messages. 
 * 
 * By Henrik Storner  
 * 
 * Configuration file support for fetchmail 4.3.8 by  
 * Frank Damgaard  
 *  
 */ 
 
#include "config.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include "fetchmail.h"
 
/*
 * Convert one ASCII hexadecimal digit to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'.  Any other input
 * yields 16, a value that can never be a hex digit, so callers can
 * detect bad input with a simple "> 15" test.  (Returning the input
 * byte itself would make control characters such as NUL, TAB, CR and
 * LF - whose codes are below 16 - look like valid digits.)
 */
static unsigned char unhex(unsigned char c)
{
  if ((c >= '0') && (c <= '9'))
    return (c - '0');
  else if ((c >= 'A') && (c <= 'F'))
    return (c - 'A' + 10);
  else if ((c >= 'a') && (c <= 'f'))
    return (c - 'a' + 10);
  else
    return 16;	/* invalid hex character */
}
 
/*
 * Combine two hexadecimal digit characters into one byte.
 *
 *  c1    : character holding the high nibble
 *  c2    : character holding the low nibble
 *  c_out : receives the decoded byte; left untouched on failure
 *
 * Returns 0 when both characters were hex digits, 1 otherwise.
 */
static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
{
  const unsigned char hi = unhex(c1);
  const unsigned char lo = unhex(c2);

  if ((hi <= 15) && (lo <= 15)) {
    *c_out = (unsigned char) ((hi << 4) | lo);
    return 0;
  }

  return 1;
}
 
 
 
/* 
 * Routines to decode MIME QP-encoded headers, as per RFC 2047. 
 */ 
 
/* States of the decoding state machine */ 
#define S_COPY_PLAIN        0	/* Just copy, but watch for the QP flag */ 
#define S_SKIP_MIMEINIT     1	/* Get the encoding, and skip header */ 
#define S_COPY_MIME         2	/* Decode a sequence of coded characters */ 
 
static const char MIMEHDR_INIT[]  = "=?";	/* Start of coded sequence */ 
static const char MIMEHDR_END[]   = "?=";	/* End of coded sequence */ 
 
/*
 * Decode, in place, a NUL-terminated buffer holding header text with
 * RFC 2047 encoded-words ("=?charset?encoding?text?=").
 *
 * Only the transfer encoding is undone: 'q' (quoted-printable style)
 * and 'b' (base64, via from64tobits()) are decoded, any other encoding
 * letter has its text copied unchanged.  The charset name is ignored;
 * no character-set conversion is done.
 *
 * The output is written over the input.  That works because the
 * decoded text is never longer than the encoded text, so the write
 * pointer never overtakes the read pointer.
 *
 * Whitespace that separates two adjacent encoded-words is dropped.
 * A "=?" that does not introduce a well-formed "charset?e?" prefix is
 * kept in the output as literal text.
 */
void UnMimeHeader(unsigned char *hdr)
{
  int  state = S_COPY_PLAIN;
  unsigned char *p_in, *p_out, *p;
  unsigned char enc = '\0';		/* initialization pacifies -Wall */
  size_t n;

  /* Speed up in case this is not a MIME-encoded header */
  if (strstr((char *) hdr, MIMEHDR_INIT) == NULL)
    return;   /* No MIME header */

  /* Loop through the buffer.
   *  p_in : Next char to be processed.
   *  p_out: Where to put the next processed char
   *  enc  : Encoding used (usually, 'q' = quoted-printable)
   */
  for (p_out = p_in = hdr; (*p_in); ) {
    switch (state) {
    case S_COPY_PLAIN:
      p = (unsigned char *) strstr((char *) p_in, MIMEHDR_INIT);
      if (p == NULL) {
        /*
         * No more coded data in buffer,
         * just move remainder into place.
         */
        n = strlen((char *) p_in);   /* How much left */
        memmove(p_out, p_in, n);
        p_in += n; p_out += n;
      }
      else {
        /* MIME header init found at location p */
        if (p > p_in) {
          /* There are some uncoded chars at the beginning. */
          n = (size_t) (p - p_in);
          memmove(p_out, p_in, n);
          p_out += n;
        }
        p_in = (p + 2);
        state = S_SKIP_MIMEINIT;
      }
      break;

    case S_SKIP_MIMEINIT:
      /* Mime type definition: "charset?encoding?" */
      p = (unsigned char *) strchr((char *) p_in, '?');

      /*
       * p_in .. (p-1) holds the charset, p[1] is the transfer
       * encoding and p[2] must be a '?'.  p[1] is checked first so
       * that p[2] is never read beyond the terminating NUL.
       */
      if ((p != NULL) && (p[1] != '\0') && (p[2] == '?')) {
        enc = tolower(p[1]);
        p_in = p + 3;
        state = S_COPY_MIME;
      }
      else {
        /*
         * Invalid data: the "=?" was ordinary text.  Put it back;
         * p_out is at least two bytes behind p_in here, so this
         * cannot overwrite unread input.
         */
        *p_out++ = MIMEHDR_INIT[0];
        *p_out++ = MIMEHDR_INIT[1];
        state = S_COPY_PLAIN;
      }
      break;

    case S_COPY_MIME:
      p = (unsigned char *) strstr((char *) p_in, MIMEHDR_END);  /* Find end of coded data */
      if (p == NULL) p = p_in + strlen((char *) p_in);
      while (p_in < p) {
        /* Decode all encoded data */
        if (enc == 'q') {
          if (*p_in == '=') {
            /*
             * Decode one char qp-coded at (p_in+1) and (p_in+2), but
             * only if both digits lie inside the encoded text;
             * otherwise p_in could be pushed past the end of it.
             */
            if (((p - p_in) >= 3) &&
                (qp_char(p_in[1], p_in[2], p_out) == 0))
              p_in += 3;
            else {
              /* Invalid QP data - pass through unchanged. */
              *p_out = *p_in;
              p_in++;
            }
          }
          else if (*p_in == '_') {
            /*
             * RFC 2047: '_' inside encoded word represents 0x20.
             * NOT a space - always the value 0x20.
             */
            *p_out = 0x20;
            p_in++;
          }
          else {
            /* Copy unchanged */
            *p_out = *p_in;
            p_in++;
          }
          p_out++;
        }
        else if (enc == 'b') {
          /*
           * Decode base64 encoded data.  The byte at the end of the
           * encoded text is temporarily replaced by '\r' -
           * presumably the terminator from64tobits() stops at
           * (NOTE(review): confirm against its definition).
           */
          unsigned char delimsave;
          int decoded_count;

          delimsave = *p; *p = '\r';
          decoded_count = from64tobits(p_out, p_in);
          *p = delimsave;
          if (decoded_count > 0)
            p_out += decoded_count;
          p_in = p;
        }
        else {
          /* Unknown encoding - copy unchanged */
          *p_out = *p_in;
          p_in++;
          p_out++;
        }
      }
      if (*p_in)
        p_in += 2;   /* Skip the MIMEHDR_END delimiter */

      /*
       * We've completed decoding one encoded sequence. But another
       * may follow immediately, in which case whitespace before the
       * new MIMEHDR_INIT delimiter must be discarded.
       * See if that is the case
       */
      p = (unsigned char *) strstr((char *) p_in, MIMEHDR_INIT);
      state = S_COPY_PLAIN;
      if (p != NULL) {
        /*
         * There is more MIME data later on. Is there
         * whitespace only before the delimiter?
         */
        unsigned char *q;
        int  wsp_only = 1;

        for (q = p_in; (wsp_only && (q < p)); q++)
          wsp_only = isspace(*q);

        if (wsp_only) {
          /*
           * Whitespace-only before the MIME delimiter. OK,
           * just advance p_in to past the new MIMEHDR_INIT,
           * and prepare to process the new MIME charset/encoding
           * header.
           */
          p_in = p + strlen(MIMEHDR_INIT);
          state = S_SKIP_MIMEINIT;
        }
      }
      break;
    }
  }

  /*
   * The buffer ended right after a "=?": nothing followed it, so it
   * was literal text and must not be lost.
   */
  if (state == S_SKIP_MIMEINIT) {
    *p_out++ = MIMEHDR_INIT[0];
    *p_out++ = MIMEHDR_INIT[1];
  }

  *p_out = '\0';
}
 
 
 
/* 
 * Routines for decoding body-parts of a message. 
 * 
 * Since the "fetch" part of fetchmail gets a message body 
 * one line at a time, we need to maintain some state variables 
 * across multiple invocations of the UnMimeBodyline() routine.
 * The driver routine should call MimeBodyType() when all 
 * headers have been received, and then UnMimeBodyline() for 
 * every line in the message body. 
 * 
 */ 
#define S_BODY_DATA 0   /* Lines are body content (or a delimiter line) */
#define S_BODY_HDR  1   /* Lines are the headers of a body-part */

/*
 * Flag indicating if we are currently processing
 * the headers or the body of a (multipart) message.
 * Holds one of S_BODY_DATA / S_BODY_HDR; reset by MimeBodyType()
 * and switched by UnMimeBodyline().
 */
static int  BodyState = S_BODY_DATA;

/*
 * Flag indicating if we are in the process of decoding
 * a quoted-printable body part (non-zero = yes).
 */
static int  CurrEncodingIsQP = 0;

/*
 * Delimiter for multipart messages. RFC 2046 states that this must
 * NEVER be longer than 70 characters. Add 3 for the two hyphens
 * at the beginning, and a terminating null.
 * An empty string means that no delimiter is in effect.
 */
#define MAX_DELIM_LEN 70
static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3];
 
 
/*
 * Text written over a "Content-Transfer-Encoding: quoted-printable"
 * header line, in the message headers as well as in body-part
 * headers.  Because the header is patched in place, the replacement
 * must not be longer than the text it overwrites.
 */
static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit";

/*
 * Overwrite the header line starting at XferEncOfs with ENC8BIT and
 * blank out what remains of the old value.  Blanking stops at the
 * first byte below ' ' (e.g. CR, LF or the terminating NUL), which is
 * left untouched.  A NULL pointer is ignored.
 */
static void SetEncoding8bit(unsigned char *XferEncOfs)
{
  const size_t enc_len = sizeof(ENC8BIT) - 1;   /* length without the NUL */
  unsigned char *tail;

  if (XferEncOfs == NULL)
    return;

  memcpy(XferEncOfs, ENC8BIT, enc_len);

  /* Pad the leftover part of this header with spaces */
  tail = XferEncOfs + enc_len;
  while (*tail >= ' ') {
    *tail = ' ';
    tail++;
  }
}
 
/*
 * Extract the value of the "boundary" parameter from the value of a
 * Content-Type header.
 *
 *  CntType : NUL-terminated, writable Content-Type value.  It is
 *            modified: a NUL is stored at the end of the boundary.
 *
 * Returns a pointer into CntType at the first character of the
 * boundary string, or NULL if there is no "boundary" parameter, no
 * '=' after it, an unterminated quoted value, or an empty value.
 *
 * An unquoted value ends at the next ';' or at the end of the string;
 * trailing whitespace (for instance the CR left over from a CRLF line
 * ending) is stripped, since a boundary may not end in whitespace.
 */
static char *GetBoundary(char *CntType)
{
  char *p1, *p2;
  int flag;

  /* Find the "boundary" delimiter. It must be preceded with a ';'
   * and optionally some whitespace.
   */
  p1 = CntType;
  do {
    p2 = strchr(p1, ';');
    if (p2)
      /* Cast: isspace() is undefined for negative char values */
      for (p2++; isspace((unsigned char) *p2); p2++);

    p1 = p2;
  } while ((p1) && (strncasecmp(p1, "boundary", 8) != 0));

  if (p1 == NULL)
    /* No boundary delimiter */
    return NULL;

  /* Skip "boundary", whitespace and '='; check that we do have a '=' */
  for (p1 += 8, flag = 0; (isspace((unsigned char) *p1) || (*p1 == '=')); p1++)
    flag |= (*p1 == '=');
  if (!flag)
    return NULL;

  /* Find end of boundary delimiter string */
  if (*p1 == '\"') {
    /* The delimiter is inside quotes */
    p1++;
    p2 = strchr(p1, '\"');
    if (p2 == NULL)
      return NULL;  /* No closing '"' !?! */
  }
  else {
    /* There might be more text after the "boundary" string. */
    p2 = strchr(p1, ';');  /* Safe - delimiter with ';' must be in quotes */
    if (p2 == NULL)
      p2 = p1 + strlen(p1);

    /* Drop trailing whitespace from the unquoted value */
    while ((p2 > p1) && isspace((unsigned char) p2[-1]))
      p2--;
  }

  /* Zero-terminate the boundary string */
  *p2 = '\0';

  return (*p1 != '\0') ? p1 : NULL;
}
 
 
/*
 * This routine does three things:
 * 1) It determines - based on the message headers - whether the
 *    message body is a MIME message that may hold 8 bit data.
 *    - A message that has a "quoted-printable" or "8bit" transfer
 *      encoding is assumed to contain 8-bit data (when decoded).
 *    - A multipart message is assumed to contain 8-bit data
 *      when decoded (there might be quoted-printable body-parts).
 *    - All other messages are assumed NOT to include 8-bit data.
 * 2) It determines the delimiter-string used in multi-part message
 *    bodies.
 * 3) It sets the initial values of the CurrEncodingIsQP and BodyState
 *    variables, from the header contents.
 *
 * Parameters:
 *   hdrs       - NUL-terminated block of message headers; may be NULL,
 *                in which case 0 is returned.  When WantDecode is set
 *                and the message is quoted-printable, the
 *                Content-Transfer-Encoding line in this buffer is
 *                rewritten in place by SetEncoding8bit().
 *   WantDecode - non-zero if the caller is going to decode the body.
 *
 * The return value is a bitmask built from MSG_IS_7BIT, MSG_IS_8BIT
 * and MSG_NEEDS_DECODE; it is 0 unless a "MIME-Version: 1.0" header
 * was found and one of the cases above applied.
 */
int MimeBodyType(unsigned char *hdrs, int WantDecode)
{
  unsigned char *NxtHdr = hdrs;
  unsigned char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p;
  int  HdrsFound = 0;     /* We only look for three headers */
  int  BodyType;          /* Return value */

  /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */
  MultipartDelimiter[0] = '\0';
  CurrEncodingIsQP = 0;
  BodyState = S_BODY_DATA;
  BodyType = 0;

  /* Just in case ... */
  if (hdrs == NULL)
    return BodyType;

  XferEnc = XferEncOfs = CntType = MimeVer = NULL;

  /*
   * Walk the header block one physical line at a time, until the end
   * of the block or until three interesting header lines were seen.
   * NOTE(review): HdrsFound is bumped for every matching line, so a
   * repeated header counts more than once and can end the scan early.
   */
  do {
    if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) {
      /* Remember where the line starts, for SetEncoding8bit() below */
      XferEncOfs = NxtHdr;
      /* nxtaddr() is defined elsewhere; presumably it returns the
       * header's value - TODO confirm */
      p = nxtaddr(NxtHdr);
      if (p != NULL) {
	/* xalloca() is a project macro; presumably it makes XferEnc
	 * point at freshly allocated storage of the given size -
	 * TODO confirm */
	xalloca(XferEnc, char *, strlen(p) + 1);
	strcpy(XferEnc, p);
	HdrsFound++;
      }
    }
    else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) {
      /*
       * This one is difficult. We cannot use the standard
       * nxtaddr() routine, since the boundary-delimiter is
       * (probably) enclosed in quotes - and thus appears
       * as an rfc822 comment, and nxtaddr() "eats" up any
       * spaces in the delimiter. So, we have to do this
       * by hand.
       */

      /* Skip the "Content-Type:" part and whitespace after it */
      for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == '\t')); NxtHdr++);

      /*
       * Get the full value of the Content-Type header;
       * it might span multiple lines. So search for
       * a newline char, but ignore those that have a
       * TAB or space just after the NL (continued
       * lines).
       */
      p = NxtHdr-1;
      do {
        p=strchr((p+1),'\n');
      } while ( (p != NULL) && ((*(p+1) == '\t') || (*(p+1) == ' ')) );
      /* No terminating newline: the value runs to the end of the block */
      if (p == NULL) p = NxtHdr + strlen(NxtHdr);

      /* Copy the value (without the final newline) and terminate it */
      xalloca(CntType, char *, p-NxtHdr+2);
      strncpy(CntType, NxtHdr, (p-NxtHdr));
      *(CntType+(p-NxtHdr)) = '\0';
      HdrsFound++;
    }
    else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0) {
      p = nxtaddr(NxtHdr);
      if (p != NULL) {
	xalloca(MimeVer, char *, strlen(p) + 1);
	strcpy(MimeVer, p);
	HdrsFound++;
      }
    }

    /* Advance to the character after the next newline, if any */
    NxtHdr = (strchr(NxtHdr, '\n'));
    if (NxtHdr != NULL) NxtHdr++;
  } while ((NxtHdr != NULL) && (*NxtHdr) && (HdrsFound != 3));


  /* Done looking through the headers, now check what they say.
   * Nothing below applies unless the version string is exactly "1.0". */
  if ((MimeVer != NULL) && (strcmp(MimeVer, "1.0") == 0)) {

    /* Check Content-Type to see if this is a multipart message */
    if ( (CntType != NULL) &&
         ((strncasecmp(CntType, "multipart/", 10) == 0) ||
	  (strncasecmp(CntType, "message/", 8) == 0)) ) {

      char *p1 = GetBoundary(CntType);

      if (p1 != NULL) {
	/* The actual delimiter is "--" followed by
	   the boundary string; at most MAX_DELIM_LEN characters
	   of the boundary are kept */
	strcpy(MultipartDelimiter, "--");
	strncat(MultipartDelimiter, p1, MAX_DELIM_LEN);
	BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
      }
    }

    /*
     * Check Content-Transfer-Encoding, but
     * ONLY for non-multipart messages (BodyType == 0).
     */
    if ((XferEnc != NULL) && (BodyType == 0)) {
      if (strcasecmp(XferEnc, "quoted-printable") == 0) {
	CurrEncodingIsQP = 1;
	BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
	if (WantDecode) {
	   /* The body will be decoded, so relabel the header as 8bit */
           SetEncoding8bit(XferEncOfs);
        }
      }
      else if (strcasecmp(XferEnc, "7bit") == 0) {
	CurrEncodingIsQP = 0;
	BodyType = (MSG_IS_7BIT);
      }
      else if (strcasecmp(XferEnc, "8bit") == 0) {
	CurrEncodingIsQP = 0;
	BodyType = (MSG_IS_8BIT);
      }
      /* Any other encoding leaves BodyType at 0 */
    }

  }

  return BodyType;
}
 
 
/*
 * Decode, in place, one NUL-terminated line of quoted-printable data.
 *
 *  bufp              : in:  address of a pointer to the start of the line
 *                      out: the pointer is moved to the terminating NUL
 *                           of the decoded text
 *  collapsedoubledot : if non-zero and the line starts with "..",
 *                      the first dot is dropped
 *
 * "=XX" with two hex digits becomes one byte.  A '=' directly followed
 * by CR, LF or the end of the string is a soft line break: it is
 * removed together with the line ending.  Any other '=' is copied
 * unchanged.
 *
 * Returns 1 if a soft line break was seen on this line, 0 otherwise.
 */
static int DoOneQPLine(unsigned char **bufp, int collapsedoubledot)
{
  unsigned char *buf = *bufp;
  unsigned char *p_in, *p_out, *p;
  size_t n;
  int ret = 0;

  p_in = buf;
  if (collapsedoubledot && (strncmp((char *) buf, "..", 2) == 0))
    p_in++;

  for (p_out = buf; (*p_in); ) {
    p = (unsigned char *) strchr((char *) p_in, '=');
    if (p == NULL) {
      /* No more QP data, just move remainder into place */
      n = strlen((char *) p_in);
      memmove(p_out, p_in, n);
      p_in += n; p_out += n;
    }
    else {
      if (p > p_in) {
        /* There are some uncoded chars at the beginning. */
        n = (size_t) (p - p_in);
        memmove(p_out, p_in, n);
        p_out += n;
      }

      switch (p[1]) {
      case '\0': case '\r': case '\n':
        /* Soft line break, skip '=' */
        p_in = p + 1;
        if (*p_in == '\r') p_in++;
        if (*p_in == '\n') p_in++;
        ret = 1;
        break;

      default:
        /*
         * There may be a QP encoded byte.  p[1] is not NUL here, so
         * p[2] is inside the string; it must not be the terminator
         * itself, or stepping over three bytes would leave the buffer.
         */
        if ((p[2] != '\0') && (qp_char(p[1], p[2], p_out) == 0)) {
          p_in = p + 3;
        }
        else {
          /* Invalid QP data - pass through unchanged. */
          *p_out = '=';
          p_in = p + 1;
        }
        p_out++;
        break;
      }
    }
  }

  *p_out = '\0';
  *bufp = p_out;
  return ret;
}
 
 
/*
 * Process one line of the message body; call once per line.
 *
 * Every body line is compared against the multipart delimiter, and
 * the headers of each body-part are inspected, because they can turn
 * quoted-printable decoding on and off.
 *
 * Messages that are not multipart pass through quickly: BodyState
 * stays S_BODY_DATA and MultipartDelimiter is an empty string.
 *
 *  bufp              : in:  address of a pointer to the NUL-terminated line
 *                      out: the pointer is moved to the end of the
 *                           (possibly decoded) line
 *  collapsedoubledot : handed on to DoOneQPLine()
 *
 * Returns non-zero if the line ended with a soft line-break.
 */

int UnMimeBodyline(unsigned char **bufp, int collapsedoubledot)
{
  unsigned char *line = *bufp;
  int soft_break = 0;

  if (BodyState == S_BODY_HDR) {
    /* Headers in body-parts can be encoded, too! */
    UnMimeHeader(line);

    if (strncasecmp("Content-Transfer-Encoding:", line, 26) == 0) {
      char *encoding = nxtaddr(line);

      if ((encoding != NULL) && (strcasecmp(encoding, "quoted-printable") == 0)) {
        CurrEncodingIsQP = 1;
        SetEncoding8bit(line);
      }
    }
    else if ((*line == '\0') || (*line == '\n') || (strcmp(line, "\r\n") == 0)) {
      /* An empty line ends the body-part headers */
      BodyState = S_BODY_DATA;
    }

    *bufp = (line + strlen(line));
  }
  else if (BodyState == S_BODY_DATA) {
    /* A delimiter line starts a new body-part: its headers come next */
    if ((*MultipartDelimiter) &&
        (strncmp(line, MultipartDelimiter, strlen(MultipartDelimiter)) == 0)) {
      BodyState = S_BODY_HDR;
      CurrEncodingIsQP = 0;
    }

    if (!CurrEncodingIsQP)
      *bufp = (line + strlen(line));
    else
      soft_break = DoOneQPLine(bufp, collapsedoubledot);
  }

  return soft_break;
}
 
 
#ifdef STANDALONE
#include <stdio.h>
#include <unistd.h>

char *program_name = "unmime";

#define BUFSIZE_INCREMENT 4096

/* In DEBUG builds, mirror data to a trace file (if it could be opened) */
#ifdef DEBUG
#define DBG_FWRITE(B,L,BS,FD) \
  do { if ((FD) != NULL) fwrite(B, L, BS, FD); } while (0)
#else
#define DBG_FWRITE(B,L,BS,FD)
#endif

/*
 * Stand-alone filter: read a message from stdin, decode MIME-encoded
 * headers and quoted-printable body data, and write the result to
 * stdout.
 *
 * The header block (everything up to the first empty line) is read
 * into a growing buffer and processed as a whole; the body is then
 * processed line by line, re-using the same buffer.
 *
 * xmalloc()/xrealloc() are defined elsewhere; presumably they do not
 * return on allocation failure - TODO confirm.
 */
int main(int argc, char *argv[])
{
  unsigned int BufSize;
  unsigned char *buffer, *buf_p;
  int nl_count, i, bodytype;
  size_t line_len;

#ifdef DEBUG
  pid_t pid;
  FILE *fd_orig, *fd_conv;
  char fnam[100];

  /* NOTE(review): predictable names in /tmp are open to symlink
   * attacks; acceptable for a debug aid only. */
  pid = getpid();
  sprintf(fnam, "/tmp/i_unmime.%lx", (unsigned long) pid);
  fd_orig = fopen(fnam, "w");
  sprintf(fnam, "/tmp/o_unmime.%lx", (unsigned long) pid);
  fd_conv = fopen(fnam, "w");
#endif

  BufSize = BUFSIZE_INCREMENT;    /* Initial size of buffer */
  buf_p = buffer = (unsigned char *) xmalloc(BufSize);
  nl_count = 0;

  /*
   * Read the headers, byte by byte, until two newlines in a row
   * (ignoring CRs) or the end of input.
   */
  for (;;) {
    i = fread(buf_p, 1, 1, stdin);
    if (i <= 0)
      break;   /* EOF or error: nothing was stored at *buf_p */

    switch (*buf_p) {
     case '\n':
       nl_count++;
       break;

     case '\r':
       break;

     default:
       nl_count = 0;
       break;
    }

    buf_p++;
    if ((unsigned int) (buf_p - buffer) == BufSize) {
       /* Buffer is full! Get more room. */
       buffer = (unsigned char *) xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
       buf_p = buffer + BufSize;
       BufSize += BUFSIZE_INCREMENT;
    }

    if (nl_count >= 2)
      break;   /* Empty line: end of headers */
  }

  *buf_p = '\0';
  DBG_FWRITE(buffer, strlen((char *) buffer), 1, fd_orig);

  UnMimeHeader(buffer);
  bodytype = MimeBodyType(buffer, 1);

  i = strlen((char *) buffer);
  fwrite(buffer, i, 1, stdout);
  DBG_FWRITE(buffer, i, 1, fd_conv);

  /* Now the body, one line at a time */
  for (;;) {
     /* Read up to and including the next '\n' (or to end of input) */
     line_len = 0;
     for (;;) {
        if (line_len + 1 >= BufSize) {
           /* Keep room for this byte plus the terminating NUL */
           buffer = (unsigned char *) xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
           BufSize += BUFSIZE_INCREMENT;
        }
        i = fread(buffer + line_len, 1, 1, stdin);
        if (i != 1)
           break;
        if (buffer[line_len++] == '\n')
           break;
     }
     buffer[line_len] = '\0';
     DBG_FWRITE(buffer, line_len, 1, fd_orig);

     /*
      * Stop only when nothing could be read.  The length of the
      * decoded line must not be used for this test: a line holding
      * just a soft line break decodes to nothing.
      */
     if (line_len == 0)
        break;

     buf_p = buffer + line_len;
     if (bodytype & MSG_NEEDS_DECODE) {
        buf_p = buffer;
        UnMimeBodyline(&buf_p, 0);
     }
     if (buf_p > buffer) {
        fwrite(buffer, (buf_p - buffer), 1, stdout);
        DBG_FWRITE(buffer, (buf_p - buffer), 1, fd_conv);
     }
  }

  free(buffer);
  fflush(stdout);

#ifdef DEBUG
  if (fd_orig != NULL) fclose(fd_orig);
  if (fd_conv != NULL) fclose(fd_conv);
#endif

  return 0;
}
#endif