// www.pudn.com > downloadingNet.rar > parser.cpp


 
/* 
	Parser.cpp : CHtmlParser implementation. 
 
	Implements the base HTML parser class. 
 
	Author: Steven E. Sipe 
*/ 
 
#include "stdafx.h" 
 
#include <string.h> 
 
#include "parser.h" 
 
 
// Constructor.  The body is empty: no member is assigned here. 
// NOTE(review): within this file m_pRawBuffer is only assigned in ParseText() -- 
// confirm nothing reads it before ParseText() has been called. 
CHtmlParser::CHtmlParser() 
{ 
} 
 
// Destructor.  The body is empty: this file performs no explicit cleanup. 
CHtmlParser::~CHtmlParser() 
{ 
} 
 
// Scans pBuffer forward from nIndex for the first character that is a member
// of the pTokens set.
//
//   pTokens - NUL-terminated list of single-character tokens to look for
//   pBuffer - text being scanned; only positions below nMaxLen are read
//   nIndex  - in:  position at which to start scanning
//             out: position of the matching character when one is found;
//                  otherwise nMaxLen (or unchanged if it already was at or
//                  beyond nMaxLen on entry)
//   nMaxLen - exclusive upper bound for nIndex
//
// Returns TRUE if a token character was found, FALSE if the scan ran out of
// buffer first.
BOOL CHtmlParser::FindToken(const char *pTokens, const char *pBuffer, int& nIndex,
                                             int nMaxLen)
{
	// The token set is fixed for the whole scan, so measure it only once
	const size_t nTokenCount = strlen(pTokens);

	for( ; nIndex < nMaxLen; nIndex++)
	{
		// The search is limited to nTokenCount bytes, so the terminating NUL
		// of pTokens is never treated as a token itself
		if(memchr(pTokens, pBuffer[nIndex], nTokenCount) != NULL)
			return(TRUE);
	}

	return(FALSE);
}
 
// Extracts the text that runs from the incoming nIndex up to, but not
// including, the first terminator character.
//
//   pTerminals - NUL-terminated list of characters that end the string
//   pBuffer    - text being scanned
//   nIndex     - in:  position where the string starts
//                out: one past the terminator when one is found; otherwise
//                     wherever the search stopped (see FindToken)
//   nMaxLen    - exclusive upper bound for nIndex
//
// Leading spaces are skipped before searching, so a space listed in
// pTerminals does not end the string immediately; those spaces are still part
// of the returned text because it starts at the incoming nIndex.
//
// Returns the extracted text, or an empty string if no terminator was found.
CString CHtmlParser::GetString(const char *pTerminals, const char *pBuffer,
                                       int &nIndex, int nMaxLen)
{
	const int nFirst = nIndex;

	// Skip leading spaces
	while(nIndex < nMaxLen && pBuffer[nIndex] == ' ')
		nIndex++;

	// No terminator: nothing to return
	if(!FindToken(pTerminals,pBuffer,nIndex,nMaxLen))
		return(CString());

	// nIndex is on the terminator, so this is the length without it
	const int nLength = nIndex - nFirst;

	// Leave the caller positioned just past the terminator
	nIndex++;

	// Copy exactly nLength characters.  Building the string from the
	// NUL-terminated tail and truncating it afterwards would copy the whole
	// rest of the buffer on every call and would read beyond nMaxLen if the
	// buffer is not NUL-terminated.
	return(CString(pBuffer+nFirst, nLength));
}
 
// Cleans up a raw parameter name or value: every double quote is turned into
// a space, surrounding whitespace is trimmed and, when bLowercase is TRUE, the
// result is converted to lower case.  GetOptions passes TRUE for names and
// FALSE for values.  Returns the cleaned-up copy; strParm is not modified.
CString CHtmlParser::SaveParm(const CString& strParm, BOOL bLowercase)
{
	CString strResult(strParm);

	// Blank out the quotes so the trimming below can strip them at the ends
	for(int nPos = strResult.GetLength() - 1; nPos >= 0; nPos--)
	{
		if(strResult.GetAt(nPos) == '"')
			strResult.SetAt(nPos,' ');
	}

	// Strip whitespace from both ends
	strResult.TrimRight();
	strResult.TrimLeft();

	// Only names are lower-cased; values keep their case
	if(bLowercase)
		strResult.MakeLower();

	return(strResult);
}
 
// Gets options supplied with a parameter 
void CHtmlParser::GetOptions(const char *pBuffer, int &nIndex, int nMaxLen) 
{ 
	CString strParm; 
	int nStart; 
 
	// Iterate through the buffer until we're finished 
	while(nIndex < nMaxLen) 
	{ 
		nStart = nIndex; 
 
		// Find the first assignment statement 
		strParm = GetString("=",pBuffer,nIndex,nMaxLen); 
 
		// Do we have a parameter name? 
		if(!strParm.IsEmpty()) 
		{ 
			COptions opt; 
			int nStart = nIndex; 
 
			opt.SetParm(SaveParm(strParm,TRUE)); 
 
			// Skip leading spaces 
			while(nIndex < nMaxLen && pBuffer[nIndex] == ' ') 
				nIndex++; 
 
			// Look for the end of the parameter or for the end 
			// of the HTML tag 
//			if(FindToken("\" >\r\n",pBuffer,nIndex,nMaxLen)) 
			if(FindToken("\" >",pBuffer,nIndex,nMaxLen)) 
			{ 
				// Handle quoted arguments 
				if(pBuffer[nIndex] == '"') 
					FindToken("\"",pBuffer,++nIndex,nMaxLen); 
 
				// Save the option's location 
				strParm = pBuffer+nStart; 
				strParm = strParm.Left(nIndex-nStart); 
				opt.SetValue(SaveParm(strParm,FALSE)); 
				opt.SetIndex(nStart); 
			} 
 
			// Add the option to the collection 
			m_arrOptions.Add(opt); 
 
		} 
		else break; 
	} 
} 
 
// Locates the next HTML tag at or after nIndex and parses it.
//
//   pBuffer - text being scanned
//   nIndex  - in:  position at which to start looking for '<'
//             out: on TRUE, the position of the tag's closing '>';
//                  on FALSE, either just past a rejected '<' or nMaxLen
//   nMaxLen - exclusive upper bound for nIndex
//
// m_strTag and m_arrOptions are cleared on entry.  On success m_strTag holds
// the lower-cased tag name and, if WantTag accepts that name, m_arrOptions
// holds the tag's options.
//
// Returns TRUE if a complete tag was found.  Returns FALSE if there is no
// '<', if the '<' is followed by a space, '>' or '!' (not a tag, or a
// comment), or if no closing '>' exists.
BOOL CHtmlParser::GetTag(const char *pBuffer, int& nIndex, int nMaxLen)
{
	m_strTag.Empty();
	m_arrOptions.RemoveAll();

	// Find the start of the tag
	if(!FindToken("<",pBuffer,nIndex,nMaxLen))
		return(FALSE);

	// Point to the character after the '<'; the tag name starts here
	nIndex++;
	int nStart = nIndex;

	// The '<' was the last character: there is nothing to inspect, and
	// reading pBuffer[nIndex] would be past the end of the buffer
	if(nIndex >= nMaxLen)
		return(FALSE);

	// Not a valid tag, or a comment...continue looking
	if(pBuffer[nIndex] == ' ' || pBuffer[nIndex] == '>' ||
                     pBuffer[nIndex] == '!')
		return(FALSE);

	// Find the end of the tag
	if(!FindToken(">",pBuffer,nIndex,nMaxLen))
		return(FALSE);

	// Get the tag name; this leaves nStart just past the name's terminator
	m_strTag = GetString(" >\r\n",pBuffer,nStart,nMaxLen);
	m_strTag.MakeLower();

	// Do we care about this tag???
	if(WantTag(m_strTag))
	{
		// Move back to the terminator so we can look for options
		nStart--;

		// Get the tag's options; the limit includes the closing '>'
		GetOptions(pBuffer,nStart,nIndex+1);
	}

	return(TRUE);
}
 
// Decides whether GetTag should collect the options of the tag named strTag
// (GetTag passes the name in lower case).  This default accepts every tag;
// override it to be selective.
BOOL CHtmlParser::WantTag(const CString& strTag)
{
	return TRUE;
}
 
// Called by ParseText for every tag GetTag finds.  strTag is the tag name,
// nIndex is the position GetTag stopped at and arrOptions holds the options
// collected for the tag.  Returning FALSE makes ParseText stop parsing.  This
// default does nothing and returns TRUE; override it to act on tags.
BOOL CHtmlParser::ProcessTag(const CString& strTag, int nIndex,
											const HTML_ARGS& arrOptions)
{
	return TRUE;
}
 
// Walks the first nMaxLen characters of pBuffer and hands every tag found by
// GetTag to ProcessTag.  Parsing ends when the buffer is exhausted or as soon
// as ProcessTag returns FALSE.  pBuffer is also remembered in m_pRawBuffer.
void CHtmlParser::ParseText(char *pBuffer, int nMaxLen)
{
	// Start from a clean state
	m_pRawBuffer = pBuffer;
	m_arrOptions.RemoveAll();
	m_strTag.Empty();

	// GetTag advances nPos itself, so the loop has no increment step
	for(int nPos = 0; nPos < nMaxLen; )
	{
		// Nothing usable at this position; keep scanning
		if(!GetTag(pBuffer,nPos,nMaxLen))
			continue;

		// The handler asked us to stop
		if(!ProcessTag(m_strTag,nPos,m_arrOptions))
			break;
	}
}