www.pudn.com > InternetÖ©Öë³ÌÐò.zip > Parse.cs


using System; 
 
namespace Spider 
{ 
	/// <summary>
	/// Base class for parsing tag-based files, such as HTML, HTTP headers
	/// or XML.
	/// </summary>
	/// <remarks>
	/// This spider is copyright 2003 by Jeff Heaton. However, it is
	/// released under a Limited GNU Public License (LGPL). You may
	/// use it freely in your own programs. For the latest version visit
	/// http://www.jeffheaton.com.
	/// </remarks>
	public class Parse:AttributeList
	{
		/// <summary>
		/// The source text that is being parsed. Null until
		/// <see cref="Source"/> is assigned.
		/// </summary>
		private string m_source;

		/// <summary>
		/// Zero-based position of the current character inside the
		/// source text.
		/// </summary>
		private int m_idx;

		/// <summary>
		/// The most recently parsed attribute delimiter (a single or
		/// double quote), or 0 when none has been recorded.
		/// </summary>
		private char m_parseDelim;

		/// <summary>
		/// The most recently parsed attribute name.
		/// </summary>
		private string m_parseName;

		/// <summary>
		/// The most recently parsed attribute value.
		/// </summary>
		private string m_parseValue;

		/// <summary>
		/// The most recently parsed tag. This class never assigns it;
		/// presumably derived parsers do (not visible here).
		/// </summary>
		public string m_tag;

		/// <summary>
		/// Determine if the specified character is whitespace or not.
		/// Only tab, line feed, carriage return and space count.
		/// </summary>
		/// <param name="ch">A character to check.</param>
		/// <returns>true if the character is whitespace.</returns>
		public static bool IsWhiteSpace(char ch)
		{
			return ( ch=='\t' || ch=='\n' || ch=='\r' || ch==' ' );
		}


		/// <summary>
		/// Advance the index until it is past any whitespace, or until
		/// the end of the source text is reached.
		/// </summary>
		public void EatWhiteSpace()
		{
			while ( !Eof() && IsWhiteSpace(GetCurrentChar()) )
			{
				m_idx++;
			}
		}

		/// <summary>
		/// Determine if the end of the source text has been reached.
		/// A source that was never assigned is treated as empty.
		/// </summary>
		/// <returns>True if the end of the source text has been
		/// reached.</returns>
		public bool Eof()
		{
			return ( m_source==null || m_idx>=m_source.Length );
		}

		/// <summary>
		/// Parse the attribute name. Leading whitespace is skipped, then
		/// characters are consumed up to (not including) the next
		/// whitespace, '=' or '>', and finally trailing whitespace is
		/// skipped. The consumed text is appended to
		/// <see cref="ParseName"/>.
		/// </summary>
		public void ParseAttributeName()
		{
			EatWhiteSpace();

			// Scan first, then copy the whole run in one go instead of
			// growing the string one character at a time.
			int start = m_idx;
			while ( !Eof() )
			{
				char ch = GetCurrentChar();
				if ( IsWhiteSpace(ch) || ch=='=' || ch=='>' )
				{
					break;
				}
				m_idx++;
			}

			// Append only when something was consumed, so an untouched
			// name stays exactly as the caller left it.
			if ( m_idx>start )
			{
				m_parseName += m_source.Substring(start, m_idx-start);
			}

			EatWhiteSpace();
		}


		/// <summary>
		/// Parse the attribute value. Does nothing if a delimiter is
		/// already recorded in <see cref="ParseDelim"/> or if the current
		/// character is not '='. Otherwise the '=' and any whitespace
		/// after it are skipped and the value is read: a quoted value
		/// runs to the matching quote (which is recorded as the
		/// delimiter and skipped), an unquoted value runs to the next
		/// whitespace or '>'. The text is appended to
		/// <see cref="ParseValue"/> and trailing whitespace is skipped.
		/// </summary>
		public void ParseAttributeValue()
		{
			if ( m_parseDelim!=0 )
			{
				return;
			}

			if ( GetCurrentChar()!='=' )
			{
				return;
			}

			m_idx++;
			EatWhiteSpace();

			int start;
			char first = GetCurrentChar();
			if ( first=='\'' || first=='\"' )
			{
				m_parseDelim = first;
				m_idx++;
				start = m_idx;

				// Stop at end of input as well: the peek yields (char)0
				// there, which never equals a quote, so an unterminated
				// value would otherwise loop forever.
				while ( !Eof() && GetCurrentChar()!=m_parseDelim )
				{
					m_idx++;
				}

				if ( m_idx>start )
				{
					m_parseValue += m_source.Substring(start, m_idx-start);
				}

				// Step over the closing quote when there is one.
				if ( !Eof() )
				{
					m_idx++;
				}
			}
			else
			{
				start = m_idx;
				while ( !Eof() &&
					!IsWhiteSpace(GetCurrentChar()) &&
					(GetCurrentChar()!='>') )
				{
					m_idx++;
				}

				if ( m_idx>start )
				{
					m_parseValue += m_source.Substring(start, m_idx-start);
				}
			}

			EatWhiteSpace();
		}

		/// <summary>
		/// Add a parsed attribute to the collection, built from the
		/// current name, value and delimiter.
		/// </summary>
		public void AddAttribute()
		{
			Attribute a = new Attribute(m_parseName,
				m_parseValue,m_parseDelim);
			Add(a);
		}

		/// <summary>
		/// Get the current character that is being parsed.
		/// </summary>
		/// <returns>The character at the current index, or (char)0 when
		/// the index is outside the source text.</returns>
		public char GetCurrentChar()
		{
			return GetCurrentChar(0);
		}

		/// <summary>
		/// Get a few characters ahead of the current character without
		/// moving the index.
		/// </summary>
		/// <param name="peek">How many characters to peek ahead for.</param>
		/// <returns>The character that was retrieved, or (char)0 when the
		/// requested position is outside the source text.</returns>
		public char GetCurrentChar(int peek)
		{
			int pos = m_idx+peek;
			if ( m_source!=null && pos>=0 && pos<m_source.Length )
			{
				return m_source[pos];
			}
			return (char)0;
		}

		/// <summary>
		/// Obtain the next character and advance the index by one.
		/// No bounds check is performed, so the string indexer throws
		/// when the index is already at the end; check
		/// <see cref="Eof"/> first.
		/// </summary>
		/// <returns>The next character.</returns>
		public char AdvanceCurrentChar()
		{
			return m_source[m_idx++];
		}

		/// <summary>
		/// Move the index forward by one.
		/// </summary>
		public void Advance()
		{
			m_idx++;
		}


		/// <summary>
		/// The last attribute name that was encountered.
		/// </summary>
		public string ParseName
		{
			get
			{
				return m_parseName;
			}

			set
			{
				m_parseName = value;
			}
		}

		/// <summary>
		/// The last attribute value that was encountered.
		/// </summary>
		public string ParseValue
		{
			get
			{
				return m_parseValue;
			}

			set
			{
				m_parseValue = value;
			}
		}

		/// <summary>
		/// The last attribute delimiter that was encountered.
		/// <see cref="ParseAttributeValue"/> returns immediately while
		/// this is non-zero.
		/// </summary>
		public char ParseDelim
		{
			get
			{
				return m_parseDelim;
			}

			set
			{
				m_parseDelim = value;
			}
		}

		/// <summary>
		/// The text that is to be parsed. Assigning it does not reset
		/// the current index.
		/// </summary>
		public string Source
		{
			get
			{
				return m_source;
			}

			set
			{
				m_source = value;
			}
		}
	}
}