www.pudn.com > InternetÖ©Öë³ÌÐò.zip > ParseHTML.cs


using System;
using System.Text;
 
namespace Spider 
{ 
	/// <summary>
	/// HTML parser that reads its input one character at a time and
	/// recognizes tags, comments and tag attributes.
	///
	/// This spider is copyright 2003 by Jeff Heaton. However, it is
	/// released under the GNU Lesser General Public License (LGPL). You may
	/// use it freely in your own programs. For the latest version visit
	/// http://www.jeffheaton.com.
	/// </summary>
 
	public class ParseHTML:Parse  
	{
		/// <summary>
		/// Builds a detached copy of the tag that was parsed most recently.
		/// </summary>
		/// <returns>
		/// A new <c>AttributeList</c> whose <c>Name</c> is the current tag
		/// name and which holds a clone of every attribute in <c>List</c>,
		/// so later parsing does not alter the returned attributes.
		/// </returns>
		public AttributeList GetTag()
		{
			AttributeList tag = new AttributeList();
			tag.Name = m_tag;

			foreach(Attribute x in List)
			{
				tag.Add((Attribute)x.Clone());
			}

			return tag;
		}

		/// <summary>
		/// Re-creates the textual form of the current tag from its name and
		/// its attributes.
		/// </summary>
		/// <returns>
		/// The tag text: an opening bracket, the tag name, each attribute
		/// preceded by a single space, and a closing bracket. An attribute
		/// without a value is written as its (optionally delimited) name; an
		/// attribute with a value is written as name, '=', and the
		/// (optionally delimited) value.
		/// </returns>
		public String BuildTag()
		{
			StringBuilder buffer = new StringBuilder("<");
			buffer.Append(m_tag);

			// The indexer is relied on to yield null once the index runs past
			// the last attribute; that null ends the loop.
			Attribute attr;
			for ( int i=0; (attr=this[i])!=null; i++ )
			{
				bool delimited = attr.Delim!=0;

				buffer.Append(' ');
				if ( attr.Value==null )
				{
					// Value-less attribute: the delimiter (if any) wraps the name.
					if ( delimited )
						buffer.Append(attr.Delim);
					buffer.Append(attr.Name);
					if ( delimited )
						buffer.Append(attr.Delim);
				}
				else
				{
					// name=value, with the delimiter (if any) wrapping the value.
					buffer.Append(attr.Name);
					buffer.Append('=');
					if ( delimited )
						buffer.Append(attr.Delim);
					buffer.Append(attr.Value);
					if ( delimited )
						buffer.Append(attr.Delim);
				}
			}

			buffer.Append('>');
			return buffer.ToString();
		}

		/// <summary>
		/// Parses one tag, starting at the character that follows the
		/// opening bracket. The tag name is stored in <c>m_tag</c> and every
		/// attribute found is added through <c>AddAttribute()</c>.
		/// A comment is stored whole in <c>m_tag</c> (carriage returns
		/// removed, trailing "--" included) and produces no attributes.
		/// </summary>
		protected void ParseTag()
		{
			m_tag="";
			Clear();

			// Is it a comment?
			if ( (GetCurrentChar()=='!') &&
				(GetCurrentChar(1)=='-')&&
				(GetCurrentChar(2)=='-') )  
			{
				ParseComment();
				return;
			}

			// Find the tag name: everything up to whitespace, '>' or end of input.
			StringBuilder name = new StringBuilder();
			while ( !Eof() )  
			{
				char ch = GetCurrentChar();
				if ( IsWhiteSpace(ch) || (ch=='>') )
					break;
				name.Append(ch);
				Advance();
			}
			m_tag = name.ToString();

			EatWhiteSpace();

			// Get the attributes. The Eof() test keeps an unterminated tag
			// (input ends before '>') from spinning here: without it the loop
			// condition could never become false once the input is exhausted.
			while ( !Eof() && GetCurrentChar()!='>' )  
			{
				ParseName = "";
				ParseValue = "";
				ParseDelim = (char)0;

				ParseAttributeName();

				if ( GetCurrentChar()=='>' )  
				{
					AddAttribute();
					break;
				}

				// Get the value(if any)
				ParseAttributeValue();
				AddAttribute();
			}

			// Step over the closing '>'.
			Advance();
		}

		/// <summary>
		/// Consumes a comment whose "!--" opener is at the current position
		/// and stores its text in <c>m_tag</c>.
		/// </summary>
		private void ParseComment()
		{
			StringBuilder text = new StringBuilder();
			while ( !Eof() )  
			{
				char ch = GetCurrentChar();
				if ( (ch=='-') &&
					(GetCurrentChar(1)=='-')&&
					(GetCurrentChar(2)=='>') )
					break;
				if ( ch!='\r' )
					text.Append(ch);
				Advance();
			}

			// The terminating "--" is kept as part of the stored comment text.
			text.Append("--");
			m_tag = text.ToString();

			// Skip the three characters of the "-->" terminator.
			Advance();
			Advance();
			Advance();
			ParseDelim = (char)0;
		}

		/// <summary>
		/// Reads the next unit of input: either a whole tag or one character
		/// of text.
		/// </summary>
		/// <returns>
		/// <c>(char)0</c> when a tag was parsed (retrieve it with
		/// <c>GetTag()</c>); otherwise the text character that was consumed.
		/// </returns>
		public char Parse()
		{
			if ( GetCurrentChar()!='<' )
				return AdvanceCurrentChar();

			Advance();

			// A tag starts with an ASCII letter, '!' (comment/declaration) or
			// '/' (end tag). The letter test is done on explicit ASCII ranges
			// rather than through char.ToUpper, which follows the current
			// culture and maps 'i' outside A-Z under Turkish casing rules.
			char ch = GetCurrentChar();
			bool isLetter = ((ch>='A') && (ch<='Z')) || ((ch>='a') && (ch<='z'));
			if ( isLetter || (ch=='!') || (ch=='/') )  
			{
				ParseTag();
				return (char)0;
			}

			// Not a tag: the '<' was literal text. It has already been
			// consumed, so report it and leave the following character to be
			// examined by the next call (it may itself open a tag).
			return '<';
		} 
	} 
}