// Source: www.pudn.com > CSharpSpider.rar > Parse.cs
using System;
namespace Spider
{
/// <summary>
/// Base class for parsing tag-based files, such as HTML, HTTP headers
/// or XML.
/// </summary>
/// <remarks>
/// This spider is copyright 2003 by Jeff Heaton. However, it is
/// released under a Limited GNU Public License (LGPL). You may
/// use it freely in your own programs. For the latest version visit
/// http://www.jeffheaton.com.
/// </remarks>
public class Parse:AttributeList
{
    /// <summary>
    /// The source text that is being parsed.
    /// </summary>
    private string m_source;

    /// <summary>
    /// The current position inside of the text that
    /// is being parsed.
    /// </summary>
    private int m_idx;

    /// <summary>
    /// The most recently parsed attribute delimiter
    /// (zero when no quote delimiter has been recorded).
    /// </summary>
    private char m_parseDelim;

    /// <summary>
    /// The most recently parsed attribute name.
    /// </summary>
    private string m_parseName;

    /// <summary>
    /// The most recently parsed attribute value.
    /// </summary>
    private string m_parseValue;

    /// <summary>
    /// The most recently parsed tag. Kept as a public field so that
    /// existing callers that read or assign it keep compiling.
    /// </summary>
    public string m_tag;

    /// <summary>
    /// Determine if the specified character is whitespace or not.
    /// Only tab, line feed, carriage return and space count.
    /// </summary>
    /// <param name="ch">A character to check</param>
    /// <returns>true if the character is whitespace</returns>
    public static bool IsWhiteSpace(char ch)
    {
        return( "\t\n\r ".IndexOf(ch) != -1 );
    }

    /// <summary>
    /// Advance the index until past any whitespace, stopping at the
    /// first non-whitespace character or at the end of the source.
    /// </summary>
    public void EatWhiteSpace()
    {
        while ( !Eof() )
        {
            if ( !IsWhiteSpace(GetCurrentChar()) )
                return;
            m_idx++;
        }
    }

    /// <summary>
    /// Determine if the end of the source text has been
    /// reached.
    /// </summary>
    /// <returns>True if the end of the source text has been
    /// reached.</returns>
    public bool Eof()
    {
        return( m_idx>=m_source.Length );
    }

    /// <summary>
    /// Parse the attribute name. Leading whitespace is skipped, then
    /// characters are consumed up to (not including) the next
    /// whitespace, '=' or '&gt;'. The consumed text is appended to the
    /// current <see cref="ParseName"/>; trailing whitespace is skipped.
    /// </summary>
    public void ParseAttributeName()
    {
        EatWhiteSpace();

        // Scan first, then append once, instead of growing the string
        // one character at a time.
        int start = m_idx;
        while ( !Eof() )
        {
            char ch = m_source[m_idx];
            if ( IsWhiteSpace(ch) || (ch=='=') || (ch=='>') )
                break;
            m_idx++;
        }
        m_parseName = AppendRange(m_parseName, m_source, start, m_idx);

        EatWhiteSpace();
    }

    /// <summary>
    /// Parse the attribute value. Does nothing when a delimiter has
    /// already been recorded or when the current character is not '='.
    /// A value that opens with a single or double quote runs to the
    /// matching quote, and the quote is stored as the delimiter; an
    /// unquoted value runs to the next whitespace or '&gt;'. The value
    /// text is appended to the current <see cref="ParseValue"/>.
    /// </summary>
    public void ParseAttributeValue()
    {
        if ( m_parseDelim!=0 )
            return;

        if ( GetCurrentChar()!='=' )
            return;

        m_idx++;
        EatWhiteSpace();

        char first = GetCurrentChar();
        int start;
        if ( (first=='\'') || (first=='\"') )
        {
            m_parseDelim = first;
            m_idx++;
            start = m_idx;

            // Stop at end of input as well: an unterminated quote used
            // to loop forever because the end-of-text sentinel never
            // equals the delimiter.
            while ( !Eof() && (m_source[m_idx]!=m_parseDelim) )
                m_idx++;
            m_parseValue = AppendRange(m_parseValue, m_source, start, m_idx);

            // Step over the closing quote when there is one.
            if ( !Eof() )
                m_idx++;
        }
        else
        {
            start = m_idx;
            while ( !Eof() &&
                !IsWhiteSpace(m_source[m_idx]) &&
                (m_source[m_idx]!='>') )
            {
                m_idx++;
            }
            m_parseValue = AppendRange(m_parseValue, m_source, start, m_idx);
        }

        EatWhiteSpace();
    }

    /// <summary>
    /// Add a parsed attribute to the collection, built from the
    /// current name, value and delimiter.
    /// </summary>
    public void AddAttribute()
    {
        Attribute a = new Attribute(m_parseName,
            m_parseValue,m_parseDelim);
        Add(a);
    }

    /// <summary>
    /// Get the current character that is being parsed.
    /// </summary>
    /// <returns>The character at the current index, or
    /// (char)0 when the index is outside the source text.</returns>
    public char GetCurrentChar()
    {
        return GetCurrentChar(0);
    }

    /// <summary>
    /// Get a few characters ahead of the current character.
    /// </summary>
    /// <param name="peek">How many characters to peek ahead for.</param>
    /// <returns>The character that was retrieved, or (char)0 when
    /// the requested position is outside the source text.</returns>
    public char GetCurrentChar(int peek)
    {
        int pos = m_idx+peek;
        if ( (pos>=0) && (pos<m_source.Length) )
            return m_source[pos];
        else
            return (char)0;
    }

    /// <summary>
    /// Obtain the next character and advance the index by one.
    /// </summary>
    /// <returns>The next character</returns>
    public char AdvanceCurrentChar()
    {
        return m_source[m_idx++];
    }

    /// <summary>
    /// Move the index forward by one.
    /// </summary>
    public void Advance()
    {
        m_idx++;
    }

    /// <summary>
    /// The last attribute name that was encountered.
    /// </summary>
    public string ParseName
    {
        get
        {
            return m_parseName;
        }
        set
        {
            m_parseName = value;
        }
    }

    /// <summary>
    /// The last attribute value that was encountered.
    /// </summary>
    public string ParseValue
    {
        get
        {
            return m_parseValue;
        }
        set
        {
            m_parseValue = value;
        }
    }

    /// <summary>
    /// The last attribute delimiter that was encountered.
    /// </summary>
    public char ParseDelim
    {
        get
        {
            return m_parseDelim;
        }
        set
        {
            m_parseDelim = value;
        }
    }

    /// <summary>
    /// The text that is to be parsed.
    /// </summary>
    public string Source
    {
        get
        {
            return m_source;
        }
        set
        {
            m_source = value;
        }
    }

    /// <summary>
    /// Append source[start, end) to prefix. When the range is empty
    /// the prefix is returned untouched (it may be null), matching a
    /// character-by-character append that never ran.
    /// </summary>
    private static string AppendRange(string prefix, string source, int start, int end)
    {
        if ( end<=start )
            return prefix;
        return prefix + source.Substring(start, end-start);
    }
}
}
// This source code was downloaded from www.aspx1.com (aspx1.com)