// Source: www.pudn.com > Crawler_bemjh.rar > LogicalLayer.cs
using System;
using System.Data;
using System.Data .SqlClient ;
using System.Collections;
namespace CrawlerLib
{
/// <summary>
/// Logical layer of the crawler.
/// It decides where data goes but never talks to the database directly;
/// the actual work is delegated to the data layer (<c>SqlBase</c>).
/// </summary>
public class LogicalLayer
{
    /// <summary>
    /// Creates a logical layer instance. The class keeps no instance state.
    /// </summary>
    public LogicalLayer()
    {
    }

    /// <summary>
    /// Initializes the connection string used by the data layer.
    /// </summary>
    /// <param name="cs">Connection settings forwarded to <c>SqlBase.setConnectionString</c>.</param>
    public void InitConnString(ConnectionString cs)
    {
        SqlBase.setConnectionString(cs);
    }

    /// <summary>
    /// Inserts the information of one main (index) page by calling the
    /// <c>InsertIndexPage</c> stored procedure.
    /// </summary>
    /// <remarks>
    /// The insert is best-effort: a failure of the stored procedure is written to
    /// <see cref="System.Diagnostics.Trace"/> and is not propagated to the caller.
    /// </remarks>
    /// <param name="pe">Page data to insert (URL, link text, page source and score).</param>
    /// <exception cref="ArgumentNullException"><paramref name="pe"/> is null.</exception>
    public void InsertIndexPage(PageElement pe)
    {
        if (pe == null)
        {
            throw new ArgumentNullException("pe");
        }

        // NOTE(review): Encoding.Default depends on the machine's code page, so characters
        // outside it are not preserved. Left unchanged because whatever reads the column
        // back presumably decodes with the same encoding - confirm before switching.
        byte[] myBytes = System.Text.Encoding.Default.GetBytes(pe.PageSourceCode);

        SqlParameter[] param = new SqlParameter[4];
        param[0] = SqlBase.CreateParam("@URL", SqlDbType.VarChar, 255, ParameterDirection.Input, pe.URL);
        param[1] = SqlBase.CreateParam("@hrefText", SqlDbType.VarChar, 255, ParameterDirection.Input, pe.HrefText);
        param[2] = SqlBase.CreateParam("@sourceCode", SqlDbType.Image, myBytes.Length, ParameterDirection.Input, myBytes);
        param[3] = SqlBase.CreateParam("@score", SqlDbType.Float, 8, ParameterDirection.Input, pe.Score);

        ExecuteBestEffort("InsertIndexPage", param);
    }

    /// <summary>
    /// Inserts the information of one sub page by calling the
    /// <c>InsertSubPage</c> stored procedure.
    /// </summary>
    /// <remarks>
    /// The insert is best-effort: a failure of the stored procedure is written to
    /// <see cref="System.Diagnostics.Trace"/> and is not propagated to the caller.
    /// </remarks>
    /// <param name="pe">
    /// Page data to insert (URL, link text, page source, owning index page id and score).
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="pe"/> is null.</exception>
    public void InsertSubPage(PageElement pe)
    {
        if (pe == null)
        {
            throw new ArgumentNullException("pe");
        }

        // NOTE(review): same code-page dependent encoding as InsertIndexPage; kept as is.
        byte[] myBytes = System.Text.Encoding.Default.GetBytes(pe.PageSourceCode);

        SqlParameter[] param = new SqlParameter[5];
        param[0] = SqlBase.CreateParam("@URL", SqlDbType.VarChar, 255, ParameterDirection.Input, pe.URL);
        param[1] = SqlBase.CreateParam("@hrefText", SqlDbType.VarChar, 255, ParameterDirection.Input, pe.HrefText);
        param[2] = SqlBase.CreateParam("@sourceCode", SqlDbType.Image, myBytes.Length, ParameterDirection.Input, myBytes);
        param[3] = SqlBase.CreateParam("@indexPageID", SqlDbType.VarChar, 255, ParameterDirection.Input, pe.IndexPageID);
        param[4] = SqlBase.CreateParam("@score", SqlDbType.Float, 8, ParameterDirection.Input, pe.Score);

        ExecuteBestEffort("InsertSubPage", param);
    }

    /// <summary>
    /// Gets the seed sites from the seed table via the <c>GetCoreURL</c> stored procedure.
    /// For every returned row, <c>UpdateCoreURLisRead</c> is called with the row's id
    /// (presumably flagging the seed as read - verify against the procedure).
    /// </summary>
    /// <returns>An <see cref="ArrayList"/> holding column 1 of every row as a string.</returns>
    public ArrayList GetCoreURL()
    {
        ArrayList al = new ArrayList();

        // Fetch the list of seed URLs to download.
        DataTable myTable = SqlBase.ExecuteDataTable("GetCoreURL", CommandType.StoredProcedure);

        // Column 0 is used as the row id, column 1 as the URL.
        foreach (DataRow dRow in myTable.Rows)
        {
            al.Add(dRow[1].ToString());
            ExecuteWithId("UpdateCoreURLisRead", dRow[0]);
        }
        return al;
    }

    /// <summary>
    /// Gets seed sites from the seed table with an inline query
    /// (<c>select top 20 * from CoreURL where isRead=0</c>) instead of a stored procedure.
    /// For every returned row, <c>UpdateCoreURLisRead</c> is called with the row's id.
    /// </summary>
    /// <returns>An <see cref="ArrayList"/> holding column 1 of every row as a string.</returns>
    public ArrayList GetCoreURLNew()
    {
        ArrayList al = new ArrayList();
        using (SqlConnection con = SqlBase.GetConnection())
        {
            con.Open();

            DataTable myTable = new DataTable("TBCoreURL");
            using (SqlDataAdapter da = new SqlDataAdapter("select top 20 * from CoreURL where isRead=0 ", con))
            {
                da.SelectCommand.CommandType = CommandType.Text;
                da.Fill(myTable);
            }

            // "select *" means the indexes below rely on the table's column order:
            // column 0 is used as the row id, column 1 as the URL.
            foreach (DataRow dRow in myTable.Rows)
            {
                al.Add(dRow[1].ToString());
                ExecuteWithId("UpdateCoreURLisRead", dRow[0]);
            }
        }
        return al;
    }

    /// <summary>
    /// Gets a batch of URLs from the index page table via the <c>GetIndexPage</c>
    /// stored procedure. For every returned row, <c>UpdateIndexPageisRead</c> is called
    /// with the row's id.
    /// </summary>
    /// <returns>
    /// An <see cref="ArrayList"/> holding the raw column 1 value of every row
    /// (not converted to string, unlike <see cref="GetCoreURL"/>).
    /// </returns>
    public ArrayList GetIndexURL()
    {
        ArrayList indexUrl = new ArrayList();

        DataTable myTable = SqlBase.ExecuteDataTable("GetIndexPage", CommandType.StoredProcedure);

        foreach (DataRow dRow in myTable.Rows)
        {
            // The raw column object is stored on purpose to keep the element type
            // callers already receive.
            indexUrl.Add(dRow[1]);
            ExecuteWithId("UpdateIndexPageisRead", dRow[0]);
        }
        return indexUrl;
    }

    /// <summary>
    /// Gets the stored content for a specific URL from the index page table via the
    /// <c>GetIndexPageByURL</c> stored procedure.
    /// </summary>
    /// <param name="theURL">The URL to look up.</param>
    /// <returns>The <see cref="DataTable"/> returned by the data layer for that URL.</returns>
    public DataTable GetIndexURL(string theURL)
    {
        SqlParameter[] param = new SqlParameter[1];
        param[0] = SqlBase.CreateParam("@URL", SqlDbType.VarChar, 255, ParameterDirection.Input, theURL);

        return SqlBase.ExecuteDataTable("GetIndexPageByURL", param, CommandType.StoredProcedure);
    }

    /// <summary>
    /// Gets the topic word table via the <c>GetTopicLib</c> stored procedure.
    /// </summary>
    /// <returns>
    /// A <see cref="Hashtable"/> mapping column 0 of every row to column 1 of the same row.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// Two rows share the same column 0 value (thrown by <see cref="Hashtable.Add"/>).
    /// </exception>
    public Hashtable GetTopicLib()
    {
        Hashtable myTopicLibs = new Hashtable();

        DataTable myTable = SqlBase.ExecuteDataTable("GetTopicLib", CommandType.StoredProcedure);

        foreach (DataRow dRow in myTable.Rows)
        {
            myTopicLibs.Add(dRow[0], dRow[1]);
        }
        return myTopicLibs;
    }

    /// <summary>
    /// Gets the content of the filter table via the <c>GetFilterURL</c> stored procedure.
    /// </summary>
    /// <returns>An <see cref="ArrayList"/> holding column 0 of every row as a string.</returns>
    public ArrayList GetFilterURL()
    {
        ArrayList al = new ArrayList();

        DataTable myTable = SqlBase.ExecuteDataTable("GetFilterURL", CommandType.StoredProcedure);

        foreach (DataRow dRow in myTable.Rows)
        {
            al.Add(dRow[0].ToString());
        }
        return al;
    }

    /// <summary>
    /// Copies the rows returned by <c>GetIndexPageByScore</c> (the highest scoring index
    /// pages, per the original author's comment) into the seed table so the crawler can
    /// keep discovering new URLs, then calls <c>UpdateIndexPageisCoreURL</c> for each row.
    /// </summary>
    /// <remarks>
    /// The seed insert is best-effort: a failing <c>InsertCoreURL</c> is traced and the
    /// row is still passed to <c>UpdateIndexPageisCoreURL</c>, as before.
    /// </remarks>
    public void refreshCoreURL()
    {
        DataTable myTable = SqlBase.ExecuteDataTable("GetIndexPageByScore", CommandType.StoredProcedure);

        // Column 0 is used as the row id, column 1 as the URL, column 2 as the score.
        foreach (DataRow dRow in myTable.Rows)
        {
            // Insert into the seed table.
            SqlParameter[] param = new SqlParameter[2];
            param[0] = SqlBase.CreateParam("@URL", SqlDbType.VarChar, 255, ParameterDirection.Input, dRow[1]);
            // SqlDbType.Float is an 8-byte double, so the score is passed as a double
            // instead of being narrowed to float first.
            param[1] = SqlBase.CreateParam("@score", SqlDbType.Float, 8, ParameterDirection.Input, Convert.ToDouble(dRow[2]));
            ExecuteBestEffort("InsertCoreURL", param);

            // Update the index page table (isCoreURL field, per the procedure name).
            ExecuteWithId("UpdateIndexPageisCoreURL", dRow[0]);
        }
    }

    /// <summary>
    /// Runs a stored procedure whose failure must not interrupt the crawl.
    /// Any exception is written to the trace listeners instead of being dropped silently.
    /// </summary>
    /// <param name="procedureName">Name of the stored procedure to execute.</param>
    /// <param name="param">Parameters passed to the stored procedure.</param>
    private static void ExecuteBestEffort(string procedureName, SqlParameter[] param)
    {
        try
        {
            SqlBase.ExecuteQuery(procedureName, param, CommandType.StoredProcedure);
        }
        catch (Exception ex)
        {
            // Deliberately not rethrown: callers have always treated these writes as
            // best-effort. The trace entry keeps the failure diagnosable.
            System.Diagnostics.Trace.WriteLine("LogicalLayer: stored procedure '" + procedureName + "' failed: " + ex);
        }
    }

    /// <summary>
    /// Runs a stored procedure that takes a single integer <c>@ID</c> parameter.
    /// Exceptions from the data layer propagate to the caller.
    /// </summary>
    /// <param name="procedureName">Name of the stored procedure to execute.</param>
    /// <param name="id">Row id value; converted with <see cref="Convert.ToInt32(object)"/>.</param>
    private static void ExecuteWithId(string procedureName, object id)
    {
        SqlParameter[] param = new SqlParameter[1];
        param[0] = SqlBase.CreateParam("@ID", SqlDbType.Int, 4, ParameterDirection.Input, Convert.ToInt32(id));
        SqlBase.ExecuteQuery(procedureName, param, CommandType.StoredProcedure);
    }
}
}