// Source: www.pudn.com > classifier.rar > studyLearn.cs
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data.OleDb;
using System.Data;
using System.Threading;
using System.IO;
namespace classifier
{
/// <summary>
/// Summary description for studyLearn (the statistical learning form).
/// </summary>
public class studyLearn : System.Windows.Forms.Form
{
// Designer component container; disposed in Dispose(bool).
private System.ComponentModel.Container components = null;
private System.Windows.Forms.GroupBox groupBox1;
private System.Windows.Forms.GroupBox groupBox2;
private System.Windows.Forms.ListBox listBox1;
private System.Windows.Forms.Label label1;
private System.Windows.Forms.Label typeLabel;
private System.Windows.Forms.Button studyBtn;
private System.Windows.Forms.TextBox mesBox;
// Files to learn from; supplied through the two-argument constructor.
private string [] fileList;
// Category name; supplied through the two-argument constructor and shown in typeLabel.
private string type;
private Thread fThread;
// Most recent status message appended to mesBox.
private string threadMsg;
// Classifier section
// articleSum: sum of scanFile.times over all rows; typeSum: the same sum for the current category.
private double articleSum,typeSum;
private bool recTempTable; // whether to record the temporary table
private int searchType; // 0 = forward search, 1 = backward search, 2 = bidirectional search
private OleDbConnection sqlConnect;
private char[] endChar; // terminator characters (loaded from the endChar table)
private char[] missChar; // ignored characters
private char[] word; // English letters
private char[] number; // digits
private ArrayList leftSplit,rightSplit,leftPos,rightPos,leftFreq,rightFreq;
// Working data set: holds the "fileName" and "word2Temp" tables, among others.
private DataSet ds;
// Dictionary data set: holds the "word" table filled from the words table.
private DataSet ds1;
// Sum of wfreq over the words table; used as the divisor for word probabilities.
private int denominator;
private System.Windows.Forms.ComboBox comboBox1;
private System.Windows.Forms.ComboBox comboBox2;
private bool saveSingle; // whether single-character words are saved
private System.Windows.Forms.Label label2;
private System.Windows.Forms.ComboBox comboBox3;
private System.Windows.Forms.Button prevBtn;
private System.Windows.Forms.Label label3;
// Default constructor: only builds the designer-generated controls.
public studyLearn()
{
//
// Required for Windows Forms Designer support
//
InitializeComponent();
//
// TODO: add any constructor code after the InitializeComponent call
//
}
/// <summary>
/// Custom constructor: builds the controls, then remembers the files to
/// learn from and the category they belong to.
/// </summary>
/// <param name="fileSrc">Paths of the files to process.</param>
/// <param name="typeSrc">Category name shown in the type label.</param>
public studyLearn(string [] fileSrc,string typeSrc) : this()
{
    // The parameterless constructor has already run InitializeComponent().
    this.type=typeSrc;
    this.fileList=fileSrc;
}
/// <summary>
/// Cleans up any resources being used.
/// </summary>
/// <param name="disposing">
/// true when managed resources (the designer component container) should be released.
/// </param>
protected override void Dispose( bool disposing )
{
    // The component container is released only on an explicit dispose.
    bool releaseComponents = disposing && components != null;
    if( releaseComponents )
    {
        components.Dispose();
    }
    base.Dispose( disposing );
}
#region Windows 窗体设计器生成的代码
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// Creates the controls, sets their layout and captions, and wires the
/// Click and Load event handlers.
/// </summary>
private void InitializeComponent()
{
this.groupBox1 = new System.Windows.Forms.GroupBox();
this.label3 = new System.Windows.Forms.Label();
this.comboBox3 = new System.Windows.Forms.ComboBox();
this.label2 = new System.Windows.Forms.Label();
this.comboBox2 = new System.Windows.Forms.ComboBox();
this.comboBox1 = new System.Windows.Forms.ComboBox();
this.studyBtn = new System.Windows.Forms.Button();
this.typeLabel = new System.Windows.Forms.Label();
this.label1 = new System.Windows.Forms.Label();
this.listBox1 = new System.Windows.Forms.ListBox();
this.groupBox2 = new System.Windows.Forms.GroupBox();
this.mesBox = new System.Windows.Forms.TextBox();
this.prevBtn = new System.Windows.Forms.Button();
this.groupBox1.SuspendLayout();
this.groupBox2.SuspendLayout();
this.SuspendLayout();
//
// groupBox1
//
this.groupBox1.Controls.Add(this.label3);
this.groupBox1.Controls.Add(this.comboBox3);
this.groupBox1.Controls.Add(this.label2);
this.groupBox1.Controls.Add(this.comboBox2);
this.groupBox1.Controls.Add(this.comboBox1);
this.groupBox1.Controls.Add(this.studyBtn);
this.groupBox1.Controls.Add(this.typeLabel);
this.groupBox1.Controls.Add(this.label1);
this.groupBox1.Controls.Add(this.listBox1);
this.groupBox1.Location = new System.Drawing.Point(16, 16);
this.groupBox1.Name = "groupBox1";
this.groupBox1.Size = new System.Drawing.Size(656, 240);
this.groupBox1.TabIndex = 0;
this.groupBox1.TabStop = false;
this.groupBox1.Text = "学习内容"; // caption: "Learning content"
//
// label3
//
this.label3.Location = new System.Drawing.Point(392, 152);
this.label3.Name = "label3";
this.label3.Size = new System.Drawing.Size(104, 23);
this.label3.TabIndex = 9;
this.label3.Text = "记录计算临时表:"; // caption: "Record the calculation temp table:"
//
// comboBox3
//
this.comboBox3.Items.AddRange(new object[] { // items: yes / no
"是",
"否"});
this.comboBox3.Location = new System.Drawing.Point(512, 152);
this.comboBox3.Name = "comboBox3";
this.comboBox3.Size = new System.Drawing.Size(121, 20);
this.comboBox3.TabIndex = 8;
this.comboBox3.Text = "否"; // default: no
//
// label2
//
this.label2.Location = new System.Drawing.Point(160, 152);
this.label2.Name = "label2";
this.label2.Size = new System.Drawing.Size(72, 23);
this.label2.TabIndex = 7;
this.label2.Text = "统计单字词"; // caption: "Count single-character words"
//
// comboBox2
//
this.comboBox2.Items.AddRange(new object[] { // items: yes / no
"是",
"否"});
this.comboBox2.Location = new System.Drawing.Point(256, 152);
this.comboBox2.Name = "comboBox2";
this.comboBox2.Size = new System.Drawing.Size(121, 20);
this.comboBox2.TabIndex = 6;
this.comboBox2.Text = "否"; // default: no
//
// comboBox1
//
this.comboBox1.Items.AddRange(new object[] { // items: forward / backward / bidirectional search
"正向搜索",
"逆向搜索",
"双向搜索"});
this.comboBox1.Location = new System.Drawing.Point(16, 152);
this.comboBox1.Name = "comboBox1";
this.comboBox1.Size = new System.Drawing.Size(121, 20);
this.comboBox1.TabIndex = 5;
this.comboBox1.Text = "逆向搜索"; // default: backward search
//
// studyBtn
//
this.studyBtn.Location = new System.Drawing.Point(288, 208);
this.studyBtn.Name = "studyBtn";
this.studyBtn.TabIndex = 3;
this.studyBtn.Text = "学习"; // caption: "Learn"
this.studyBtn.Click += new System.EventHandler(this.studyBtn_Click);
//
// typeLabel
//
this.typeLabel.Location = new System.Drawing.Point(264, 16);
this.typeLabel.Name = "typeLabel";
this.typeLabel.TabIndex = 2;
this.typeLabel.Text = "类别名"; // placeholder: "Category name"; replaced in studyLearn_Load
this.typeLabel.TextAlign = System.Drawing.ContentAlignment.MiddleCenter;
//
// label1
//
this.label1.Location = new System.Drawing.Point(8, 56);
this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(72, 23);
this.label1.TabIndex = 1;
this.label1.Text = "文件列表:"; // caption: "File list:"
//
// listBox1
//
this.listBox1.ItemHeight = 12;
this.listBox1.Location = new System.Drawing.Point(88, 48);
this.listBox1.Name = "listBox1";
this.listBox1.Size = new System.Drawing.Size(544, 88);
this.listBox1.TabIndex = 0;
//
// groupBox2
//
this.groupBox2.Controls.Add(this.mesBox);
this.groupBox2.Location = new System.Drawing.Point(16, 272);
this.groupBox2.Name = "groupBox2";
this.groupBox2.Size = new System.Drawing.Size(656, 272);
this.groupBox2.TabIndex = 1;
this.groupBox2.TabStop = false;
this.groupBox2.Text = "学习状态"; // caption: "Learning status"
//
// mesBox
//
this.mesBox.Location = new System.Drawing.Point(88, 32);
this.mesBox.Multiline = true;
this.mesBox.Name = "mesBox";
this.mesBox.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
this.mesBox.Size = new System.Drawing.Size(512, 224);
this.mesBox.TabIndex = 0;
this.mesBox.Text = "textBox1";
//
// prevBtn
//
this.prevBtn.Location = new System.Drawing.Point(304, 568);
this.prevBtn.Name = "prevBtn";
this.prevBtn.TabIndex = 3;
this.prevBtn.Text = "上一步"; // caption: "Previous step"
this.prevBtn.Click += new System.EventHandler(this.prevBtn_Click);
//
// studyLearn
//
this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
this.ClientSize = new System.Drawing.Size(688, 629);
this.Controls.Add(this.prevBtn);
this.Controls.Add(this.groupBox2);
this.Controls.Add(this.groupBox1);
this.Name = "studyLearn";
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "统计学习"; // window title: "Statistical learning"
this.Load += new System.EventHandler(this.studyLearn_Load);
this.groupBox1.ResumeLayout(false);
this.groupBox2.ResumeLayout(false);
this.ResumeLayout(false);
}
#endregion
/// <summary>
/// Initializes the form when it loads: shows the category name and
/// binds the file list.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void studyLearn_Load(object sender, System.EventArgs e)
{
    // Replace the designer placeholder text with the actual category.
    this.typeLabel.Text = this.type;
    this.BindList();
}
/// <summary>
/// Binds the file list.
/// </summary>
private void BindList()
{
listBox1.Items.Clear();
for(int i=0;i
/// <summary>
/// Handles a click on the learn button: reads the option combo boxes,
/// loads the scanned-file records and the document totals from the
/// database, then runs <see cref="initClassifier"/>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void studyBtn_Click(object sender, System.EventArgs e)
{
    // Disable the button while learning runs. The outer finally re-enables
    // it on every exit path, so an exception can no longer leave the form
    // permanently unusable.
    studyBtn.Enabled=false;
    try
    {
        // Whether single-character words are recorded.
        saveSingle=(comboBox2.Text=="是");
        // Whether the temporary table is recorded.
        recTempTable=(comboBox3.Text=="是");
        // Search mode: 0 = forward, 2 = bidirectional, anything else
        // (including the default "逆向搜索") = backward.
        if(comboBox1.Text=="正向搜索")
        {
            searchType=0;
        }
        else if(comboBox1.Text=="双向搜索")
        {
            searchType=2;
        }
        else
        {
            searchType=1;
        }

        // Non-null when one of the totals could not be read.
        string failure=null;
        OleDbConnection sqlCon=db.dbOpertation.getConnection();
        sqlCon.Open();
        try
        {
            // Load the scanned-file records used by updateFileTable.
            if(ds==null)
            {
                ds=new DataSet();
            }
            OleDbDataAdapter sqlAd=new OleDbDataAdapter("select * from scanFile",sqlCon);
            sqlAd.Fill(ds,"fileName");

            OleDbCommand sqlCom=new OleDbCommand();
            sqlCom.Connection=sqlCon;
            double total;

            // Total number of scanned documents.
            sqlCom.CommandText="select sum(times) as jj from scanFile";
            if(!readTimesSum(sqlCom,out total))
            {
                failure="未读出文档总数!";
            }
            else
            {
                articleSum=total;
                // Number of scanned documents in the current category. The
                // category name is passed as a parameter (OLE DB uses
                // positional '?' markers) so that quotes in the name cannot
                // break or alter the query.
                sqlCom.CommandText="select sum(times) as jj from scanFile where typeName=?";
                sqlCom.Parameters.Add("@typeName",OleDbType.VarWChar).Value=typeLabel.Text;
                if(!readTimesSum(sqlCom,out total))
                {
                    failure="未读出类别文档总数!";
                }
                else
                {
                    typeSum=total;
                }
            }
        }
        finally
        {
            // initClassifier expects to (re)open the connection itself, so
            // it must be closed here even when a query throws.
            sqlCon.Close();
        }

        if(failure!=null)
        {
            MessageBox.Show(failure);
            return;
        }
        initClassifier();
    }
    finally
    {
        studyBtn.Enabled=true;
    }
}

/// <summary>
/// Runs a query that yields one row with a "jj" column and converts that
/// value to a number, treating an empty/NULL sum as 0.
/// </summary>
/// <param name="sqlCom">Command whose text and connection are already set.</param>
/// <param name="total">Receives the sum; 0 when the sum is empty or no row was read.</param>
/// <returns>true when a row was read; false when the query returned no row.</returns>
private static bool readTimesSum(OleDbCommand sqlCom,out double total)
{
    total=0;
    OleDbDataReader sqlRead=sqlCom.ExecuteReader();
    try
    {
        if(!sqlRead.Read())
        {
            return false;
        }
        string sst=sqlRead["jj"].ToString();
        if(sst.Trim()!="")
        {
            total=Convert.ToInt32(sst);
        }
        return true;
    }
    finally
    {
        // The reader must be closed before the command can be reused.
        sqlRead.Close();
    }
}
/// <summary>
/// Initializes the classifier: loads the terminator characters and the
/// dictionary ("word" table), makes sure the in-memory "word2Temp" table
/// exists, resets the split buffers, reads the total word frequency into
/// <c>denominator</c>, and on success runs <see cref="readToEnd"/>.
/// </summary>
/// <returns>
/// true when initialization succeeded and readToEnd has run; false when
/// the temporary table could not be created or the total word frequency
/// could not be read.
/// </returns>
private bool initClassifier()
{
    bool initialized=false;
    sqlConnect=db.dbOpertation.getConnection();
    if(sqlConnect.State==ConnectionState.Closed)
    {
        sqlConnect.Open();
    }
    try
    {
        // Read the terminator characters from the database into endChar
        // (a space does not count as a terminator).
        OleDbDataAdapter sqlAd=new OleDbDataAdapter("select * from endChar",sqlConnect);
        DataTable dt=new DataTable();
        sqlAd.Fill(dt);
        System.Text.StringBuilder terminators=new System.Text.StringBuilder();
        foreach( DataRow dr in dt.Rows)
        {
            terminators.Append(dr["ending"].ToString());
        }
        endChar=terminators.ToString().ToCharArray();

        // Load the dictionary once; later runs reuse the cached table.
        if(ds1==null)
        {
            ds1=new DataSet();
        }
        if(ds1.Tables["word"]==null)
        {
            sqlAd.SelectCommand.CommandText="select * from words";
            sqlAd.Fill(ds1,"word");
        }

        // The temporary table records, per word, in how many articles the
        // word occurred.
        if(ds==null)
        {
            ds=new DataSet();
        }
        if(ds.Tables["word2Temp"]==null && !getTable("word2Temp"))
        {
            MessageBox.Show("创建临时表失败");
            // The finally block closes the connection on this path too.
            return false;
        }

        missChar=new char[]{'\r','\n',' '};
        number=new char[]{'0','1','2','3','4','5','6','7','8','9'};
        word=new char[]{'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'};
        leftSplit=new ArrayList();
        leftPos=new ArrayList();
        leftFreq=new ArrayList();
        rightSplit=new ArrayList();
        rightPos=new ArrayList();
        rightFreq=new ArrayList();

        OleDbCommand sqlCom=new OleDbCommand("select sum(wfreq) as aa from words",sqlConnect);
        OleDbDataReader sqlRead=sqlCom.ExecuteReader();
        try
        {
            // sum() over an empty table yields a NULL value; converting
            // DBNull would throw, so it is reported as a failed
            // initialization instead.
            if(sqlRead.Read() && sqlRead["aa"]!=DBNull.Value)
            {
                denominator=Convert.ToInt32(sqlRead["aa"]);
                initialized=true;
            }
        }
        finally
        {
            sqlRead.Close();
        }
    }
    finally
    {
        // Always release the connection before the (long) scan starts or
        // an error propagates.
        sqlConnect.Close();
    }

    if(!initialized)
    {
        mesBox.Text="分类器初始化失败\n";
        return false;
    }
    mesBox.Text="分类器初始化完毕\r\n";
    // Runs synchronously; the threaded variant was abandoned as too cumbersome.
    readToEnd();
    return true;
}
/// <summary>
/// Reads characters until a terminator is encountered; English words and
/// numbers also count as terminators.
/// </summary>
public void readToEnd()
{
//采用自动机原理实现识别过程
int status=0;
//计数器每3个文章扫描完后保存结果
int counter=0;
//lastChar是上一个字符,用于识别数字单词
char currentChar;
string result="";
StreamReader sr;
//对文件列表中的每一个文件进行处理
for(int ss=0;ss= 0)
{
//当前字符
currentChar=(char)sr.Read();
if(status==0)
{
if(isNumber(currentChar))
{
//数字
result=currentChar.ToString();
status=1;
}
else if(isWord(currentChar))
{
result=currentChar.ToString();
status=5;
}
else if(isChinese(currentChar) )
{
//中文
result=currentChar.ToString();
status=7;
}
else if(isEnd(currentChar))
{
result="";
status=0;
}
else if(isSpace(currentChar))
{
//空格,不改变状态,继续读
result="";
status=0;
}
else
{
result="";
status=0;
}
}
else if(status==1)
{
//无小数数字,数字不保存
if(isNumber(currentChar))
{
result+=currentChar;
//状态不变
}
else if(currentChar=='.')
{
result+=currentChar;
status=2;
}
else if(isWord(currentChar))
{
//识别出数字
//splitResult.Add(result);
//对二项表进行更新,表示该文章中出现这个词,出现这个词的文章数加1
//updateTable(result,fileList[ss]);
result=currentChar.ToString();
status=5;
}
else if(isChinese(currentChar))
{
//识别出数字
//splitResult.Add(result);
//updateTable(result,fileList[ss]);
result=currentChar.ToString();
status=7;
}
// else if(isSpace(currentChar))
// {
// splitResult.Add(result);
// result="";
// status=0;
// }
// else if(isEnd(currentChar))
// {
// splitResult.Add(result);
// result="";
// status=0;
// }
else
{
//数字其实可以不保存
//splitResult.Add(result);
//updateTable(result,fileList[ss]);
result="";
status=0;
}
}
else if(status==2)
{
//有小数数字,数字不保存
if(isNumber(currentChar))
{
result+=currentChar;
//状态不变
}
else if(isWord(currentChar))
{
//splitResult.Add(result);
//updateTable(result,fileList[ss]);
result=currentChar.ToString();
status=5;
}
else if(isChinese(currentChar))
{
//splitResult.Add(result);
//updateTable(result,fileList[ss]);
result=currentChar.ToString();
status=7;
}
// else if(isSpace(currentChar))
// {
// splitResult.Add(result);
// result="";
// status=0;
// }
// else if(isEnd(currentChar))
// {
// splitResult.Add(result);
// result="";
// status=0;
// }
else
{
//其他情况,数字不保存
//splitResult.Add(result);
//updateTable(result,fileList[ss]);
result="";
status=0;
}
}
else if(status==5)
{
if(isWord(currentChar))
{
result+=currentChar;
//status不变
}
else if(isNumber(currentChar))
{
//splitResult.Add(result);
updateTable(result,fileList[ss]);
result=currentChar.ToString();
status=1;
}
else if(isChinese(currentChar))
{
//splitResult.Add(result);
updateTable(result,fileList[ss]);
result=currentChar.ToString();
status=7;
}
else
{
//状态6,识别为一个英文单词,并将状态重新置位0
//splitResult.Add(result);
updateTable(result,fileList[ss]);
result=currentChar.ToString();
status=0;
}
}
else if(status==7)
{
if(isChinese(currentChar))
{
//中文
result+=currentChar;
}
else if(isNumber(currentChar))
{
doSplit(result,sqlConnect,fileList[ss]);
result=currentChar.ToString();
status=1;
}
else if(isWord(currentChar))
{
doSplit(result,sqlConnect,fileList[ss]);
result=currentChar.ToString();
status=5;
}
else if(isSpace(currentChar))
{
//忽略空格
}
else
{
doSplit(result,sqlConnect,fileList[ss]);
result="";
status=0;
}
}
else
{
MessageBox.Show("出现意外状态!");
}
}
//对结尾情况的处理,只处理英文,中文,数字的情况
if(status==1)
{
//数字
//splitResult.Add(result);
updateTable(result,fileList[ss]);
}
else if(status==2)
{
//带小数点的数字
//splitResult.Add(result);
updateTable(result,fileList[ss]);
}
else if(status==5)
{
//单词
//splitResult.Add(result);
updateTable(result,fileList[ss]);
}
else if(status==7)
{
//汉字
doSplit(result,sqlConnect,fileList[ss]);
}
else
{
}
counter++;
//更新文件列表
updateFileTable(fileList[ss]);
//文章总数加1
articleSum+=1;
typeSum+=1;
//每三个文章,或者到文件列表末尾保存一次,防止临时表数据过多
if(counter>=3||ss==this.fileList.Length-1)
{
save2DB();
ds.Tables["word2Temp"].Clear();
counter=0;
}
threadMsg="文件:"+fileList[ss]+" 处理完毕\r\n";
mesBox.Text+=threadMsg;
// Thread msgThread=new Thread(new ThreadStart(this.setMessage));
// msgThread.Priority=ThreadPriority.Highest;
// msgThread.Start();
// //启动前的延迟
// while(!msgThread.IsAlive);
// msgThread.Join();
sr.Close();
}
studyBtn.Enabled=true;
//结果更新到数据库
// string ccc=type;
//saveDB();
}
// Appends the pending status message (threadMsg) to the status text box.
private void setMessage()
{
    string updated=mesBox.Text+threadMsg;
    mesBox.Text=updated;
}
#region 判断类型函数
// Returns true only for the ASCII space character.
private bool isSpace(char currentChar)
{
    return currentChar==' ';
}
// Returns true when the character lies in the range U+4E00..U+9FA5 (inclusive).
private bool isChinese(char currentChar)
{
    bool belowRange=currentChar<0x4e00;
    bool aboveRange=currentChar>0x9fa5;
    return !(belowRange || aboveRange);
}
// Returns true for the ASCII digits '0'..'9'.
private bool isNumber(char currentChar)
{
    return !(currentChar<'0' || currentChar>'9');
}
// Returns true for the ASCII letters a-z and A-Z.
private bool isWord(char currentChar)
{
    bool lowerCase=currentChar>='a' && currentChar<='z';
    bool upperCase=currentChar>='A' && currentChar<='Z';
    return lowerCase || upperCase;
}
// Checks whether the character is a terminator
private bool isEnd(char currentChar)
{
for(int i=0;i0)
{
//currentPos即剩余串的长度
if(currentPos<=4)
{
subStr=sourceLine.Substring(0,currentPos);
subStr=backCheckSub(subStr);
currentPos-=subStr.Length;
rightPos.Add(currentPos);
}
else
{
subStr=sourceLine.Substring(currentPos-4,4);
subStr=backCheckSub(subStr);
currentPos-=subStr.Length;
rightPos.Add(currentPos);
}
rightSplit.Add(subStr);
if(searchType==1)
{
if(!saveSingle&&subStr.Length<=1)
{
}
else
{
//splitResult.Add(subStr);
updateTable(subStr,file);
}
}
}
}
//计算混合概率密度的变量
int i,j,p;
//用于存储前向、后向扫描最大概率
float pRight=1,pLeft=1;
int lastPosI=0,lastPosJ=rightPos.Count-1;
if(searchType==2)
{
i=1;
j=rightPos.Count-2;
while(i=0)
{
int ll=Convert.ToInt32(leftPos[i]);
int rr=Convert.ToInt32(rightPos[j]);
if(ll>rr)
{
pRight*=((float)(Convert.ToInt32(rightFreq[j])))/denominator;
j--;
}
else if(llj;p--)
{
subStr=(string)rightSplit[p];
if(!saveSingle&&subStr.Length<=1)
{
}
else
{
//splitResult.Add(subStr);
updateTable(subStr,file);
}
//splitResult.Add(rightSplit[p]);
}
//记录结点位置
lastPosI=i;
lastPosJ=j;
i++;
j--;
}
else
{
//正向搜索的概率大
for(p=lastPosI;pj;p--)
{
subStr=(string)rightSplit[p];
if(!saveSingle&&subStr.Length<=1)
{
}
else
{
//splitResult.Add(subStr);
updateTable(subStr,file);
}
//splitResult.Add(rightSplit[p]);
}
}
else
{
//正向搜索的概率大
for(p=lastPosI+1;p0)
{
leftFreq.Add(drs[0]["wfreq"].ToString());
return subString;
}
else
{
leftFreq.Add("0");
return subString;
}
}
else
{
DataRow[] drs=ds1.Tables["word"].Select("word=\'"+subString+"\'");
if(drs.Length>0)
{
leftFreq.Add(drs[0]["wfreq"].ToString());
return subString;
}
else
{
return foreCheckSub(subString.Substring(0,subString.Length-1));
}
}
}
// Backward search: looks the candidate up in the dictionary and, while it
// is not found and is longer than one character, drops its first character
// and tries again. Exactly one entry is appended to rightFreq: the wfreq of
// the matched word, or "0" when even the last candidate (length <= 1) is
// not in the dictionary. Returns the candidate that was finally accepted.
private string backCheckSub(string subString)
{
    string candidate=subString;
    while(true)
    {
        DataRow[] matches=ds1.Tables["word"].Select("word=\'"+candidate+"\'");
        if(matches.Length>0)
        {
            rightFreq.Add(matches[0]["wfreq"].ToString());
            return candidate;
        }
        if(candidate.Length<=1)
        {
            // Unknown single character: keep it with frequency zero.
            rightFreq.Add("0");
            return candidate;
        }
        candidate=candidate.Substring(1,candidate.Length-1);
    }
}
#endregion
#region 临时表操作
/// <summary>
/// Creates the in-memory temporary table and adds it to <c>ds</c>.
/// Columns, in order: lrno (auto-increment int), word (string),
/// times (int), file (string), sum (int).
/// </summary>
/// <param name="name">Name of the table to create.</param>
/// <returns>true on success; false after showing the error message.</returns>
private bool getTable(string name)
{
    try
    {
        // Build the table completely before registering it. If it were
        // added first and a later step failed, a half-built table would
        // stay in the data set and callers checking ds.Tables[name] for
        // null would treat it as already created.
        DataTable dt=new DataTable(name);

        // Auto-increment key column.
        DataColumn dc=new DataColumn("lrno",typeof(int));
        dc.AutoIncrement=true;
        dc.AutoIncrementStep=1;
        dt.Columns.Add(dc);

        // Column order matters: other code addresses "sum" by index 4.
        dt.Columns.Add(new DataColumn("word",typeof(string)));
        dt.Columns.Add(new DataColumn("times",typeof(int)));
        dt.Columns.Add(new DataColumn("file",typeof(string)));
        dt.Columns.Add(new DataColumn("sum",typeof(int)));

        ds.Tables.Add(dt);
        return true;
    }
    catch(Exception ex)
    {
        MessageBox.Show("创建临时表失败:"+ex.Message);
        return false;
    }
}
/// <summary>
/// Updates the scanned-file records ("fileName" table in <c>ds</c>): adds
/// a row with times = 1 for a file seen for the first time, otherwise
/// increments the times value of the existing row.
/// </summary>
/// <param name="file">Path of the file that has just been processed.</param>
private void updateFileTable(string file)
{
    DataTable files=ds.Tables["fileName"];
    // Single quotes must be doubled inside a filter-expression string
    // literal; otherwise a path containing an apostrophe makes Select throw.
    string selectcmd="filePath=\'"+file.Replace("'","''")+"\'";
    DataRow [] drs=files.Select(selectcmd);
    if(drs.Length==0)
    {
        DataRow dr=files.NewRow();
        dr["filePath"]=file;
        dr["typeName"]=typeLabel.Text;
        dr["times"]="1";
        files.Rows.Add(dr);
    }
    else
    {
        drs[0]["times"]=Convert.ToInt32(drs[0]["times"])+1;
    }
}
/// <summary>
/// Updates the temporary table ("word2Temp" in <c>ds</c>) for one
/// occurrence of a word. A word not yet in the table is inserted with
/// times = 1. For a word already present, times is incremented only when
/// the row was last touched by a different file, so each file counts at
/// most once in a row.
/// </summary>
/// <param name="word">The word that was recognised.</param>
/// <param name="file">Path of the file in which the word occurred.</param>
private void updateTable(string word,string file)
{
    DataTable temp=ds.Tables["word2Temp"];
    // Single quotes must be doubled inside a filter-expression string
    // literal; otherwise a value containing an apostrophe makes Select throw.
    string wordFilter="word=\'"+word.Replace("'","''")+"\'";
    DataRow [] drs=temp.Select(wordFilter);
    if(drs.Length>0)
    {
        // The word exists; "<>" is the filter-expression "not equal" operator.
        string otherFileFilter=wordFilter+" and file<>\'"+file.Replace("'","''")+"\'";
        DataRow [] drs2=temp.Select(otherFileFilter);
        if(drs2.Length>0)
        {
            // Same word, different file: count this file and remember it.
            drs[0]["times"]=Convert.ToInt32(drs[0]["times"])+1;
            drs[0]["file"]=file;
        }
    }
    else
    {
        // The word is new: insert it.
        DataRow dr=temp.NewRow();
        dr["word"]=word;
        dr["times"]=1;
        dr["file"]=file;
        temp.Rows.Add(dr);
    }
}
#endregion
#region 统计保存结果
//保存二项分类结果
private void save2DB()
{
string cmd;
OleDbDataAdapter sqlAd,sqlAd2,sqlAd3;
DataRow dr,dr2;
DataRow[] drs;
int j,k,wSum;
string word;
// OleDbTransaction sqlTran;
// sqlTran=sqlConnect.BeginTransaction();
//二项模型表
cmd="select * from "+type+"2";
sqlAd=new OleDbDataAdapter(cmd,sqlConnect);
sqlAd.Fill(ds,"word2");
// OleDbCommandBuilder sqlBuilder1;
// //临时表
// if(recTempTable)
// {
// sqlAd1=new OleDbDataAdapter("select * from tempTable",sqlConnect);
// sqlBuilder1=new OleDbCommandBuilder(sqlAd1);
// }
//词频汇总表
cmd="select * from wordSum2";
sqlAd2=new OleDbDataAdapter(cmd,sqlConnect);
sqlAd2.Fill(ds,"wordSum2");
//扫描文章记录表
sqlAd3=new OleDbDataAdapter("select * from scanFile",sqlConnect);
sqlAd3.Fill(ds,"fileName");
OleDbCommandBuilder sqlBuilder=new OleDbCommandBuilder(sqlAd);
OleDbCommandBuilder sqlBuilder2=new OleDbCommandBuilder(sqlAd2);
OleDbCommandBuilder sqlBuilder3=new OleDbCommandBuilder(sqlAd3);
for(int i=0;i0)
{
j=Convert.ToInt32(drs[0]["times"]);
j=j+k;
drs[0]["times"]=j;
//将该词的出现总次数记入temp表,用于计算互信息
ds.Tables["word2Temp"].Rows[i][4]=j;
wSum=j;
}
else
{
dr2=ds.Tables["wordSum2"].NewRow();
dr2["word"]=word;
dr2["times"]=k;
ds.Tables["wordSum2"].Rows.Add(dr2);
//将该词的出现总次数记入temp表,用于计算互信息
ds.Tables["word2Temp"].Rows[i][4]=1;
//没出现过这个词,则词的总出现次数即为当前的出现次数
wSum=k;
}
//互信息不在此处计算
//还要将互信息记录到类别表中
cmd="word=\'"+word+"\'";
drs=ds.Tables["word2"].Select(cmd);
if(drs.Length>0)
{
j=Convert.ToInt32(drs[0]["times"]);
j=j+k;
drs[0]["times"]=j;
// cal=j*articleSum/(typeSum*wSum);
// MI=Math.Log(cal);
// drs[0]["MI"]=MI;
}
else
{
dr2=ds.Tables["word2"].NewRow();
dr2["word"]=word;
dr2["times"]=k;
// cal=k*articleSum/(typeSum*wSum);
// MI=Math.Log(cal);
// dr2["MI"]=MI;
ds.Tables["word2"].Rows.Add(dr2);
}
}
//更新到数据库
//这部分最容易出错,可以在这里设断点
try
{
// if(recTempTable)
// {
// sqlAd1.Update(ds,"word2Temp");
// ds.Tables["word2Temp"].AcceptChanges();
// }
sqlAd3.Update(ds,"fileName");
ds.Tables["fileName"].AcceptChanges();
//ds.Tables["fileName"].Clear();
ds.Tables["fileName"].Clear();
sqlAd.Update(ds,"word2");
ds.Tables["word2"].AcceptChanges();
ds.Tables["word2"].Clear();
sqlAd2.Update(ds,"wordSum2");
ds.Tables["wordSum2"].AcceptChanges();
ds.Tables["wordSum2"].Clear();
}
catch(Exception ex)
{
string ccc;
OleDbCommand sqlCom=sqlBuilder.GetInsertCommand();
ccc=sqlCom.CommandText;
sqlCom=sqlBuilder2.GetInsertCommand();
ccc=sqlCom.CommandText;
sqlCom=sqlBuilder3.GetInsertCommand();
ccc=sqlCom.CommandText;
MessageBox.Show("更新数据库出现问题:"+ex.Message);
}
}
#endregion
// Handles the "previous step" button: opens a studySelect form under the
// same MDI parent and closes this form. Any error is shown in a message box.
private void prevBtn_Click(object sender, System.EventArgs e)
{
    try
    {
        Form selector=new studySelect();
        selector.MdiParent=MdiParent;
        selector.Show();
        Close();
    }
    catch(Exception error)
    {
        MessageBox.Show(error.Message);
    }
}
}
}