www.pudn.com > classifier.rar > studyLearn.cs


using System; 
using System.Drawing; 
using System.Collections; 
using System.ComponentModel; 
using System.Windows.Forms; 
using System.Data.OleDb; 
using System.Data; 
using System.Threading; 
using System.IO; 
 
namespace classifier 
{ 
	/// <summary>
	/// Summary description for studyLearn.
	/// </summary>
	public class studyLearn : System.Windows.Forms.Form 
	{ 
		private System.ComponentModel.Container components = null; 
		private System.Windows.Forms.GroupBox groupBox1; 
		private System.Windows.Forms.GroupBox groupBox2; 
		private System.Windows.Forms.ListBox listBox1; 
		private System.Windows.Forms.Label label1; 
		private System.Windows.Forms.Label typeLabel; 
		private System.Windows.Forms.Button studyBtn; 
		private System.Windows.Forms.TextBox mesBox; 
		private string [] fileList; 
		private string type;		 
		private Thread fThread; 
		private string threadMsg; 
 
		//分类器部分		 
		private double articleSum,typeSum; 
		private bool recTempTable;	//是否记录临时表 
		private int searchType;		//正向搜索,逆向搜索,双向搜索 
		private OleDbConnection sqlConnect;		 
		private char[] endChar;		//终结符 
		private char[] missChar;	//忽略符 
		private char[] word;		//英文字母 
		private char[] number;		//数字 
		private ArrayList leftSplit,rightSplit,leftPos,rightPos,leftFreq,rightFreq; 
		private DataSet ds; 
		private DataSet ds1; 
		private int denominator; 
		private System.Windows.Forms.ComboBox comboBox1; 
		private System.Windows.Forms.ComboBox comboBox2; 
		private bool saveSingle; 
		private System.Windows.Forms.Label label2; 
		private System.Windows.Forms.ComboBox comboBox3; 
		private System.Windows.Forms.Button prevBtn; 
		private System.Windows.Forms.Label label3;	//是否保存单个字的单词的标志		 
		 
 
		/// <summary>
		/// Default constructor; only builds the designer-generated controls.
		/// </summary>
		public studyLearn()
		{
			// Required for Windows Forms designer support.
			this.InitializeComponent();
		}
 
 
		/// <summary>
		/// Custom constructor: builds the controls and remembers the files to
		/// process and the category they belong to.
		/// </summary>
		/// <param name="fileSrc">Stored in <c>fileList</c>.</param>
		/// <param name="typeSrc">Stored in <c>type</c>.</param>
		public studyLearn(string [] fileSrc,string typeSrc) : this()
		{
			// The default constructor has already run InitializeComponent().
			this.type=typeSrc;
			this.fileList=fileSrc;
		}
 
		 
 
		/// <summary>
		/// Cleans up any resources being used.
		/// </summary>
		/// <param name="disposing">
		/// When true, the designer component container (if one was created) is
		/// disposed before the base class is asked to dispose.
		/// </param>
		protected override void Dispose( bool disposing )
		{
			if( disposing && components != null )
			{
				components.Dispose();
			}

			base.Dispose( disposing );
		}
 
		#region Windows 窗体设计器生成的代码 
		/// <summary>
		/// Required method for Designer support - do not modify
		/// the contents of this method with the code editor.
		/// </summary>
		private void InitializeComponent()
		{
			this.groupBox1 = new System.Windows.Forms.GroupBox();
			this.label3 = new System.Windows.Forms.Label();
			this.comboBox3 = new System.Windows.Forms.ComboBox();
			this.label2 = new System.Windows.Forms.Label();
			this.comboBox2 = new System.Windows.Forms.ComboBox();
			this.comboBox1 = new System.Windows.Forms.ComboBox();
			this.studyBtn = new System.Windows.Forms.Button();
			this.typeLabel = new System.Windows.Forms.Label();
			this.label1 = new System.Windows.Forms.Label();
			this.listBox1 = new System.Windows.Forms.ListBox();
			this.groupBox2 = new System.Windows.Forms.GroupBox();
			this.mesBox = new System.Windows.Forms.TextBox();
			this.prevBtn = new System.Windows.Forms.Button();
			this.groupBox1.SuspendLayout();
			this.groupBox2.SuspendLayout();
			this.SuspendLayout();
			// 
			// groupBox1
			// 
			this.groupBox1.Controls.Add(this.label3);
			this.groupBox1.Controls.Add(this.comboBox3);
			this.groupBox1.Controls.Add(this.label2);
			this.groupBox1.Controls.Add(this.comboBox2);
			this.groupBox1.Controls.Add(this.comboBox1);
			this.groupBox1.Controls.Add(this.studyBtn);
			this.groupBox1.Controls.Add(this.typeLabel);
			this.groupBox1.Controls.Add(this.label1);
			this.groupBox1.Controls.Add(this.listBox1);
			this.groupBox1.Location = new System.Drawing.Point(16, 16);
			this.groupBox1.Name = "groupBox1";
			this.groupBox1.Size = new System.Drawing.Size(656, 240);
			this.groupBox1.TabIndex = 0;
			this.groupBox1.TabStop = false;
			this.groupBox1.Text = "学习内容";
			// 
			// label3
			// 
			this.label3.Location = new System.Drawing.Point(392, 152);
			this.label3.Name = "label3";
			this.label3.Size = new System.Drawing.Size(104, 23);
			this.label3.TabIndex = 9;
			this.label3.Text = "记录计算临时表:";
			// 
			// comboBox3
			// 
			this.comboBox3.Items.AddRange(new object[] {
														   "是",
														   "否"});
			this.comboBox3.Location = new System.Drawing.Point(512, 152);
			this.comboBox3.Name = "comboBox3";
			this.comboBox3.Size = new System.Drawing.Size(121, 20);
			this.comboBox3.TabIndex = 8;
			this.comboBox3.Text = "否";
			// 
			// label2
			// 
			this.label2.Location = new System.Drawing.Point(160, 152);
			this.label2.Name = "label2";
			this.label2.Size = new System.Drawing.Size(72, 23);
			this.label2.TabIndex = 7;
			this.label2.Text = "统计单字词";
			// 
			// comboBox2
			// 
			this.comboBox2.Items.AddRange(new object[] {
														   "是",
														   "否"});
			this.comboBox2.Location = new System.Drawing.Point(256, 152);
			this.comboBox2.Name = "comboBox2";
			this.comboBox2.Size = new System.Drawing.Size(121, 20);
			this.comboBox2.TabIndex = 6;
			this.comboBox2.Text = "否";
			// 
			// comboBox1
			// 
			this.comboBox1.Items.AddRange(new object[] {
														   "正向搜索",
														   "逆向搜索",
														   "双向搜索"});
			this.comboBox1.Location = new System.Drawing.Point(16, 152);
			this.comboBox1.Name = "comboBox1";
			this.comboBox1.Size = new System.Drawing.Size(121, 20);
			this.comboBox1.TabIndex = 5;
			this.comboBox1.Text = "逆向搜索";
			// 
			// studyBtn
			// 
			this.studyBtn.Location = new System.Drawing.Point(288, 208);
			this.studyBtn.Name = "studyBtn";
			this.studyBtn.TabIndex = 3;
			this.studyBtn.Text = "学习";
			this.studyBtn.Click += new System.EventHandler(this.studyBtn_Click);
			// 
			// typeLabel
			// 
			this.typeLabel.Location = new System.Drawing.Point(264, 16);
			this.typeLabel.Name = "typeLabel";
			this.typeLabel.TabIndex = 2;
			this.typeLabel.Text = "类别名";
			this.typeLabel.TextAlign = System.Drawing.ContentAlignment.MiddleCenter;
			// 
			// label1
			// 
			this.label1.Location = new System.Drawing.Point(8, 56);
			this.label1.Name = "label1";
			this.label1.Size = new System.Drawing.Size(72, 23);
			this.label1.TabIndex = 1;
			this.label1.Text = "文件列表:";
			// 
			// listBox1
			// 
			this.listBox1.ItemHeight = 12;
			this.listBox1.Location = new System.Drawing.Point(88, 48);
			this.listBox1.Name = "listBox1";
			this.listBox1.Size = new System.Drawing.Size(544, 88);
			this.listBox1.TabIndex = 0;
			// 
			// groupBox2
			// 
			this.groupBox2.Controls.Add(this.mesBox);
			this.groupBox2.Location = new System.Drawing.Point(16, 272);
			this.groupBox2.Name = "groupBox2";
			this.groupBox2.Size = new System.Drawing.Size(656, 272);
			this.groupBox2.TabIndex = 1;
			this.groupBox2.TabStop = false;
			this.groupBox2.Text = "学习状态";
			// 
			// mesBox
			// 
			this.mesBox.Location = new System.Drawing.Point(88, 32);
			this.mesBox.Multiline = true;
			this.mesBox.Name = "mesBox";
			this.mesBox.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
			this.mesBox.Size = new System.Drawing.Size(512, 224);
			this.mesBox.TabIndex = 0;
			this.mesBox.Text = "textBox1";
			// 
			// prevBtn
			// 
			this.prevBtn.Location = new System.Drawing.Point(304, 568);
			this.prevBtn.Name = "prevBtn";
			this.prevBtn.TabIndex = 3;
			this.prevBtn.Text = "上一步";
			this.prevBtn.Click += new System.EventHandler(this.prevBtn_Click);
			// 
			// studyLearn
			// 
			this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
			this.ClientSize = new System.Drawing.Size(688, 629);
			this.Controls.Add(this.prevBtn);
			this.Controls.Add(this.groupBox2);
			this.Controls.Add(this.groupBox1);
			this.Name = "studyLearn";
			this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
			this.Text = "统计学习";
			this.Load += new System.EventHandler(this.studyLearn_Load);
			this.groupBox1.ResumeLayout(false);
			this.groupBox2.ResumeLayout(false);
			this.ResumeLayout(false);

		}
		#endregion 
 
 
 
		/// <summary>
		/// Form Load handler: shows the category name in <c>typeLabel</c> and
		/// then calls <c>BindList</c>.
		/// </summary>
		/// <param name="sender">Event source (not used).</param>
		/// <param name="e">Event data (not used).</param>
		private void studyLearn_Load(object sender, System.EventArgs e)
		{
			this.typeLabel.Text=this.type;
			this.BindList();
		}
 
		///  
		/// 绑定文件列表 
		///  
		private void BindList() 
		{ 
			listBox1.Items.Clear();			 
			 
			for(int i=0;i 
		/// <summary>
		/// Handles a click on the "study" button.
		/// Copies the combo-box options into <c>saveSingle</c>, <c>recTempTable</c>
		/// and <c>searchType</c>, loads the <c>scanFile</c> table into <c>ds</c>
		/// under the name "fileName", reads the overall and per-category sums of
		/// <c>times</c> into <c>articleSum</c> and <c>typeSum</c>, and finally calls
		/// <c>initClassifier</c>.
		/// </summary>
		/// <param name="sender">Event source (not used).</param>
		/// <param name="e">Event data (not used).</param>
		private void studyBtn_Click(object sender, System.EventArgs e)
		{
			// Disable the button while the run is in progress. The outer finally
			// re-enables it even when a database call throws, so the form never
			// gets stuck with a dead button.
			studyBtn.Enabled=false;

			try
			{
				// "是" is the "yes" entry of the combo boxes.
				saveSingle=(comboBox2.Text=="是");
				recTempTable=(comboBox3.Text=="是");

				// 0 = forward search, 1 = backward search, 2 = bidirectional search.
				// Any unrecognised text falls back to backward search.
				if(comboBox1.Text=="正向搜索")
				{
					searchType=0;
				}
				else if(comboBox1.Text=="双向搜索")
				{
					searchType=2;
				}
				else
				{
					searchType=1;
				}

				bool totalsRead=false;
				string failure=null;

				OleDbConnection sqlCon=db.dbOpertation.getConnection();
				sqlCon.Open();
				try
				{
					if(ds==null)
					{
						ds=new DataSet();
					}

					// Load the scanned-file records; updateFileTable works on this table.
					OleDbDataAdapter sqlAd=new OleDbDataAdapter("select * from scanFile",sqlCon);
					sqlAd.Fill(ds,"fileName");

					OleDbCommand sqlCom=new OleDbCommand();
					sqlCom.Connection=sqlCon;

					double total;
					sqlCom.CommandText="select sum(times) as jj from scanFile";
					if(readTimesSum(sqlCom,out total))
					{
						articleSum=total;

						// The category name is passed as a parameter instead of being
						// concatenated into the SQL text, so a name containing a quote
						// can neither break nor alter the query. OLE DB parameters are
						// positional ('?'); the parameter name is informational only.
						sqlCom.CommandText="select sum(times) as jj from scanFile where typeName=?";
						sqlCom.Parameters.Add("typeName",OleDbType.VarWChar).Value=typeLabel.Text;
						if(readTimesSum(sqlCom,out total))
						{
							typeSum=total;
							totalsRead=true;
						}
						else
						{
							failure="未读出类别文档总数!";
						}
					}
					else
					{
						failure="未读出文档总数!";
					}
				}
				finally
				{
					// Always release the connection, including on exceptions.
					sqlCon.Close();
				}

				if(failure!=null)
				{
					MessageBox.Show(failure);
				}

				if(totalsRead)
				{
					// Runs only after the connection above has been closed, as before.
					initClassifier();
				}
			}
			finally
			{
				studyBtn.Enabled=true;
			}
		}

		/// <summary>
		/// Executes a query whose single row exposes a column named "jj" and
		/// converts that value to a number. An empty/NULL value yields 0.
		/// </summary>
		/// <param name="sqlCom">Command to execute; its connection must be open.</param>
		/// <param name="sum">Receives the value read, or 0 when the column is empty.</param>
		/// <returns>false when the query returned no row at all; otherwise true.</returns>
		private static bool readTimesSum(OleDbCommand sqlCom,out double sum)
		{
			sum=0;
			OleDbDataReader sqlRead=sqlCom.ExecuteReader();
			try
			{
				if(!sqlRead.Read())
				{
					return false;
				}

				string sst=sqlRead["jj"].ToString();
				if(sst.Trim()!="")
				{
					sum=Convert.ToInt32(sst);
				}
				return true;
			}
			finally
			{
				sqlRead.Close();
			}
		}
 
 
		/// <summary>
		/// Initialises the classifier state and then processes the files.
		/// Loads the terminator characters from table <c>endChar</c>, the
		/// dictionary from table <c>words</c> (into <c>ds1</c> as "word"), makes
		/// sure the in-memory "word2Temp" table exists in <c>ds</c>, resets the
		/// segmentation lists, reads the total word frequency into
		/// <c>denominator</c> and, on success, calls <c>readToEnd</c>
		/// synchronously on the calling thread.
		/// </summary>
		/// <returns>
		/// true when initialisation succeeded and <c>readToEnd</c> has run;
		/// false when the temporary table could not be created or the total
		/// word frequency could not be read.
		/// </returns>
		private bool initClassifier()
		{
			sqlConnect=db.dbOpertation.getConnection();
			OleDbDataReader sqlRead=null;
			bool initialized=false;

			try
			{
				if(sqlConnect.State==ConnectionState.Closed)
				{
					sqlConnect.Open();
				}

				// Read the terminator characters from the database into endChar.
				// (A blank is not treated as a terminator.)
				OleDbDataAdapter sqlAd=new OleDbDataAdapter("select * from endChar",sqlConnect);
				DataTable dt=new DataTable();
				sqlAd.Fill(dt);

				System.Text.StringBuilder endings=new System.Text.StringBuilder();
				foreach(DataRow dr in dt.Rows)
				{
					endings.Append(dr["ending"].ToString());
				}
				endChar=endings.ToString().ToCharArray();

				// "word" holds the dictionary used by the segmentation look-ups.
				if(ds1==null)
				{
					ds1=new DataSet();
				}

				if(ds1.Tables["word"]==null)
				{
					sqlAd.SelectCommand.CommandText="select * from words";
					sqlAd.Fill(ds1,"word");
				}

				// "word2Temp" records, per word, the number of articles it occurred in.
				if(ds==null)
				{
					ds=new DataSet();
				}

				if(ds.Tables["word2Temp"]==null)
				{
					if(!getTable("word2Temp"))
					{
						MessageBox.Show("创建临时表失败");
						// The finally block below closes the connection on this path too.
						return false;
					}
				}

				missChar=new char[]{'\r','\n',' '};
				number="0123456789".ToCharArray();
				word="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".ToCharArray();

				leftSplit=new ArrayList();
				leftPos=new ArrayList();
				leftFreq=new ArrayList();
				rightSplit=new ArrayList();
				rightPos=new ArrayList();
				rightFreq=new ArrayList();

				OleDbCommand sqlCom=new OleDbCommand("select sum(wfreq) as aa from words",sqlConnect);
				sqlRead=sqlCom.ExecuteReader();
				if(sqlRead.Read())
				{
					object total=sqlRead["aa"];
					// sum() is NULL for an empty table; Convert.ToInt32 would throw
					// on DBNull, so that case is reported as a failed initialisation.
					if(!Convert.IsDBNull(total))
					{
						denominator=Convert.ToInt32(total);
						initialized=true;
					}
				}
			}
			finally
			{
				// Release the reader and the connection on every path, including
				// exceptions and the early return above.
				if(sqlRead!=null)
				{
					sqlRead.Close();
				}
				if(sqlConnect!=null && sqlConnect.State!=ConnectionState.Closed)
				{
					sqlConnect.Close();
				}
			}

			if(!initialized)
			{
				mesBox.Text="分类器初始化失败\n";
				return false;
			}

			mesBox.Text="分类器初始化完毕\r\n";

			// The scan runs directly on the calling thread (no worker thread).
			readToEnd();

			return true;
		}
 
 
		 
		///  
		/// 读入字句直到遇到一个终结符,英文单词与数字都算终结符 
		///  
		///  
		public void readToEnd() 
		{ 
			//采用自动机原理实现识别过程 
			int status=0; 
			//计数器每3个文章扫描完后保存结果 
			int counter=0; 
			 
			//lastChar是上一个字符,用于识别数字单词			 
			char currentChar; 
			 
			string result=""; 
			 
			StreamReader sr; 
			 
			//对文件列表中的每一个文件进行处理 
			for(int ss=0;ss= 0)  
				{ 
					//当前字符 
					currentChar=(char)sr.Read();		 
				 
					if(status==0) 
					{ 
						if(isNumber(currentChar)) 
						{ 
							//数字 
							result=currentChar.ToString(); 
							status=1;						 
						} 
						else if(isWord(currentChar)) 
						{ 
							result=currentChar.ToString(); 
							status=5; 
						}					 
						else if(isChinese(currentChar) ) 
						{ 
							//中文 
							result=currentChar.ToString(); 
							status=7; 
						} 
						else if(isEnd(currentChar)) 
						{ 
							result=""; 
							status=0; 
						} 
						else if(isSpace(currentChar)) 
						{ 
							//空格,不改变状态,继续读 
							result=""; 
							status=0; 
						} 
						else 
						{ 
							result=""; 
							status=0; 
						} 
 
					} 
					else if(status==1) 
					{ 
						//无小数数字,数字不保存 
						if(isNumber(currentChar)) 
						{ 
							result+=currentChar; 
							//状态不变 
						} 
						else if(currentChar=='.') 
						{ 
							result+=currentChar; 
							status=2; 
						} 
						else if(isWord(currentChar)) 
						{ 
							//识别出数字 
							//splitResult.Add(result); 
 
							//对二项表进行更新,表示该文章中出现这个词,出现这个词的文章数加1 
							//updateTable(result,fileList[ss]);							 
 
							result=currentChar.ToString(); 
							status=5; 
				 
						} 
						else if(isChinese(currentChar)) 
						{ 
							//识别出数字 
							//splitResult.Add(result); 
							//updateTable(result,fileList[ss]);	 
							 
							result=currentChar.ToString(); 
							status=7; 
						} 
						//					else if(isSpace(currentChar)) 
						//					{ 
						//						splitResult.Add(result); 
						//						result=""; 
						//						status=0; 
						//					} 
						//					else if(isEnd(currentChar)) 
						//					{ 
						//						splitResult.Add(result); 
						//						result=""; 
						//						status=0; 
						//					} 
						else 
						{ 
							//数字其实可以不保存 
							//splitResult.Add(result); 
							//updateTable(result,fileList[ss]);	 
 
							result=""; 
							status=0; 
						} 
					} 
					else if(status==2) 
					{ 
						//有小数数字,数字不保存 
						if(isNumber(currentChar)) 
						{ 
							result+=currentChar; 
							//状态不变 
						} 
						else if(isWord(currentChar)) 
						{ 
							//splitResult.Add(result); 
							//updateTable(result,fileList[ss]);	 
 
							result=currentChar.ToString(); 
							status=5; 
						} 
						else if(isChinese(currentChar)) 
						{ 
							//splitResult.Add(result); 
							//updateTable(result,fileList[ss]);	 
 
							result=currentChar.ToString(); 
							status=7; 
						} 
							//					else if(isSpace(currentChar)) 
							//					{ 
							//						splitResult.Add(result); 
							//						result=""; 
							//						status=0; 
							//					} 
							//					else if(isEnd(currentChar)) 
							//					{ 
							//						splitResult.Add(result); 
							//						result=""; 
							//						status=0; 
							//					} 
						else 
						{ 
							//其他情况,数字不保存 
							//splitResult.Add(result); 
							//updateTable(result,fileList[ss]);	 
 
							result=""; 
							status=0; 
						}					 
					}				 
					else if(status==5) 
					{ 
						if(isWord(currentChar)) 
						{ 
							result+=currentChar; 
							//status不变 
						} 
						else if(isNumber(currentChar)) 
						{ 
							//splitResult.Add(result); 
							updateTable(result,fileList[ss]);	 
 
							result=currentChar.ToString(); 
							status=1; 
						} 
						else if(isChinese(currentChar)) 
						{ 
							//splitResult.Add(result); 
							updateTable(result,fileList[ss]);	 
 
							result=currentChar.ToString(); 
							status=7; 
						}						 
						else 
						{ 
							//状态6,识别为一个英文单词,并将状态重新置位0 
							//splitResult.Add(result); 
							updateTable(result,fileList[ss]);	 
 
							result=currentChar.ToString(); 
							status=0; 
						} 
					}				 
					else if(status==7) 
					{ 
						if(isChinese(currentChar)) 
						{ 
							//中文 
							result+=currentChar; 
						 
						} 
						else if(isNumber(currentChar)) 
						{ 
							doSplit(result,sqlConnect,fileList[ss]); 
							result=currentChar.ToString(); 
							status=1; 
						} 
						else if(isWord(currentChar))	 
						{ 
							doSplit(result,sqlConnect,fileList[ss]); 
							result=currentChar.ToString(); 
							status=5; 
						} 
						else if(isSpace(currentChar)) 
						{ 
							//忽略空格 
						}						 
						else 
						{						 
							doSplit(result,sqlConnect,fileList[ss]); 
							result=""; 
							status=0; 
						} 
					}				 
					else 
					{ 
						MessageBox.Show("出现意外状态!"); 
					} 
				} 
			 
				//对结尾情况的处理,只处理英文,中文,数字的情况 
				if(status==1) 
				{ 
					//数字 
					//splitResult.Add(result); 
					updateTable(result,fileList[ss]);	 
				} 
				else if(status==2) 
				{ 
					//带小数点的数字 
					//splitResult.Add(result); 
					updateTable(result,fileList[ss]);	 
				} 
				else if(status==5) 
				{ 
					//单词 
					//splitResult.Add(result); 
					updateTable(result,fileList[ss]);	 
				} 
				else if(status==7) 
				{ 
					//汉字 
					doSplit(result,sqlConnect,fileList[ss]); 
				} 
				else 
				{ 
					 
				} 
 
				 
				counter++; 
 
				//更新文件列表 
				updateFileTable(fileList[ss]); 
				//文章总数加1 
				articleSum+=1; 
				typeSum+=1; 
 
				//每三个文章,或者到文件列表末尾保存一次,防止临时表数据过多					 
				if(counter>=3||ss==this.fileList.Length-1) 
				{ 
					save2DB(); 
					ds.Tables["word2Temp"].Clear(); 
					counter=0; 
				} 
				 
				threadMsg="文件:"+fileList[ss]+" 处理完毕\r\n"; 
				mesBox.Text+=threadMsg; 
//				Thread msgThread=new Thread(new ThreadStart(this.setMessage)); 
//				msgThread.Priority=ThreadPriority.Highest; 
//				msgThread.Start(); 
//				//启动前的延迟 
//				while(!msgThread.IsAlive);									 
//				msgThread.Join(); 
				 
				sr.Close();				 
			} 
			 
			studyBtn.Enabled=true; 
			//结果更新到数据库 
//			string ccc=type; 
			//saveDB(); 
 
		} 
 
 
		/// <summary>
		/// Appends the pending status text in <c>threadMsg</c> to the message box.
		/// </summary>
		private void setMessage()
		{
			mesBox.Text=mesBox.Text+threadMsg;
		}
 
		 
		#region 判断类型函数 
		/// <summary>
		/// Tells whether the character is an ASCII blank (' ').
		/// </summary>
		/// <param name="currentChar">Character to test.</param>
		/// <returns>true only for ' '.</returns>
		private bool isSpace(char currentChar)
		{
			return currentChar==' ';
		}
 
		/// <summary>
		/// Tells whether the character lies in the range U+4E00..U+9FA5 (inclusive).
		/// </summary>
		/// <param name="currentChar">Character to test.</param>
		/// <returns>true when the code unit is inside that range.</returns>
		private bool isChinese(char currentChar)
		{
			bool belowRange=currentChar<0x4e00;
			bool aboveRange=currentChar>0x9fa5;
			return !(belowRange || aboveRange);
		}
 
		/// <summary>
		/// Tells whether the character is an ASCII digit '0'..'9'.
		/// </summary>
		/// <param name="currentChar">Character to test.</param>
		/// <returns>true for the ten ASCII digits only.</returns>
		private bool isNumber(char currentChar)
		{
			return '0'<=currentChar && currentChar<='9';
		}
				 
		/// <summary>
		/// Tells whether the character is an ASCII letter (a-z or A-Z).
		/// </summary>
		/// <param name="currentChar">Character to test.</param>
		/// <returns>true for the 52 ASCII letters only.</returns>
		private bool isWord(char currentChar)
		{
			bool lowerCase=currentChar>='a' && currentChar<='z';
			bool upperCase=currentChar>='A' && currentChar<='Z';
			return lowerCase || upperCase;
		}
		 
 
		//检查字符是否是终结符 
		private bool isEnd(char currentChar) 
		{ 
			for(int i=0;i0) 
					{					 
						//currentPos即剩余串的长度 
						if(currentPos<=4) 
						{ 
							subStr=sourceLine.Substring(0,currentPos); 
							subStr=backCheckSub(subStr); 
						 
							currentPos-=subStr.Length; 
							rightPos.Add(currentPos); 
						} 
						else 
						{ 
							subStr=sourceLine.Substring(currentPos-4,4); 
							subStr=backCheckSub(subStr); 
						 
							currentPos-=subStr.Length; 
							rightPos.Add(currentPos); 
						} 
 
						rightSplit.Add(subStr); 
						if(searchType==1) 
						{ 
							if(!saveSingle&&subStr.Length<=1) 
							{ 
								 
							} 
							else 
							{ 
								//splitResult.Add(subStr); 
								updateTable(subStr,file); 
							} 
						} 
					} 
				 
				} 
 
				//计算混合概率密度的变量 
				int i,j,p; 
 
				//用于存储前向、后向扫描最大概率 
				float pRight=1,pLeft=1;					 
				int lastPosI=0,lastPosJ=rightPos.Count-1; 
 
				if(searchType==2) 
				{ 
					 
					i=1; 
					j=rightPos.Count-2; 
					 
					while(i=0) 
					{ 
						int ll=Convert.ToInt32(leftPos[i]); 
						int rr=Convert.ToInt32(rightPos[j]); 
						if(ll>rr) 
						{ 
							pRight*=((float)(Convert.ToInt32(rightFreq[j])))/denominator; 
							j--; 
						} 
						else if(llj;p--) 
									{				 
										subStr=(string)rightSplit[p]; 
										if(!saveSingle&&subStr.Length<=1) 
										{ 
											 
										} 
										else 
										{ 
											//splitResult.Add(subStr); 
											updateTable(subStr,file);	 
										} 
										//splitResult.Add(rightSplit[p]); 
									} 
 
									//记录结点位置 
									lastPosI=i; 
									lastPosJ=j; 
									i++; 
									j--; 
									 
								} 
								else 
								{ 
									//正向搜索的概率大 
									for(p=lastPosI;pj;p--) 
							{ 
								subStr=(string)rightSplit[p]; 
								if(!saveSingle&&subStr.Length<=1) 
								{ 
											 
								} 
								else 
								{ 
									//splitResult.Add(subStr); 
									updateTable(subStr,file); 
								} 
								//splitResult.Add(rightSplit[p]); 
							}		 
						} 
						else 
						{ 
							//正向搜索的概率大 
							for(p=lastPosI+1;p0) 
				{ 
					leftFreq.Add(drs[0]["wfreq"].ToString());					 
					return subString; 
				} 
				else 
				{					 
					leftFreq.Add("0"); 
					return subString; 
				} 
			} 
			else 
			{ 
				DataRow[] drs=ds1.Tables["word"].Select("word=\'"+subString+"\'"); 
				 
				if(drs.Length>0) 
				{ 
					leftFreq.Add(drs[0]["wfreq"].ToString());					 
					return subString; 
				} 
				else 
				{										 
					return foreCheckSub(subString.Substring(0,subString.Length-1)); 
				} 
 
			} 
		} 
 
 
		/// <summary>
		/// Backward dictionary look-up. Tries the whole string against the "word"
		/// table of <c>ds1</c>; on a miss the first character is dropped and the
		/// remainder is tried again, until a match is found or at most one
		/// character is left.
		/// </summary>
		/// <param name="subString">Candidate text to look up.</param>
		/// <returns>
		/// The matched suffix, or the last remaining candidate (length &lt;= 1)
		/// when nothing matched. Exactly one entry is appended to
		/// <c>rightFreq</c>: the "wfreq" of the match, or "0" on a miss.
		/// </returns>
		private string backCheckSub(string subString)
		{
			string candidate=subString;

			while(true)
			{
				DataRow[] matches=ds1.Tables["word"].Select("word=\'"+candidate+"\'");

				if(matches.Length>0)
				{
					rightFreq.Add(matches[0]["wfreq"].ToString());
					return candidate;
				}

				if(candidate.Length<=1)
				{
					// Nothing shorter left to try: record a zero frequency.
					rightFreq.Add("0");
					return candidate;
				}

				// Drop the leading character and retry with the remainder.
				candidate=candidate.Substring(1,candidate.Length-1);
			}
		}
 
		#endregion 
 
		#region 临时表操作 
 
		/// <summary>
		/// Creates an in-memory table in <c>ds</c> with the columns
		/// lrno (auto-increment Int32), word (String), times (Int32),
		/// file (String) and sum (Int32).
		/// </summary>
		/// <param name="name">Name of the table to add to <c>ds</c>.</param>
		/// <returns>
		/// true on success; false after showing a message box when any step throws.
		/// </returns>
		private bool getTable(string name)
		{
			try
			{
				ds.Tables.Add(name);
				DataTable table=ds.Tables[name];

				// Auto-increment id column.
				DataColumn idColumn=new DataColumn("lrno",typeof(int));
				idColumn.AutoIncrement=true;
				idColumn.AutoIncrementStep=1;
				table.Columns.Add(idColumn);

				table.Columns.Add(new DataColumn("word",typeof(string)));
				table.Columns.Add(new DataColumn("times",typeof(int)));
				table.Columns.Add(new DataColumn("file",typeof(string)));
				table.Columns.Add(new DataColumn("sum",typeof(int)));

				return true;
			}
			catch(Exception error)
			{
				MessageBox.Show("创建临时表失败:"+error.Message);
				return false;
			}
		}
		 
 
		/// <summary>
		/// Records one more scan of <paramref name="file"/> in the in-memory
		/// "fileName" table of <c>ds</c>: inserts a row with times = 1 and the
		/// current category when the path is not present, otherwise increments
		/// the "times" value of the first matching row.
		/// </summary>
		/// <param name="file">Path of the file that has just been processed.</param>
		private void updateFileTable(string file)
		{
			// Single quotes must be doubled inside a DataTable.Select string
			// literal; without this a path such as "John's notes.txt" makes the
			// filter expression invalid.
			string quotedPath=(file==null) ? "" : file.Replace("'","''");

			DataTable files=ds.Tables["fileName"];
			DataRow[] drs=files.Select("filePath='"+quotedPath+"'");

			if(drs.Length==0)
			{
				DataRow dr=files.NewRow();
				dr["filePath"]=file;
				dr["typeName"]=typeLabel.Text;
				dr["times"]="1";
				files.Rows.Add(dr);
			}
			else
			{
				drs[0]["times"]=Convert.ToInt32(drs[0]["times"])+1;
			}
		}
		 
		/// <summary>
		/// Updates the in-memory "word2Temp" table of <c>ds</c> for one
		/// occurrence of <paramref name="word"/> in <paramref name="file"/>.
		/// A word not yet present is inserted with times = 1. A word already
		/// present has its "times" incremented and its "file" replaced only when
		/// a row for that word with a different file exists; further occurrences
		/// within the same file leave the row unchanged.
		/// </summary>
		/// <param name="word">The word that was recognised.</param>
		/// <param name="file">Path of the file in which it occurred.</param>
		private void updateTable(string word,string file)
		{
			// Single quotes must be doubled inside a DataTable.Select string
			// literal; otherwise a quote in the word or in the file path makes
			// the filter expression invalid.
			string quotedWord=(word==null) ? "" : word.Replace("'","''");
			string quotedFile=(file==null) ? "" : file.Replace("'","''");

			DataTable temp=ds.Tables["word2Temp"];
			DataRow[] drs=temp.Select("word='"+quotedWord+"'");

			if(drs.Length>0)
			{
				// The word is known: count it again only when it was last seen
				// in a different file ("<>" is the not-equal operator).
				DataRow[] drs2=temp.Select("word='"+quotedWord+"' and file<>'"+quotedFile+"'");
				if(drs2.Length>0)
				{
					drs[0]["times"]=Convert.ToInt32(drs[0]["times"])+1;
					drs[0]["file"]=file;
				}
			}
			else
			{
				// The word is new: insert it.
				DataRow dr=temp.NewRow();
				dr["word"]=word;
				dr["times"]=1;
				dr["file"]=file;
				temp.Rows.Add(dr);
			}
		}
 
		#endregion 
 
 
		#region 统计保存结果 
 
		//保存二项分类结果 
		private void save2DB() 
		{ 
			string cmd; 
			 
			OleDbDataAdapter sqlAd,sqlAd2,sqlAd3; 			 
			DataRow dr,dr2; 
			DataRow[] drs; 
			int j,k,wSum; 
			string word; 
//			OleDbTransaction sqlTran; 
//			sqlTran=sqlConnect.BeginTransaction(); 
			//二项模型表 
			cmd="select * from "+type+"2"; 
 
			sqlAd=new OleDbDataAdapter(cmd,sqlConnect); 
			 
			sqlAd.Fill(ds,"word2"); 
			 
 
//			OleDbCommandBuilder sqlBuilder1; 
//			//临时表 
//			if(recTempTable) 
//			{ 
//				sqlAd1=new OleDbDataAdapter("select * from tempTable",sqlConnect); 
//				sqlBuilder1=new OleDbCommandBuilder(sqlAd1); 
//			} 
 
			//词频汇总表 
			cmd="select * from wordSum2"; 
			sqlAd2=new OleDbDataAdapter(cmd,sqlConnect); 
			sqlAd2.Fill(ds,"wordSum2"); 
			 
			 
			//扫描文章记录表 
			sqlAd3=new OleDbDataAdapter("select * from scanFile",sqlConnect); 
			sqlAd3.Fill(ds,"fileName"); 
			 
			OleDbCommandBuilder sqlBuilder=new OleDbCommandBuilder(sqlAd); 
			OleDbCommandBuilder sqlBuilder2=new OleDbCommandBuilder(sqlAd2); 
			OleDbCommandBuilder sqlBuilder3=new OleDbCommandBuilder(sqlAd3); 
 
			for(int i=0;i0) 
				{ 
					j=Convert.ToInt32(drs[0]["times"]); 
					j=j+k; 
					drs[0]["times"]=j; 
					//将该词的出现总次数记入temp表,用于计算互信息 
					ds.Tables["word2Temp"].Rows[i][4]=j; 
					wSum=j; 
				} 
				else 
				{ 
					dr2=ds.Tables["wordSum2"].NewRow(); 
					dr2["word"]=word; 
					dr2["times"]=k; 
					ds.Tables["wordSum2"].Rows.Add(dr2); 
					//将该词的出现总次数记入temp表,用于计算互信息 
					ds.Tables["word2Temp"].Rows[i][4]=1; 
					//没出现过这个词,则词的总出现次数即为当前的出现次数 
					wSum=k; 
				} 
 
				//互信息不在此处计算 
				//还要将互信息记录到类别表中 
				cmd="word=\'"+word+"\'"; 
				drs=ds.Tables["word2"].Select(cmd); 
 
				if(drs.Length>0) 
				{ 
					j=Convert.ToInt32(drs[0]["times"]); 
					j=j+k; 
					drs[0]["times"]=j; 
//					cal=j*articleSum/(typeSum*wSum); 
//					MI=Math.Log(cal); 
//					drs[0]["MI"]=MI; 
					 
				} 
				else 
				{ 
					dr2=ds.Tables["word2"].NewRow(); 
					dr2["word"]=word; 
					dr2["times"]=k; 
//					cal=k*articleSum/(typeSum*wSum); 
//					MI=Math.Log(cal); 
//					dr2["MI"]=MI; 
					ds.Tables["word2"].Rows.Add(dr2); 
				} 
			} 
 
			 
			//更新到数据库 
			//这部分最容易出错,可以在这里设断点 
			try 
			{ 
//				if(recTempTable) 
//				{ 
//					sqlAd1.Update(ds,"word2Temp");					 
//					ds.Tables["word2Temp"].AcceptChanges();	 
//				} 
 
				sqlAd3.Update(ds,"fileName"); 
				ds.Tables["fileName"].AcceptChanges(); 
				//ds.Tables["fileName"].Clear(); 
				ds.Tables["fileName"].Clear(); 
 
				sqlAd.Update(ds,"word2"); 
				ds.Tables["word2"].AcceptChanges(); 
				ds.Tables["word2"].Clear(); 
 
				sqlAd2.Update(ds,"wordSum2"); 
				ds.Tables["wordSum2"].AcceptChanges(); 
				ds.Tables["wordSum2"].Clear(); 
 
			} 
			catch(Exception ex) 
			{ 
				string ccc; 
				OleDbCommand sqlCom=sqlBuilder.GetInsertCommand(); 
				ccc=sqlCom.CommandText; 
 
				sqlCom=sqlBuilder2.GetInsertCommand(); 
				ccc=sqlCom.CommandText; 
 
				sqlCom=sqlBuilder3.GetInsertCommand(); 
				ccc=sqlCom.CommandText; 
 
				MessageBox.Show("更新数据库出现问题:"+ex.Message); 
			} 
		} 
		#endregion 
 
		/// <summary>
		/// Handles a click on the "previous step" button: opens a new
		/// <c>studySelect</c> form under the same MDI parent and closes this form.
		/// Any exception is reported in a message box.
		/// </summary>
		/// <param name="sender">Event source (not used).</param>
		/// <param name="e">Event data (not used).</param>
		private void prevBtn_Click(object sender, System.EventArgs e)
		{
			try
			{
				Form previousStep=new studySelect();
				previousStep.MdiParent=MdiParent;
				previousStep.Show();
				Close();
			}
			catch(Exception error)
			{
				MessageBox.Show(error.Message);
			}
		}
 
 
	} 
}