// Source: www.pudn.com > classifier.rar > classifySelect.cs


using System; 
using System.Drawing; 
using System.Collections; 
using System.ComponentModel; 
using System.Windows.Forms; 
using System.IO; 
using System.Data; 
using System.Data.OleDb; 
 
namespace classifier 
{ 
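	/// <summary>
	/// Classification test form: lets the user pick files and options, then reports a
	/// classification result for each selected file in the output text box.
	/// </summary>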
	public class classifySelect : System.Windows.Forms.Form 
	{ 
		private System.Windows.Forms.Button selectBtn; 
		private System.Windows.Forms.Label label1; 
		private System.Windows.Forms.OpenFileDialog openFileDialog1; 
		private System.Windows.Forms.Button startBtn; 
		private System.Windows.Forms.Label label3; 
		private System.Windows.Forms.Label label4; 
		private System.Windows.Forms.ComboBox comboBox2; 
		private System.Windows.Forms.ComboBox comboBox1; 
		private System.Windows.Forms.ListBox selectList; 
 
		//分类器部分 
		private int searchType;			//正向搜索,逆向搜索,双向搜索 
		private OleDbConnection sqlConnect;		 
		private char[] endChar;			//终结符 
		private char[] missChar;		//忽略符 
		private char[] word;			//英文字母 
		private char[] number;			//数字 
		private ArrayList leftSplit,rightSplit,splitResult,leftPos,rightPos,leftFreq,rightFreq; 
		private DataSet ds; 
		private int denominator;		 
		private bool saveSingle;		//是否保存单个字的单词的标志 
		private string type; 
		private string [] fileList; 
		private System.Windows.Forms.Label label5; 
		private System.Windows.Forms.TextBox textBox1; 
		private System.Windows.Forms.Label label6; 
		private System.Windows.Forms.TextBox textBox2; 
		private int threshold,word4cal; 
		private double V; 
		private System.Windows.Forms.TextBox textBox3; 
		 
		 
 
		/// <summary>
		/// Required designer variable.
		/// </summary>
		private System.ComponentModel.Container components = null; 
 
		/// <summary>
		/// Creates the form and builds its controls through the designer-generated
		/// InitializeComponent method.
		/// </summary>
		public classifySelect()
		{
			//
			// Required for Windows Forms designer support.
			//
			InitializeComponent();

			//
			// TODO: add any constructor code after the InitializeComponent call.
			//
		}
 
		/// <summary>
		/// Cleans up the resources used by this form.
		/// </summary>
		/// <param name="disposing">
		/// When true, the designer component container is disposed as well, if one exists.
		/// The base class Dispose is called in every case.
		/// </param>
		protected override void Dispose( bool disposing )
		{
			// The component container is only released on an explicit dispose.
			if( disposing && components != null )
			{
				components.Dispose();
			}

			base.Dispose( disposing );
		}
 
		#region Windows 窗体设计器生成的代码 
		/// <summary>
		/// Required method for Designer support - do not modify the contents of
		/// this method with the code editor.
		/// Creates every control of the form, sets its position, caption and default
		/// value, and wires the click / FileOk event handlers.
		/// </summary>
		private void InitializeComponent()
		{
			this.selectBtn = new System.Windows.Forms.Button();
			this.label1 = new System.Windows.Forms.Label();
			this.openFileDialog1 = new System.Windows.Forms.OpenFileDialog();
			this.startBtn = new System.Windows.Forms.Button();
			this.comboBox2 = new System.Windows.Forms.ComboBox();
			this.label3 = new System.Windows.Forms.Label();
			this.label4 = new System.Windows.Forms.Label();
			this.comboBox1 = new System.Windows.Forms.ComboBox();
			this.selectList = new System.Windows.Forms.ListBox();
			this.label5 = new System.Windows.Forms.Label();
			this.textBox1 = new System.Windows.Forms.TextBox();
			this.label6 = new System.Windows.Forms.Label();
			this.textBox2 = new System.Windows.Forms.TextBox();
			this.textBox3 = new System.Windows.Forms.TextBox();
			this.SuspendLayout();
			//
			// selectBtn
			//
			this.selectBtn.Location = new System.Drawing.Point(568, 40);
			this.selectBtn.Name = "selectBtn";
			this.selectBtn.TabIndex = 1;
			this.selectBtn.Text = "选择";
			this.selectBtn.Click += new System.EventHandler(this.selectBtn_Click);
			//
			// label1
			//
			this.label1.Location = new System.Drawing.Point(32, 40);
			this.label1.Name = "label1";
			this.label1.Size = new System.Drawing.Size(80, 23);
			this.label1.TabIndex = 2;
			this.label1.Text = "选择文件:";
			//
			// openFileDialog1 (several files may be picked at once)
			//
			this.openFileDialog1.Multiselect = true;
			this.openFileDialog1.FileOk += new System.ComponentModel.CancelEventHandler(this.openFileDialog1_FileOk);
			//
			// startBtn
			//
			this.startBtn.Location = new System.Drawing.Point(576, 88);
			this.startBtn.Name = "startBtn";
			this.startBtn.TabIndex = 3;
			this.startBtn.Text = "统计分类";
			this.startBtn.Click += new System.EventHandler(this.startBtn_Click);
			//
			// comboBox2 (yes/no choice labelled by label3; read by startBtn_Click)
			//
			this.comboBox2.Items.AddRange(new object[] {
														   "是",
														   "否"});
			this.comboBox2.Location = new System.Drawing.Point(416, 152);
			this.comboBox2.Name = "comboBox2";
			this.comboBox2.Size = new System.Drawing.Size(121, 20);
			this.comboBox2.TabIndex = 6;
			this.comboBox2.Text = "否";
			//
			// label3
			//
			this.label3.Location = new System.Drawing.Point(296, 152);
			this.label3.Name = "label3";
			this.label3.TabIndex = 7;
			this.label3.Text = "计算单字词:";
			//
			// label4
			//
			this.label4.Location = new System.Drawing.Point(296, 192);
			this.label4.Name = "label4";
			this.label4.TabIndex = 8;
			this.label4.Text = "分词类型:";
			//
			// comboBox1 (search-direction choice labelled by label4; read by startBtn_Click)
			//
			this.comboBox1.Items.AddRange(new object[] {
														   "正向搜索",
														   "逆向搜索",
														   "双向搜索"});
			this.comboBox1.Location = new System.Drawing.Point(416, 192);
			this.comboBox1.Name = "comboBox1";
			this.comboBox1.Size = new System.Drawing.Size(121, 20);
			this.comboBox1.TabIndex = 9;
			this.comboBox1.Text = "逆向搜索";
			//
			// selectList
			//
			this.selectList.ItemHeight = 12;
			this.selectList.Location = new System.Drawing.Point(152, 40);
			this.selectList.Name = "selectList";
			this.selectList.ScrollAlwaysVisible = true;
			this.selectList.Size = new System.Drawing.Size(376, 88);
			this.selectList.TabIndex = 10;
			//
			// label5
			//
			this.label5.Location = new System.Drawing.Point(24, 152);
			this.label5.Name = "label5";
			this.label5.TabIndex = 11;
			this.label5.Text = "特征值个数:";
			//
			// textBox1 (parsed into word4cal by startBtn_Click)
			//
			this.textBox1.Location = new System.Drawing.Point(152, 152);
			this.textBox1.Name = "textBox1";
			this.textBox1.Size = new System.Drawing.Size(120, 21);
			this.textBox1.TabIndex = 12;
			this.textBox1.Text = "10";
			//
			// label6
			//
			this.label6.Location = new System.Drawing.Point(24, 192);
			this.label6.Name = "label6";
			this.label6.TabIndex = 13;
			this.label6.Text = "特征词阈值:";
			//
			// textBox2 (parsed into threshold by startBtn_Click)
			//
			this.textBox2.Location = new System.Drawing.Point(152, 192);
			this.textBox2.Name = "textBox2";
			this.textBox2.Size = new System.Drawing.Size(120, 21);
			this.textBox2.TabIndex = 14;
			this.textBox2.Text = "9";
			//
			// textBox3 (multi-line output box; saveResult appends one result line per file)
			//
			this.textBox3.Location = new System.Drawing.Point(24, 232);
			this.textBox3.Multiline = true;
			this.textBox3.Name = "textBox3";
			this.textBox3.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
			this.textBox3.Size = new System.Drawing.Size(664, 424);
			this.textBox3.TabIndex = 15;
			this.textBox3.Text = "";
			//
			// classifySelect
			//
			this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
			this.ClientSize = new System.Drawing.Size(712, 677);
			this.Controls.Add(this.textBox3);
			this.Controls.Add(this.textBox2);
			this.Controls.Add(this.label6);
			this.Controls.Add(this.textBox1);
			this.Controls.Add(this.label5);
			this.Controls.Add(this.selectList);
			this.Controls.Add(this.comboBox1);
			this.Controls.Add(this.label4);
			this.Controls.Add(this.label3);
			this.Controls.Add(this.comboBox2);
			this.Controls.Add(this.startBtn);
			this.Controls.Add(this.label1);
			this.Controls.Add(this.selectBtn);
			this.Name = "classifySelect";
			this.Text = "分类测试";
			this.ResumeLayout(false);

		}
		#endregion 
 
		/// <summary>
		/// Click handler of the select button: shows the file-open dialog modally,
		/// owned by this form.
		/// </summary>
		/// <param name="sender">Event source (not used).</param>
		/// <param name="e">Event data (not used).</param>
		private void selectBtn_Click(object sender, System.EventArgs e)
		{
			// The dialog result is not inspected here; the dialog's FileOk event is
			// wired to openFileDialog1_FileOk in InitializeComponent.
			this.openFileDialog1.ShowDialog(this);
		}
 
		private void openFileDialog1_FileOk(object sender, System.ComponentModel.CancelEventArgs e) 
		{ 
			selectList.Items.Clear(); 
			 
			fileList=openFileDialog1.FileNames; 
 
			for(int i=0;i 
		/// <summary>
		/// Click handler of the start button: reads the options from the form into the
		/// classifier fields (searchType, saveSingle, threshold, word4cal, type) and then
		/// starts the run through initClassifier().
		/// </summary>
		/// <param name="sender">Event source (not used).</param>
		/// <param name="e">Event data (not used).</param>
		private void startBtn_Click(object sender, System.EventArgs e)
		{
			// Nothing to classify without a selection.
			// NOTE(review): readToEnd appears to index fileList directly, so a null list
			// would presumably fail there - confirm against the full source.
			if(fileList==null || fileList.Length==0)
			{
				MessageBox.Show("请先选择文件");
				return;
			}

			// ComboBox.Text is the entry shown in the box. SelectedText is only the part of
			// the edit box that is highlighted, so comparing against it made every choice
			// fall through to the default.
			string searchChoice=comboBox1.Text;
			if(searchChoice=="正向搜索")
			{
				searchType=0;
			}
			else if(searchChoice=="双向搜索")
			{
				searchType=2;
			}
			else
			{
				// "逆向搜索" and any unrecognized text both map to 1.
				searchType=1;
			}

			// Single-character words are kept only when "是" is chosen.
			saveSingle=(comboBox2.Text=="是");

			try
			{
				threshold=Convert.ToInt32(textBox2.Text);
				word4cal=Convert.ToInt32(textBox1.Text);
			}
			catch(FormatException ex)
			{
				MessageBox.Show("请填写数字:"+ex.Message);
				// Do not start a run with unparsed settings.
				return;
			}
			catch(OverflowException ex)
			{
				MessageBox.Show("请填写数字:"+ex.Message);
				return;
			}

			type="分类测试";

			// Settings are complete: initialize the classifier, which also processes the files.
			initClassifier();
		}
 
		/// <summary>
		/// Initializes the classifier and, on success, processes the selected files.
		/// Loads the terminator characters from table endChar, fills the cached data set
		/// (tables "word" and "type") on first use, resets the segmentation work lists,
		/// reads the total word frequency (denominator) and the row count of wordSum2 (V),
		/// then calls readToEnd().
		/// </summary>
		/// <returns>
		/// true when both totals were read and readToEnd() was run; false otherwise.
		/// Database exceptions propagate to the caller after the connection is closed.
		/// </returns>
		private bool initClassifier()
		{
			sqlConnect=db.dbOpertation.getConnection();

			bool totalLoaded=false;
			bool vocabularyLoaded=false;

			try
			{
				if(sqlConnect.State==ConnectionState.Closed)
				{
					sqlConnect.Open();
				}

				// Read the terminators from the database; a space is not a terminator.
				DataTable endingTable=new DataTable();
				using(OleDbDataAdapter endingAdapter=new OleDbDataAdapter("select * from endChar",sqlConnect))
				{
					endingAdapter.Fill(endingTable);
				}
				System.Text.StringBuilder endings=new System.Text.StringBuilder();
				foreach(DataRow dr in endingTable.Rows)
				{
					endings.Append(dr["ending"].ToString());
				}

				// Fill the cached data set once: "word" holds the dictionary rows of table
				// words, "type" the summed times per typeName from scanFile.
				// It is built in a local and published only when both fills succeeded, so a
				// failed fill cannot leave a half-populated cache that is never refilled.
				if(ds==null)
				{
					DataSet loaded=new DataSet();
					using(OleDbDataAdapter wordAdapter=new OleDbDataAdapter("select * from words",sqlConnect))
					{
						wordAdapter.Fill(loaded,"word");
					}
					using(OleDbDataAdapter typeAdapter=new OleDbDataAdapter("select sum(times) as classTimes,typeName from scanFile group by typeName",sqlConnect))
					{
						typeAdapter.Fill(loaded,"type");
					}
					ds=loaded;
				}

				endChar=endings.ToString().ToCharArray();
				missChar=new char[]{'\r','\n',' '};
				number="0123456789".ToCharArray();
				word="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".ToCharArray();

				leftSplit=new ArrayList();
				leftPos=new ArrayList();
				leftFreq=new ArrayList();
				rightSplit=new ArrayList();
				rightPos=new ArrayList();
				rightFreq=new ArrayList();
				splitResult=new ArrayList();

				using(OleDbCommand sqlCom=new OleDbCommand("select sum(wfreq) as aa from words",sqlConnect))
				{
					// Used as the probability denominator by the two-way segmentation.
					// sum() over an empty table yields NULL, which is treated as a failed
					// initialization instead of crashing in Convert.ToInt32.
					object total=sqlCom.ExecuteScalar();
					if(total!=null && !(total is DBNull))
					{
						denominator=Convert.ToInt32(total);
						totalLoaded=true;

						sqlCom.CommandText="select count(*) as pp from wordSum2 ";
						object vocabulary=sqlCom.ExecuteScalar();
						if(vocabulary!=null && !(vocabulary is DBNull))
						{
							this.V=Convert.ToDouble(vocabulary);
							vocabularyLoaded=true;
						}
					}
				}
			}
			finally
			{
				// Always release the connection, including when a query throws.
				sqlConnect.Close();
			}

			if(!totalLoaded)
			{
				MessageBox.Show("分类器初始化失败\n");
				return false;
			}
			if(!vocabularyLoaded)
			{
				return false;
			}

			// The files are processed synchronously; running this on a worker thread was
			// tried by the original author and dropped as too troublesome.
			readToEnd();
			return true;
		}
 
		 
		/// <summary>
		/// Reads characters until a terminator is encountered; English words and
		/// numbers also count as terminators.
		/// </summary>
		public void readToEnd() 
		{ 
			//采用自动机原理实现识别过程 
			int status=0; 
			 
			//lastChar是上一个字符,用于识别数字单词			 
			char currentChar; 
			 
			string result=""; 
			 
			//lastType记录上一个字符是数字、英文、还是终结符 
			int i=0; 
 
			// 
			StreamReader sr; 
			 
			//对文件列表中的每一个文件进行处理 
			for(int ss=0;ss= 0)  
				{ 
					//当前字符 
					currentChar=(char)sr.Read();		 
				 
					if(status==0) 
					{ 
						if(isNumber(currentChar)) 
						{ 
							//数字 
							result=currentChar.ToString(); 
							status=1;						 
						} 
						else if(isWord(currentChar)) 
						{ 
							result=currentChar.ToString(); 
							status=5; 
						}					 
						else if(isChinese(currentChar) ) 
						{ 
							//中文 
							result=currentChar.ToString(); 
							status=7; 
						} 
						else if(isEnd(currentChar)) 
						{ 
							result=""; 
							status=0; 
						} 
						else if(isSpace(currentChar)) 
						{ 
							//空格,不改变状态,继续读 
							result=""; 
							status=0; 
						} 
						else 
						{ 
							result=""; 
							status=0; 
						} 
 
					} 
					else if(status==1) 
					{ 
						//无小数数字,不统计 
						if(isNumber(currentChar)) 
						{ 
							result+=currentChar; 
							//状态不变 
						} 
						else if(currentChar=='.') 
						{ 
							result+=currentChar; 
							status=2; 
						} 
						else if(isWord(currentChar)) 
						{ 
						 
							//splitResult.Add(result); 
							result=currentChar.ToString(); 
							status=5; 
				 
						} 
						else if(isChinese(currentChar)) 
						{ 
							//splitResult.Add(result); 
							result=currentChar.ToString(); 
							status=7; 
						} 
							//					else if(isSpace(currentChar)) 
							//					{ 
							//						splitResult.Add(result); 
							//						result=""; 
							//						status=0; 
							//					} 
							//					else if(isEnd(currentChar)) 
							//					{ 
							//						splitResult.Add(result); 
							//						result=""; 
							//						status=0; 
							//					} 
						else 
						{ 
							//数字其实可以不保存 
							//splitResult.Add(result); 
							result=""; 
							status=0; 
						} 
					} 
					else if(status==2) 
					{ 
						//有小数数字,不统计 
						if(isNumber(currentChar)) 
						{ 
							result+=currentChar; 
							//状态不变 
						} 
						else if(isWord(currentChar)) 
						{ 
							//splitResult.Add(result); 
							result=currentChar.ToString(); 
							status=5; 
						} 
						else if(isChinese(currentChar)) 
						{ 
							//splitResult.Add(result); 
							result=currentChar.ToString(); 
							status=7; 
						} 
						else 
						{ 
							//其他情况,数字不保存 
							//splitResult.Add(result); 
							result=""; 
							status=0; 
						}					 
					}				 
					else if(status==5) 
					{ 
						if(isWord(currentChar)) 
						{ 
							result+=currentChar; 
							//status不变 
						} 
						else if(isNumber(currentChar)) 
						{ 
							splitResult.Add(result); 
							result=currentChar.ToString(); 
							status=1; 
						} 
						else if(isChinese(currentChar)) 
						{ 
							splitResult.Add(result); 
							result=currentChar.ToString(); 
							status=7; 
						}						 
						else 
						{ 
							//状态6,识别为一个英文单词,并将状态重新置位0 
							splitResult.Add(result); 
							result=currentChar.ToString(); 
							status=0; 
						} 
					}				 
					else if(status==7) 
					{ 
						if(isChinese(currentChar)) 
						{ 
							//中文 
							result+=currentChar; 
						 
						} 
						else if(isNumber(currentChar)) 
						{ 
							doSplit(result,sqlConnect); 
							result=currentChar.ToString(); 
							status=1; 
						} 
						else if(isWord(currentChar))	 
						{ 
							doSplit(result,sqlConnect); 
							result=currentChar.ToString(); 
							status=5; 
						} 
						else if(isSpace(currentChar)) 
						{ 
							//忽略空格 
						} 
						else 
						{						 
							doSplit(result,sqlConnect); 
							result=""; 
							status=0; 
						} 
					}				 
					else 
					{ 
						MessageBox.Show("出现意外状态!"); 
					} 
				} 
			 
				//对结尾情况的处理,只处理英文,中文,数字的情况 
				if(status==1) 
				{ 
					//数字 
					//splitResult.Add(result); 
				} 
				else if(status==2) 
				{ 
					//带小数点的数字 
					//splitResult.Add(result); 
				} 
				else if(status==5) 
				{ 
					//单词 
					splitResult.Add(result); 
				} 
				else if(status==7) 
				{ 
					//汉字 
					doSplit(result,sqlConnect); 
				} 
				else 
				{ 
					 
				} 
 
				//将分词结果存入结果表 
				saveResult(fileList[ss]); 
 
				//MessageBox.Show("文件:"+fileList[ss]+" 处理完毕"); 
 
				//mesBox.Text+=\r\n"; 
 
				//Thread msgThread=new Thread(new ThreadStart(this.setMessage)); 
				//msgThread.Priority=ThreadPriority.Highest; 
				//msgThread.Start(); 
				////启动前的延迟 
				//while(!msgThread.IsAlive);									 
				//msgThread.Join(); 
				//该语句为选出至少7个最大的词 
				//"select top 7 * from learnResult where learnResult.word in (select 艺术.word from 艺术 where 艺术.word=learnResult.word) order by times desc"; 
				 
				 
			} 
			//结果更新到数据库			 
			//saveDB(); 
			 
		} 
		 
 
		#region 判断类型函数 
		/// <summary>
		/// Tells whether the character is the ASCII space character.
		/// </summary>
		/// <param name="currentChar">Character to test.</param>
		/// <returns>true only for ' '; tabs and line breaks are not matched.</returns>
		private bool isSpace(char currentChar)
		{
			return currentChar==' ';
		}
 
		/// <summary>
		/// Tells whether the character lies in the code-point range U+4E00..U+9FA5
		/// (both ends included), which this form treats as Chinese characters.
		/// </summary>
		/// <param name="currentChar">Character to test.</param>
		/// <returns>true when the character is inside the range.</returns>
		private bool isChinese(char currentChar)
		{
			bool belowRange=currentChar<0x4e00;
			bool aboveRange=currentChar>0x9fa5;
			return !belowRange && !aboveRange;
		}
 
		/// <summary>
		/// Tells whether the character is an ASCII decimal digit ('0'..'9').
		/// </summary>
		/// <param name="currentChar">Character to test.</param>
		/// <returns>true for '0' through '9', false for everything else.</returns>
		private bool isNumber(char currentChar)
		{
			return '0'<=currentChar && currentChar<='9';
		}
				 
		/// <summary>
		/// Tells whether the character is an ASCII letter (a-z or A-Z).
		/// </summary>
		/// <param name="currentChar">Character to test.</param>
		/// <returns>true for an upper- or lower-case ASCII letter.</returns>
		private bool isWord(char currentChar)
		{
			bool lowerCase=currentChar>='a' && currentChar<='z';
			bool upperCase=currentChar>='A' && currentChar<='Z';
			return lowerCase || upperCase;
		}
		 
 
		//检查字符是否是终结符 
		private bool isEnd(char currentChar) 
		{ 
			for(int i=0;i0) 
					{					 
						//currentPos即剩余串的长度 
						if(currentPos<=4) 
						{ 
							subStr=sourceLine.Substring(0,currentPos); 
							subStr=backCheckSub(subStr); 
						 
							currentPos-=subStr.Length; 
							rightPos.Add(currentPos); 
						} 
						else 
						{ 
							subStr=sourceLine.Substring(currentPos-4,4); 
							subStr=backCheckSub(subStr); 
						 
							currentPos-=subStr.Length; 
							rightPos.Add(currentPos); 
						} 
 
						rightSplit.Add(subStr); 
						if(searchType==1) 
						{ 
							if(!saveSingle&&subStr.Length<=1) 
							{ 
								 
							} 
							else 
							{ 
								splitResult.Add(subStr); 
							} 
						} 
					} 
				 
				} 
 
				//计算混合概率密度的变量 
				int i,j,p; 
 
				//用于存储前向、后向扫描最大概率 
				float pRight=1,pLeft=1;					 
				int lastPosI=0,lastPosJ=rightPos.Count-1; 
 
				if(searchType==2) 
				{ 
					 
					i=1; 
					j=rightPos.Count-2; 
					 
					while(i=0) 
					{ 
						int ll=Convert.ToInt32(leftPos[i]); 
						int rr=Convert.ToInt32(rightPos[j]); 
						if(ll>rr) 
						{ 
							pRight*=((float)(Convert.ToInt32(rightFreq[j])))/denominator; 
							j--; 
						} 
						else if(llj;p--) 
									{				 
										subStr=(string)rightSplit[p]; 
										if(!saveSingle&&subStr.Length<=1) 
										{ 
											 
										} 
										else 
										{ 
											splitResult.Add(subStr); 
										} 
										//splitResult.Add(rightSplit[p]); 
									} 
 
									//记录结点位置 
									lastPosI=i; 
									lastPosJ=j; 
									i++; 
									j--; 
									 
								} 
								else 
								{ 
									//正向搜索的概率大 
									for(p=lastPosI;pj;p--) 
							{ 
								subStr=(string)rightSplit[p]; 
								if(!saveSingle&&subStr.Length<=1) 
								{ 
											 
								} 
								else 
								{ 
									splitResult.Add(subStr); 
								} 
								//splitResult.Add(rightSplit[p]); 
							}		 
						} 
						else 
						{ 
							//正向搜索的概率大 
							for(p=lastPosI+1;p0) 
				{ 
					leftFreq.Add(drs[0]["wfreq"].ToString());					 
					return subString; 
				} 
				else 
				{					 
					leftFreq.Add("0"); 
					return subString; 
				} 
			} 
			else 
			{ 
				DataRow[] drs=ds.Tables["word"].Select("word=\'"+subString+"\'"); 
				 
				if(drs.Length>0) 
				{ 
					leftFreq.Add(drs[0]["wfreq"].ToString());					 
					return subString; 
				} 
				else 
				{										 
					return foreCheckSub(subString.Substring(0,subString.Length-1)); 
				} 
 
			} 
		} 
 
 
		/// <summary>
		/// Backward dictionary lookup: drops leading characters from the candidate until
		/// it is found in the "word" table of the cached data set or is at most one
		/// character long.
		/// Appends exactly one entry to rightFreq: the matched row's wfreq as a string,
		/// or "0" when nothing matched.
		/// </summary>
		/// <param name="subString">Candidate text; its right-hand end is kept while shortening.</param>
		/// <returns>The matched suffix, or the last remaining candidate when nothing matched.</returns>
		private string backCheckSub(string subString)
		{
			string candidate=subString;

			// Loop form of the original recursion: one lookup per candidate length.
			while(true)
			{
				DataRow[] matches=ds.Tables["word"].Select("word=\'"+candidate+"\'");
				if(matches.Length>0)
				{
					rightFreq.Add(matches[0]["wfreq"].ToString());
					return candidate;
				}

				if(candidate.Length<=1)
				{
					// Nothing left to shorten: record a zero frequency.
					rightFreq.Add("0");
					return candidate;
				}

				// Drop the first character and try the shorter suffix.
				candidate=candidate.Substring(1,candidate.Length-1);
			}
		}
 
		#endregion 
 
 
		#region 统计保存结果 
 
		private void saveResult(string file) 
		{		 
			//测试文档词频统计表 
			DataTable staticTable; 
			//类别表 
			DataTable typeTable; 
			//类C的词频表 
			DataTable classTable; 
			//分类结果存储表 
			DataTable calTable;		 
			DataRow dr; 
			DataRow[] drs,drs2; 
			string cmd; 
			int i,j,f; 
			double MI,weight,articleSum,classSum,calWeight;	 
 
			//初始化工作,先将类别读入类别表 
			if(sqlConnect.State==ConnectionState.Closed) 
			{ 
				sqlConnect.Open(); 
			} 
 
			calTable=new DataTable(); 
			if(!getCalTable(ref calTable)) 
			{ 
				MessageBox.Show("创建临时表失败"); 
				return; 
			} 
			 
			staticTable=new DataTable(); 
			if(!getStaticTable(ref staticTable)) 
			{ 
				MessageBox.Show("创建临时表失败"); 
				return; 
			}			 
 
			cmd="select * from typeTable"; 
			OleDbDataAdapter sqlAd=new OleDbDataAdapter(cmd,sqlConnect); 
			typeTable=new DataTable("typeTable"); 
			sqlAd.Fill(typeTable); 
			 
			//统计词出现次数 
			for(i=0;i0) 
				{ 
					j=Convert.ToInt32(drs[0]["times"]); 
					j++; 
					drs[0]["times"]=j; 
				} 
				else 
				{ 
					dr=staticTable.NewRow(); 
					dr["word"]=splitResult[i].ToString();					 
					dr["times"]=1; 
					staticTable.Rows.Add(dr); 
				} 
			}			 
 
			//对应每个类别,分别进行比对,求出分类结果 
			for(i=0;i0) 
					{ 
						staticTable.Rows[j]["classTimes"]=drs[0]["times"]; 
						MI=Convert.ToDouble(drs[0]["MI"]); 
						f=Convert.ToInt32(staticTable.Rows[j]["times"]); 
						//如果出现频率超过阈值,则设为阈值 
						 
						 
						if(f>threshold) 
						{ 
							f=threshold; 
						} 
 
						weight=f*MI; 
 
//						//如果MI为负,f为偶数则会出现负负为正的情况 
//						if(MI<0&&f%2==0) 
//						{ 
//							weight=-(Math.Pow(MI,f)); 
//						} 
//						else 
//						{ 
//							weight=Math.Pow(MI,f); 
//						} 
					} 
					else 
					{ 
						staticTable.Rows[j]["classTimes"]=0; 
						//没出现过的话就算是等可能出现 
						weight=0; 
					} 
 
					staticTable.Rows[j]["weight"]=weight; 
				} 
				//选出最具有代表性的权值 
				drs=staticTable.Select("","weight DESC"); 
 
//				if(typeTable.Rows[i]["typeName"].ToString().Trim()=="艺术") 
//				{ 
//					dataGrid1.DataSource=staticTable; 
//					//saveToResult(staticTable,"艺术"); 
//					//return; 
//					 
//				} 
//				if(typeTable.Rows[i]["typeName"].ToString().Trim()=="体育") 
//				{ 
//					//dataGrid1.DataSource=staticTable; 
//					saveToResult(staticTable,"体育"); 
//					//return; 
//				} 
				 
				weight=0; 
				for(j=0;jthreshold) 
//					{ 
//						f=threshold; 
//					} 
 
					classSum=Convert.ToDouble(drs[j]["classTimes"]); 
					 
					weight+=calWeight; 
					//weight+=f*Math.Log((classSum+1)/(articleSum+this.V)); 
				} 
 
				weight+=Math.Log(articleSum/this.V); 
				typeTable.Rows[i]["weight"]=weight; 
 
				classTable.Clear(); 
				 
			} 
 
			//比较各类权重,求出最大值 
			drs=typeTable.Select("","weight DESC");			 
			textBox3.Text+=file+"分类结果为:"+drs[0]["typeName"].ToString()+"\r\n"; 
			//dataGrid1.DataSource=typeTable; 
		} 
 
		/// <summary>
		/// Writes the pending changes of a table back to the database table named
		/// "learnResult" + name, using commands generated by an OleDbCommandBuilder,
		/// and accepts the changes on success.
		/// A failure during the update is reported in a message box and not rethrown.
		/// </summary>
		/// <param name="dt">Table whose changes are saved.</param>
		/// <param name="name">
		/// Suffix appended to "learnResult" to form the table name. It is concatenated into
		/// the SQL text as-is, so it must come from trusted code, never from user input.
		/// </param>
		private void saveToResult(DataTable dt,string name)
		{
			OleDbConnection sqlCon=db.dbOpertation.getConnection();
			string cmd="select * from learnResult"+name;
			sqlCon.Open();
			try
			{
				try
				{
					// The adapter and builder are released when the update finishes; the
					// builder supplies the insert/update/delete commands for the adapter.
					using(OleDbDataAdapter sqlAd=new OleDbDataAdapter(cmd,sqlCon))
					using(OleDbCommandBuilder sqlBuilder=new OleDbCommandBuilder(sqlAd))
					{
						sqlAd.Update(dt);
						dt.AcceptChanges();
					}
				}
				finally
				{
					// Closed on every path, and before any error dialog is shown.
					sqlCon.Close();
				}
			}
			catch(Exception ex)
			{
				MessageBox.Show("保存到数据库出错了!"+ex.Message);
			}
		}
 
		/// <summary>
		/// Adds the columns of the per-document word statistics table to the given table:
		/// lrno (auto-increment Int32), word (String), times (Int32), classTimes (Int32),
		/// weight (Double) and typeName (String), in that order.
		/// </summary>
		/// <param name="dt">Table that receives the columns.</param>
		/// <returns>
		/// true when all columns were added; false after showing a message box when adding
		/// a column threw.
		/// </returns>
		private bool getStaticTable(ref DataTable dt)
		{
			try
			{
				// Auto-increment key column.
				DataColumn keyColumn=new DataColumn("lrno",typeof(int));
				keyColumn.AutoIncrement=true;
				keyColumn.AutoIncrementStep=1;
				dt.Columns.Add(keyColumn);

				dt.Columns.Add("word",typeof(string));
				dt.Columns.Add("times",typeof(int));
				dt.Columns.Add("classTimes",typeof(int));
				dt.Columns.Add("weight",typeof(double));
				dt.Columns.Add("typeName",typeof(string));

				return true;
			}
			catch(Exception ex)
			{
				MessageBox.Show("创建临时表失败:"+ex.Message);

				return false;
			}
		}
 
		/// <summary>
		/// Adds the columns of the classification result table to the given table:
		/// cwno (auto-increment Int32), typeName (String), classTimes (Int32),
		/// times (Int32) and weight (Double), in that order.
		/// </summary>
		/// <param name="dt">Table that receives the columns.</param>
		/// <returns>
		/// true when all columns were added; false after showing a message box when adding
		/// a column threw.
		/// </returns>
		private bool getCalTable(ref DataTable dt)
		{
			try
			{
				// Auto-increment key column.
				DataColumn keyColumn=new DataColumn("cwno",typeof(int));
				keyColumn.AutoIncrement=true;
				keyColumn.AutoIncrementStep=1;
				dt.Columns.Add(keyColumn);

				dt.Columns.Add("typeName",typeof(string));
				dt.Columns.Add("classTimes",typeof(int));
				dt.Columns.Add("times",typeof(int));
				dt.Columns.Add("weight",typeof(double));

				return true;
			}
			catch(Exception ex)
			{
				MessageBox.Show("创建临时表失败:"+ex.Message);

				return false;
			}
		}
		 
 
		/// <summary>
		/// Placeholder for writing the results back to table learnResult.
		/// Opens the shared connection when it is closed, prepares an adapter and command
		/// builder for "select * from learnResult", and closes the connection again.
		/// The update call itself is commented out, so nothing is written.
		/// </summary>
		private void saveDB()
		{
			bool mustOpen=(sqlConnect.State==ConnectionState.Closed);
			if(mustOpen)
			{
				sqlConnect.Open();
			}

			OleDbDataAdapter resultAdapter=new OleDbDataAdapter("select * from learnResult",sqlConnect);
			OleDbCommandBuilder commandBuilder=new OleDbCommandBuilder(resultAdapter);
			try
			{
				// Disabled update of the "static" table:
				//resultAdapter.Update(ds,"static");
				//ds.Tables["static"].AcceptChanges();
			}
			catch(Exception ex)
			{
				MessageBox.Show("保存到数据库出错了!"+ex.Message);
			}

			sqlConnect.Close();
		}
		 
 
		#endregion 
 
 
	} 
}