// Source: www.pudn.com > apriori算法的java代码.rar (Java code for the Apriori algorithm) > Apriori.java


//Implements the Apriori algorithm 
//Public interface: 
//Apriori(); Init(); GenerateFrequence(); 
 
import java.lang.* ; 
import java.io.* ; 
 
class Apriori 
{ 
	//members 
	double minSup ,minCon ; //minimum support, minimum confidence 
	double deltaSup,deltaCon ; 
	int minNum ,num; //minNum: minimum count; num: size of fullSet (assigned in Init) 
	 
	FastVector fullSet ; //the full set (raw data set) obtained from the GetSource class 
	FastVector frequence; //the largest frequent set 
	FastVector[] children ; //array of FastVectors holding the subsets; must be allocated with new before use 
	FastVector ruleSrc,ruleDest ; //hold the rules: source and destination; each is itself a "big" 
	FastVector confidence,support ;//special FastVectors: they hold the confidence and support (as Double) matching each rule 
	FastVector test ; //for testing only 
	 
	/**
	 * Creates an instance with the default thresholds (minimum support 0.2,
	 * minimum confidence 0.4, minimum count 2) and empty rule storage.
	 * The data set itself is not loaded here; see {@code Init}.
	 */
	public Apriori() 
	{ 
		// Default thresholds; SetSup / SetCon may override them later.
		this.minSup = 0.2;
		this.minCon = 0.4;
		this.minNum = 2;

		// Rule storage: source side and destination side of each rule.
		this.ruleSrc = new FastVector();
		this.ruleDest = new FastVector();

		// Special vectors: they hold the confidence and support values
		// that correspond to the stored rules.
		this.confidence = new FastVector();
		this.support = new FastVector();
	} 
	 
	//methods 
	// Test-only helper that opens "vector.txt" for output; its parameter is 
	// described by the original author as a "big". 
	// NOTE(review): the text of this file is truncated inside the for-header 
	// below. Everything between the loop variable and the string-literal 
	// fragment is missing, so the rest of WriteBig and the beginning of the 
	// following code (which reads confidence and ruleDest and calls Print) are 
	// not visible. Restore this region from the original file before compiling. 
	public void WriteBig(FastVector big) //for testing only 
	throws FileNotFoundException,IOException 	{ //big 
		FileOutputStream fOut = new FileOutputStream("vector.txt"); 
		int size1 = big.size() , size2; 
		FastVector temp ; 
		Item item ; 
		int data ; 
		 
		for(int i=0 ; i"+"    confidence is:"+ 
				((Double)confidence.elementAt(i)).toString() ); 
			Print((FastVector)ruleDest.elementAt(i)); 
			 
		} 
	} 
	 
	/**
	 * Sets the minimum support ratio and derives the matching minimum count.
	 *
	 * <p>A value greater than 1.0 (or NaN) is rejected and leaves both
	 * {@code minSup} and {@code minNum} untouched. Previously {@code minNum}
	 * was recomputed even from a rejected value, which left the two
	 * thresholds inconsistent with each other.
	 *
	 * <p>{@code fullSet} must already be assigned (in the visible code that
	 * happens in {@code Init}); otherwise an accepted call throws
	 * {@link NullPointerException}.
	 *
	 * @param sup minimum support as a ratio, accepted when {@code sup <= 1.0}
	 */
	public void SetSup(double sup){ 
		if (!(sup <= 1.0)) {
			// Rejected: keep the previous, mutually consistent thresholds.
			return;
		}
		// Total number of itemsets; named so it does not shadow the field num.
		double total = fullSet.size();
		minSup = sup;
		// Truncate (total * sup) and add one, exactly as before.
		minNum = (int) ((total * sup) + 1.0);
	} 
	 
	/**
	 * Sets the minimum confidence threshold.
	 *
	 * <p>The value is stored only when {@code con <= 1.0}; anything else
	 * (including NaN) is ignored and the previous threshold is kept.
	 *
	 * @param con minimum confidence
	 */
	public void SetCon(double con){ 
		if (!(con <= 1.0)) {
			return;
		}
		minCon = con;
	} 
	 
	/**
	 * Initializes the full data set from the file named by {@code str} and
	 * records its size in {@code num}.
	 *
	 * @param str name of the file handed to {@code GetSource}
	 * @throws FileNotFoundException declared by the original signature
	 * @throws IOException declared by the original signature
	 */
	public void Init(String str) 
	throws IOException,FileNotFoundException 
	{ 
		// GetSource supplies the whole data set in a single call.
		fullSet = new GetSource(str).GetAll();
		num = fullSet.size();
	} 
	 
	// Per the original comment, intended to report whether itemset contains item. 
	// NOTE(review): the text of this file is truncated inside the for-header 
	// below. The body of IsContain and the beginning of the following method 
	// are missing. The visible tail belongs to that other method: its loop 
	// decrements i, removes from Cb every itemset whose count in fullSet is 
	// below minNum, and returns Cb. Restore this region from the original file 
	// before compiling. 
	private boolean IsContain(FastVector itemset,Item item) //does itemset contain item? 
	{ 
		Item item0 ;  
		for(int i=0;i=0 ; i--) 
		{ 
			temp = (FastVector)Cb.elementAt(i) ; 
			if( Num(fullSet,temp) < minNum ) //if the count is below the minimum count 
				Cb.removeElementAt(i) ; //then remove that itemset 
		}	 
		 
		return Cb; 
	}//pass 
	 
	// Starting from fullSet, repeatedly applies SelfConnect until it yields an 
	// empty vector, leaving the last non-empty level in frequence, then 
	// allocates the children array with one slot per element of frequence. 
	// NOTE(review): if ToC1 returns an empty vector the while loop never runs 
	// and frequence keeps its previous value (the constructor does not assign 
	// it), so frequence.size() below may fail with a null reference -- confirm. 
	// NOTE(review): the text of this file is truncated inside the second 
	// for-header below. The rest of GenerateFrequence and the beginning of the 
	// following rule-generation code are missing. Restore this region from the 
	// original file before compiling. 
	public void GenerateFrequence() //from the original raw data set, generate the largest frequent set that satisfies the minimum count 
	{ 
		FastVector swp ; 
		FastVector vector = BreakOne(fullSet); //get items 
		 
		vector = ToC1(vector); //get C1 
		while(vector.size() > 0) //Ck to Ck+1 
		{ 
			frequence = SelfConnect(vector);    
			// Swap: vector becomes the newly generated level, frequence keeps 
			// the previous (non-empty) level. 
			swp = vector ; 
			vector = frequence ; 
			frequence = swp ; 
		} 
		 
		children = new FastVector[frequence.size()]; //once the frequent set is obtained, allocate the array of subsets	 
		for(int i=0 ; i minCon ) //minimum confidence satisfied: record the rule 
				{ 
					sup = (double)Num(fullSet,child) / num ; 
					odds = Odds(mother,child); 
					 
					// The four vectors are appended in step, so index k in each 
					// one refers to the same rule. 
					ruleSrc.addElement(child); 
					ruleDest.addElement(odds);	 
					confidence.addElement(new Double(con));	 
					support.addElement(new Double(sup))	; 
				}				 
			} 
		} 
	} 
		 
	/**
	 * Command-line entry point: loads a data set, sets the thresholds
	 * (support 0.4, confidence 0.8), runs the mining steps and hands the
	 * result to {@code Output.WriteRules}.
	 *
	 * <p>The data file is taken from the first command-line argument when one
	 * is given; with no arguments it defaults to {@code "zoo.arff"}, which
	 * was previously the only file this method could read.
	 *
	 * @param arg optional; {@code arg[0]} is the data file to load
	 * @throws FileNotFoundException declared by the original signature
	 * @throws IOException declared by the original signature
	 */
	public static void main(String arg[]) 
	throws FileNotFoundException,IOException  
	{ 
		String dataFile = (arg != null && arg.length > 0) ? arg[0] : "zoo.arff";

		Apriori app = new Apriori(); 

		app.Init(dataFile);               // get the full set 
		app.SetSup(0.4); 
		app.SetCon(0.8); 
		app.PrintBig(app.fullSet); 

		// Going Apriori 
		app.GenerateFrequence(); 
		app.WriteBig(app.frequence); 
		app.AllChildren(app.frequence); 
//		app.PrintChildren(); 
		app.GenerateRules(); 
//		app.PrintRules(); 

		Output.WriteRules(app); 
	}	 
}