www.pudn.com > DTreebydhm.rar > DecisionTree.cpp


#include  
#include "DecisionTree.h" 
 
DecisionTree:: DecisionTree() 
{ //构造函数 
	root=new Node(); 
	numOfIns=0; 
	numOfAttr=0; 
	trainingSet=NULL; 
	weight=NULL; 
	used=NULL; 
	numOfNodes=0; 
} 
 
DecisionTree:: ~DecisionTree() 
{ //析构函数 
	DeleteTree(root); 
	if(trainingSet!=NULL) 
	{ 
		delete []trainingSet; 
		trainingSet=NULL; 
	} 
	if(used!=NULL) 
	{ 
		delete []used; 
		used=NULL; 
	} 
} 
 
Node* DecisionTree:: GetRoot() 
{ 
	return this.root; 
} 
 
int DecisionTree:: GetnumOfIns() 
{ 
	return this.numOfIns; 
} 
 
int DecisionTree:: GetnumOfAttr() 
{ 
	return this.numOfAttr; 
} 
 
void DecisionTree:: SetnumOfIns(int numOfIns) 
{ 
	this.numOfIns=numOfIns; 
} 
 
void DecisionTree:: SetnumOfAttr(int numOfAttr) 
{ 
	this.numOfAttr=numOfAttr; 
} 
 
int DecisionTree:: GetnumOfNodes() 
{ 
	return this.numOfNodes; 
} 
 
void DecisionTree:: DeleteTree(Node* root) 
{ 
	if(root->leftchild!=NULL){ 
		deleteTree(root->leftchild); 
	} 
	if(root->rightchild!=NULL) 
		deleteTree(root->rightchild); 
	delete root; 
} 
 
/*double DecisionTree:: Entropy(int attribute,int start,int end,double** trainingSet) 
{ //由非类别属性attribute划分子集的熵 
		int number=Count(attribute,start,end,trainingSet); 
		int* count=new int[number];//记录各属性值数目 
		for(i=0;igain) 
		{ 
			gain=gain[i-start]; 
		} 
	} 
    return gain; 
}*/ 
 
double DecisionTree:: GainRatio(int attribute, int start,int end,double** trainingSet) 
{  //信息增益率 
	double gainratio; 
	double gain=0; 
	sort(attribute,start,end,trainingSet); 
	int i; 
	int splitpoint; 
	double* g=new double[end-start]; 
	double info=SplitInfo(numOfAttr-1,start,end,trainingSet); 
	for(i=start;igain) 
		{ 
			gain=g[i-start]; 
			splitpoint=i; 
		} 
	} 
	double splitinfo;  //分裂信息 
	splitinfo=-(splitpoint-start+1)/(end-start+1)*(log((splitpoint-start+1)/(end-start+1))/log(2))-(end-splitpoint)/(end-start+1)*(log((end-splitpoint)/(end-start+1))/log(2)); 
    gainratio=gain/splitinfo;  
    return gainratio; 
} 
 
void DecisionTree:: sort(int attribute,int start,int end,double** trainingSet) 
{ 
	int test; 
	for(test=start; testtrainingSet[test+1][attribute]){ 
			break; 
		} 
	if(test==end) return; 
    int i,j; 
    i=start;   
    j=end;  
    double* pivot=new double[numOfAttr]; 
	for(int kk =0;kk trainingSet[i][attribute]) i++;    
        if(i1) sort(attribute,start,i-1,trainingSet); 
    if(end-j>1) sort(attribute,j+1,end,trainingSet);	 
} 
 
int DecisionTree:: Count(int attribute,int start,int end,double** trainingSet) 
{ 
	/*int number=end-start+1;   //计算离散属性值attribute的数目 
	int i; 
	int j; 
	for(i=start;i<=end;i++)   
	{ 
		for(j=start;jroot=GenerateTree(0,numOfIns-1,trainingSet); 
	 cout<<"This tree is built"<isLeaf = 1; 
		node->attribute = int(trainingSet[start][numOfAttr-1]); 
		if(node->attribute==1) 
			node->value=1; 
		else node->value=0; 
		numOfNodes++; 
		return node; 
	} 
	if(numOfAttr==1)  //只有类别属性,返回一个结点,其值是出现最多的属性值 
	{ 
		int countneg=0; 
		int countpos=0; 
		for(int i=start;i<=end;i++) 
		{ 
			if(trainingSet[i][numOfAttr-1]==0) 
				countneg++; 
			else if(trainingSet[i][numOfAttr-1]==1) 
				countpos++; 
		} 
		int more; 
		if(countneg>countpos) 
			more=0; 
		else 
			more=1; 
		Node* node=new Node(); 
		node->isLeaf=1; 
		node->attribute=more; 
		node->value=countpos/(end-start+1); 
		numOfNodes++; 
		return node; 
	} 
	else    //选择信息增益比最大的属性进行分类 
	{ 
		int* used=new int[numOfAttr]; 
		for(int n=0;ngain) 
				{ 
				    gain=g[j-start]; 
				    splitpoint[i]=j; 
				} 
			} 
			if(!used[i]&&gainratioisLeaf=1; 
                int numOfPos=0; 
				int numOfNeg=0; 
				for(int i=start;i<=end;i++) 
				{ 
					if(trainingSet[i][numOfAttr-1]==1) 
						numOfPos++; 
					else numOfNeg++; 
				} 
				node->value=numOfPos/(end-start+1);//有病的概率 
				if(numOfPos>=numOfNeg) 
					node->attribute=1; 
				else node->attribute=0; 
			} 
			else//继续生长 
			{ 
				node->value=trainingSet[splitpoint[attribute]][attribute]; 
				node->attribute=attribute; 
				node->isLeaf=0; 
				node->leftchild=GenerateTree(start,splitpoint[attribute],trainingSet); 
			    node->rightchild=GenerateTree(splitpoint[attribute]+1,end,trainingSet); 
			} 
			return node; 
	} 
} 
 
double DecisionTree:: ClassifyIns(double* instance) 
{ 
	if(root==NULL) 
		return -1; 
	Node* p=root; 
	while(p->isLeaf==0) 
	{ 
		if(instance[p->attribute]<=p->value) 
			p=p->leftchild; 
		else 
			p=p->rightchild; 
	} 
	return p->value; 
} 
 
void DecisonTree:: SaveTree(ofstream& fout,Node* node) 
{ 
    if(node==NULL) 
	{ 
		return; 
	} 
	fout<isLeaf<value<attribute<leftchild!=NULL) 
    	SaveTree(fout,node->leftchild); 
	if(node->rightchild!=NULL) 
    	SaveTree(fout,node->rightchild); 
} 
 
void DecisonTree:: SaveTree(char* fname) 
{ 
	ofstream fout(fname); 
	saveTree(fout,root); 
	fout.close(); 
} 
 
Node* DecisonTree:: LoadNode(ifstream& fin) 
{ 
	int i = 0; 
	if(fin.eof()) 
	{ 
		return NULL; 
	} 
	else 
	{ 
		int isLeaf = 0; 
		double value =0; 
		int attribute = 0; 
		fin>>isLeaf; 
		fin>>value; 
		fin>>attribute; 
		Node* node = new Node(value,attribute,isLeaf); 
		if(node->isLeaf ==1) 
		{ 
			return node; 
		} 
	    node->leftchild = LoadNode(fin); 
		node->rightchild = LoadNode(fin); 
		return node; 
	} 
} 
 
void CDecisonTree:: LoadTree(char* fname) 
{ 
	ifstream fin(fname); 
	root= LoadNode(fin); 
	fin.close(); 
} 
 
void DecisonTree:: ShowTree(Node* node) 
{ 
	if(node ==NULL) 
		return; 
	cout<<"tree node"<GetValue()<<" "<GetAttribute()<<" "<IsLeaf()<leftchild!=NULL) 
	{ 
		ShowTree(node->leftchild); 
	} 
	if(node->rightchild!=NULL) 
		ShowTree(node->rightchild); 
	 
}