www.pudn.com > DTreebydhm.rar > DecisionTree.cpp
#include#include "DecisionTree.h" DecisionTree:: DecisionTree() { //构造函数 root=new Node(); numOfIns=0; numOfAttr=0; trainingSet=NULL; weight=NULL; used=NULL; numOfNodes=0; } DecisionTree:: ~DecisionTree() { //析构函数 DeleteTree(root); if(trainingSet!=NULL) { delete []trainingSet; trainingSet=NULL; } if(used!=NULL) { delete []used; used=NULL; } } Node* DecisionTree:: GetRoot() { return this.root; } int DecisionTree:: GetnumOfIns() { return this.numOfIns; } int DecisionTree:: GetnumOfAttr() { return this.numOfAttr; } void DecisionTree:: SetnumOfIns(int numOfIns) { this.numOfIns=numOfIns; } void DecisionTree:: SetnumOfAttr(int numOfAttr) { this.numOfAttr=numOfAttr; } int DecisionTree:: GetnumOfNodes() { return this.numOfNodes; } void DecisionTree:: DeleteTree(Node* root) { if(root->leftchild!=NULL){ deleteTree(root->leftchild); } if(root->rightchild!=NULL) deleteTree(root->rightchild); delete root; } /*double DecisionTree:: Entropy(int attribute,int start,int end,double** trainingSet) { //由非类别属性attribute划分子集的熵 int number=Count(attribute,start,end,trainingSet); int* count=new int[number];//记录各属性值数目 for(i=0;i gain) { gain=gain[i-start]; } } return gain; }*/ double DecisionTree:: GainRatio(int attribute, int start,int end,double** trainingSet) { //信息增益率 double gainratio; double gain=0; sort(attribute,start,end,trainingSet); int i; int splitpoint; double* g=new double[end-start]; double info=SplitInfo(numOfAttr-1,start,end,trainingSet); for(i=start;i gain) { gain=g[i-start]; splitpoint=i; } } double splitinfo; //分裂信息 splitinfo=-(splitpoint-start+1)/(end-start+1)*(log((splitpoint-start+1)/(end-start+1))/log(2))-(end-splitpoint)/(end-start+1)*(log((end-splitpoint)/(end-start+1))/log(2)); gainratio=gain/splitinfo; return gainratio; } void DecisionTree:: sort(int attribute,int start,int end,double** trainingSet) { int test; for(test=start; test trainingSet[test+1][attribute]){ break; } if(test==end) return; int i,j; i=start; j=end; double* pivot=new double[numOfAttr]; for(int kk =0;kk trainingSet[i][attribute]) i++; if(i 1) sort(attribute,start,i-1,trainingSet); if(end-j>1) sort(attribute,j+1,end,trainingSet); } int DecisionTree:: Count(int attribute,int start,int end,double** trainingSet) { /*int number=end-start+1; //计算离散属性值attribute的数目 int i; int j; for(i=start;i<=end;i++) { for(j=start;jroot=GenerateTree(0,numOfIns-1,trainingSet); cout<<"This tree is built"< isLeaf = 1; node->attribute = int(trainingSet[start][numOfAttr-1]); if(node->attribute==1) node->value=1; else node->value=0; numOfNodes++; return node; } if(numOfAttr==1) //只有类别属性,返回一个结点,其值是出现最多的属性值 { int countneg=0; int countpos=0; for(int i=start;i<=end;i++) { if(trainingSet[i][numOfAttr-1]==0) countneg++; else if(trainingSet[i][numOfAttr-1]==1) countpos++; } int more; if(countneg>countpos) more=0; else more=1; Node* node=new Node(); node->isLeaf=1; node->attribute=more; node->value=countpos/(end-start+1); numOfNodes++; return node; } else //选择信息增益比最大的属性进行分类 { int* used=new int[numOfAttr]; for(int n=0;n gain) { gain=g[j-start]; splitpoint[i]=j; } } if(!used[i]&&gainratio isLeaf=1; int numOfPos=0; int numOfNeg=0; for(int i=start;i<=end;i++) { if(trainingSet[i][numOfAttr-1]==1) numOfPos++; else numOfNeg++; } node->value=numOfPos/(end-start+1);//有病的概率 if(numOfPos>=numOfNeg) node->attribute=1; else node->attribute=0; } else//继续生长 { node->value=trainingSet[splitpoint[attribute]][attribute]; node->attribute=attribute; node->isLeaf=0; node->leftchild=GenerateTree(start,splitpoint[attribute],trainingSet); node->rightchild=GenerateTree(splitpoint[attribute]+1,end,trainingSet); } return node; } } double DecisionTree:: ClassifyIns(double* instance) { if(root==NULL) return -1; Node* p=root; while(p->isLeaf==0) { if(instance[p->attribute]<=p->value) p=p->leftchild; else p=p->rightchild; } return p->value; } void DecisonTree:: SaveTree(ofstream& fout,Node* node) { if(node==NULL) { return; } fout< isLeaf< value< attribute< leftchild!=NULL) SaveTree(fout,node->leftchild); if(node->rightchild!=NULL) SaveTree(fout,node->rightchild); } void DecisonTree:: SaveTree(char* fname) { ofstream fout(fname); saveTree(fout,root); fout.close(); } Node* DecisonTree:: LoadNode(ifstream& fin) { int i = 0; if(fin.eof()) { return NULL; } else { int isLeaf = 0; double value =0; int attribute = 0; fin>>isLeaf; fin>>value; fin>>attribute; Node* node = new Node(value,attribute,isLeaf); if(node->isLeaf ==1) { return node; } node->leftchild = LoadNode(fin); node->rightchild = LoadNode(fin); return node; } } void CDecisonTree:: LoadTree(char* fname) { ifstream fin(fname); root= LoadNode(fin); fin.close(); } void DecisonTree:: ShowTree(Node* node) { if(node ==NULL) return; cout<<"tree node"< GetValue()<<" "< GetAttribute()<<" "< IsLeaf()< leftchild!=NULL) { ShowTree(node->leftchild); } if(node->rightchild!=NULL) ShowTree(node->rightchild); }