www.pudn.com > SVM20.rar > svm_common.cpp
#include "stdafx.h"
#include "svm.h"
#include "svm_common.h"
#include "math.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
static char temstr[200];
/* classifies example */
double classify_example(MODEL *model,DOC *ex)
{
register long i;
register double dist;
dist=0;
for(i=1;isv_num;i++) {
dist+=kernel(&model->kernel_parm,model->supvec[i],ex)*model->alpha[i];
}
return(dist-model->b);
}
/* classifies example for linear kernel
important: the model must have the linear weight vector computed
important: the feature numbers in the example to classify must
not be larger than the weight vector! */
double classify_example_linear(MODEL *model,DOC *ex)
{
return((double)(sprod_ns(model->lin_weights,ex->words)-model->b));
}
/* calculate the kernel function */
CFLOAT kernel(KERNEL_PARM *kernel_parm,DOC *a,DOC*b)
{
com_result.kernel_cache_statistic++;
switch(kernel_parm->kernel_type)
{
case 0: /* linear */
return((CFLOAT)sprod_ss(a->words,b->words));
case 1: /* polynomial */
return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const,(double)kernel_parm->poly_degree));
case 2: /* radial basis function */
return((CFLOAT)exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a->words,b->words)+b->twonorm_sq)));
case 3: /* sigmoid neural net */
return((CFLOAT)tanh(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const));
case 4: /* custom-kernel supplied in file kernel.h*/
return((CFLOAT)custom_kernel(kernel_parm,a,b));
//chen .test sum of
//return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const,(double)kernel_parm->poly_degree)+exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a->words,b->words)+b->twonorm_sq)));
default: sprintf(temstr,"Error: Unknown kernel function");
printm(temstr);
return (-1);
}
}
/* compute the inner product of two sparse vectors */
double sprod_ss(SVM_WORD *a,SVM_WORD*b)
{
register FVAL sum=0;
register SVM_WORD *ai,*bj;
ai=a;
bj=b;
while (ai->wnum && bj->wnum) {
if(ai->wnum > bj->wnum) {
bj++;
}
else if (ai->wnum < bj->wnum) {
ai++;
}
else {
sum+=ai->weight * bj->weight;
ai++;
bj++;
}
}
return((double)sum);
}
/* compute the inner product of two sparse vectors,b right shit 1 bit */
double sprod_ss1(SVM_WORD *a,SVM_WORD*b,int offset)
{
register FVAL sum=0;
register SVM_WORD *ai,*bj;
ai=a;
bj=b;
while (ai->wnum && bj->wnum) {
if(ai->wnum > bj->wnum+offset) {
bj++;
}
else if (ai->wnum < bj->wnum+offset) {
ai++;
}
else
{
int np=(ai->wnum-1)%16+1+offset;
if (np>0 && np<17)
sum+=ai->weight * bj->weight;
ai++;
bj++;
}
}
return((double)sum);
}
double sprod_ss2(SVM_WORD *a,SVM_WORD*b,int offset)
{
register FVAL sum=0;
register SVM_WORD *ai,*bj;
ai=a;
bj=b;
while (ai->wnum && bj->wnum) {
if(ai->wnum > bj->wnum+offset) {
bj++;
}
else if (ai->wnum < bj->wnum+offset) {
ai++;
}
else
{
int np=ai->wnum+offset;
if (np>0 && np<257)
sum+=ai->weight * bj->weight;
ai++;
bj++;
}
}
return((double)sum);
}
/* compute length of weight vector */
double model_length_s(MODEL *model,KERNEL_PARM *kernel_parm)
{
register long i,j;
register double sum=0,alphai;
register DOC *supveci;
for(i=1;isv_num;i++) {
alphai=model->alpha[i];
supveci=model->supvec[i];
for(j=1;jsv_num;j++) {
sum+=alphai*model->alpha[j]
*kernel(kernel_parm,supveci,model->supvec[j]);
}
}
return(sqrt(sum));
}
void clear_vector_n(double *vec,long n)
{
register long i;
for(i=0;i<=n;i++) vec[i]=0;
}
void add_vector_ns(double *vec_n,SVM_WORD *vec_s,double faktor)
{
register SVM_WORD *ai;
ai=vec_s;
while (ai->wnum) {
vec_n[ai->wnum]+=(faktor*ai->weight);
ai++;
}
}
double sprod_ns(double *vec_n,SVM_WORD *vec_s)
{
register double sum=0;
register SVM_WORD *ai;
ai=vec_s;
while (ai->wnum) {
sum+=(vec_n[ai->wnum]*ai->weight);
ai++;
}
return(sum);
}
/* compute weight vector in linear case and add to model*/
void add_weight_vector_to_linear_model(MODEL *model)
{
long i;
model->lin_weights=(double *)my_malloc(sizeof(double)*(model->totwords+1));
clear_vector_n(model->lin_weights,model->totwords);
for(i=1;isv_num;i++) {
add_vector_ns(model->lin_weights,(model->supvec[i])->words,
model->alpha[i]);
}
}
int read_model(char *modelfile,MODEL *model,long max_words,long ll)
{
FILE *modelfl;
long j,i;
char *line;
SVM_WORD *words;
register long wpos;
long wnum,pos;
double weight;
char version_buffer[100];
if (com_pro.show_action)
{
sprintf(temstr,"Reading model...");
printm(temstr);
}
words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(max_words+10));
line = (char *)my_malloc(sizeof(char)*ll);
if ((modelfl = fopen (modelfile, "r")) == NULL)
{
printe (modelfile);
return -1;
}
fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
if(strcmp(version_buffer,VERSION))
{
printe ("Version of model-file does not match version of svm_classify!");
return -1;
}
fscanf(modelfl,"%ld # kernel type\n",&(model->kernel_parm.kernel_type));
fscanf(modelfl,"%ld # kernel parameter -d \n",&(model->kernel_parm.poly_degree));
fscanf(modelfl,"%lf # kernel parameter -g \n",&(model->kernel_parm.rbf_gamma));
fscanf(modelfl,"%lf # kernel parameter -s \n",&(model->kernel_parm.coef_lin));
fscanf(modelfl,"%lf # kernel parameter -r \n",&(model->kernel_parm.coef_const));
fscanf(modelfl,"%s # kernel parameter -u \n",&(model->kernel_parm.custom));
fscanf(modelfl,"%ld # highest feature index \n",&(model->totwords));
fscanf(modelfl,"%ld # number of training documents \n",&(model->totdoc));
fscanf(modelfl,"%ld # number of support vectors plus 1 \n",&(model->sv_num));
fscanf(modelfl,"%lf # threshold b \n",&(model->b));
for(i=1;isv_num;i++)
{
fgets(line,(int)ll,modelfl);
pos=0;
wpos=0;
sscanf(line,"%lf",&model->alpha[i]);
while(line[++pos]>' ');
while((sscanf(line+pos,"%ld:%lf",&wnum,&weight) != EOF) && (wpos' ');
words[wpos].wnum=wnum;
words[wpos].weight=weight;
wpos++;
}
model->supvec[i] = (DOC *)my_malloc(sizeof(DOC));
(model->supvec[i])->words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(wpos+1));
for(j=0;jsupvec[i])->words[j]=words[j];
}
((model->supvec[i])->words[wpos]).wnum=0;
(model->supvec[i])->twonorm_sq = sprod_ss((model->supvec[i])->words,(model->supvec[i])->words);
(model->supvec[i])->docnum = -1;
}
fclose(modelfl);
free(line);
free(words);
if (com_pro.show_readfile)
{
sprintf(temstr, "OK. (%d support vectors read)",(int)(model->sv_num-1));
printm(temstr);
}
}
/*read the data from text documents*/
int read_documents(char *docfile,
DOC *docs,
long *label,
long max_words_doc,
long ll,
long *totwords,
long *totdoc)
{
char *line;
DOC doc;
long dnum=0,wpos,i,dpos=0,dneg=0,dunlab=0;
long doc_label;
FILE *docfl;
line = (char *)my_malloc(sizeof(char)*ll);
if ((docfl = fopen (docfile, "r")) == NULL)
{
printe (docfile);
return -1;
}
doc.words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(max_words_doc+10));
if (com_pro.show_readfile)
{
sprintf(temstr,"Reading examples into memory...");
printm(temstr);
}
dnum=0;
(*totwords)=0;
while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
if(line[0] == '#') continue; /* line contains comments */
if(!parse_document(line,&doc,&doc_label,&wpos,max_words_doc))
{
sprintf(temstr,"Parsing error in line %ld!",dnum);
printm(temstr);
}
label[dnum]=doc_label;
if(doc_label > 0) dpos++;
if (doc_label < 0) dneg++;
if (doc_label == 0) dunlab++;
if((wpos>1) && ((doc.words[wpos-2]).wnum>(*totwords)))
(*totwords)=(doc.words[wpos-2]).wnum;
docs[dnum].words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*wpos);
docs[dnum].docnum=dnum;
for(i=0;i' ') pos++;
while((sscanf(line+pos,"%ld:%lf",&wnum,&weight) != EOF) && (wpos' ');
if(wnum<=0)
{
printe ("Feature numbers must be larger or equal to 1!!!");
sprintf(temstr,"LINE: %s",line);
printm(temstr);
return (0);
}
if((wpos>0) && ((doc->words[wpos-1]).wnum >= wnum))
{
printe ("Features must be in increasing order!!!");
sprintf(temstr,"LINE: %s",line);
printm(temstr);
return (0);
}
(doc->words[wpos]).wnum=wnum;
(doc->words[wpos]).weight=weight;
wpos++;
}
(doc->words[wpos]).wnum=0;
(*numwords)=wpos+1;
doc->docnum=-1;
doc->twonorm_sq=sprod_ss(doc->words,doc->words);
return(1);
}
/* grep through file and count number of lines,
maximum number of spaces per line, and
longest line. */
int nol_ll(char *file,long *nol,long *wol,long *ll)
{
FILE *fl;
int ic;
char c;
long current_length,current_wol;
if ((fl = fopen (file, "r")) == NULL)
{
printe (file);
return -1;
}
current_length=0;
current_wol=0;
(*ll)=0;
(*nol)=1;
(*wol)=0;
while((ic=getc(fl)) != EOF)
{
c=(char)ic;
current_length++;
if(c == ' ')
{
current_wol++;
}
if(c == '\n')
{
(*nol)++;
if(current_length>(*ll))
{
(*ll)=current_length;
}
if(current_wol>(*wol))
{
(*wol)=current_wol;
}
current_length=0;
current_wol=0;
}
}
fclose(fl);
}
long minl(long a,long b)
{
if(ab)
return(a);
else
return(b);
}
long get_runtime()
{
clock_t start;
start = clock();
return((long)((double)start*100.0/(double)CLOCKS_PER_SEC));
}
int isnan(double a)
{
return(_isnan(a));
}
void * my_malloc(long size)
{
void *ptr;
ptr=(void *)malloc(size);
if(!ptr)
{
printe ("Out of memory!");
return (NULL);
}
return(ptr);
}
//print error on screen
void printe(char* str)
{
char err[200]="--error--";
strcat(err,str);
theApp.ShowM(err);
}
//print message on screen
void printm(char* str)
{
theApp.ShowM(str);
}
//custom kernel function
/////////////////////////////////////////////////////////////////
//chen 2001.09.14
/////////////////////////////////////////////////////////////////
double ktt(int ta,int tb,double pa[],double pb[])
{
int ya,yb;
ya=((ta-1)%13)+1;
yb=((tb-1)%13)+1;
if (ya<13&&yb<13)
return pa[ta]*pa[ta+1]*pb[tb]*pb[tb+1];
else return 0.0;
}
double kt(int t,double ta[],double tb[])
{
int x,y;
double sum=0.0;
x=((t-1)/16)+1;
y=((t-1)%16)+1;
if (x>1)
sum+=ta[t]*tb[t]*ta[t-16]*tb[t-16];
if (x>2)
sum+=ta[t]*tb[t]*ta[t-32]*tb[t-32];
if (x>3)
sum+=ta[t]*tb[t]*ta[t-48]*tb[t-48];
if (x<14)
sum+=ta[t]*tb[t]*ta[t-48]*tb[t-48];
if (x<15)
sum+=ta[t]*tb[t]*ta[t-32]*tb[t-32];
if (x<16)
sum+=ta[t]*tb[t]*ta[t+16]*tb[t+16];
if (y>3)
sum+=ta[t]*tb[t]*ta[t-3]*tb[t-3];
if (y>2)
sum+=ta[t]*tb[t]*ta[t-2]*tb[t-2];
if (y>1)
sum+=ta[t]*tb[t]*ta[t-1]*tb[t-1];
if (y<14)
sum+=ta[t]*tb[t]*ta[t+3]*tb[t+3];
if (y<15)
sum+=ta[t]*tb[t]*ta[t+2]*tb[t+2];
if (y<16)
sum+=ta[t]*tb[t]*ta[t+1]*tb[t+1];
return sum;
}
double fi(double* tt)
{
int x,y;
double sum=0.0;
for (int t=1;t<=52;t++)
{
x=((t-1)/13)+1;
y=((t-1)%13)+1;
if (y<13)
sum+=tt[t]*tt[t+1];
}
return sum;
}
double fs(double ta[])
{
double sum=0.0;
int x,y;
for (int i=1;i<256;i++)
{
x=((i-1)/16)+1;
y=((i-1)%16)+1;
if (x<16)
sum+=ta[i]*ta[i+16];
if (y<16)
sum+=ta[i]*ta[i+1];
}
return sum;
}
double sumofword(DOC* a)
{
double sum=0.0;
SVM_WORD* pwa=a->words;
while (pwa->wnum)
{
sum+=pwa->weight;
pwa++;
}
return sum;
}
double custom_kernel(KERNEL_PARM *kernel_parm,DOC *a,DOC*b)
{
double sum=0;
SVM_WORD *ai,*bj;
ai=a->words;
bj=b->words;
while (ai->wnum || bj->wnum)
{
if(ai->wnum == bj->wnum)
{
sum+=(fabs(ai->weight-bj->weight))*(fabs(ai->weight-bj->weight));
ai++;bj++;
}
else if ((ai!=0) &&(ai->wnumwnum || bj->wnum==0))
{
sum+=fabs(ai->weight)*fabs(ai->weight);
ai++;
}
else if ((bj!=0) &&(bj->wnumwnum|| ai->wnum==0))
{
sum+=fabs(bj->weight)*fabs(bj->weight);
bj++;
}
}
// case 1: /* polynomial *///
//return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const,(double)kernel_parm->poly_degree));
//case 2: /* radial basis function */
// return((CFLOAT)exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a->words,b->words)+b->twonorm_sq)));
//case 3: /* sigmoid neural net */
// return((CFLOAT)tanh(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const));
//case 4: /* custom-kernel supplied in file kernel.h*/
// return((CFLOAT)custom_kernel(kernel_parm,a,b));
/*
SVM_WORD *ai,*bj;
ai=a->words;
bj=b->words;
double suma=0.0,sumb=0.0;
while (ai->wnum )
{
suma+=ai->weight;
ai++;
}
while (bj->wnum )
{
sumb+=bj->weight;
bj++;
}
*/
// double K_rbf=exp(-0.001*(a->twonorm_sq-2*sprod_ss(a->words,b->words)+b->twonorm_sq));
double K_Laplace=exp(-0.0001*sum);
double K_poly=pow(sprod_ss(a->words,b->words)+20,3);
// double K_linear=sprod_ss(a->words,b->words);
//double sum;
//double sum=suma*sumb;
//sum=K_rbf*K_poly;
//double sum=fabs(pro*pro+pro-tan(pro));
return K_Laplace*K_poly;
}
void SetInitParam()
{
com_param.biased_Hyperplane=1;
com_param.remove_inconsitant=0;
com_param.C =0.0;
com_param.cost_factor =1.0;
com_param.loo =0;
com_param.search_depth = 0;
com_param.rho = 1.0;
com_param.fraction =1.0;
com_param.rbf_gamma =1.0;
com_param.poly_c =0.0;
com_param.poly_s =1.0;
com_param.poly_degree =1;
com_param.kernel_type =0;
//com_param.user_u = pp4.m_strU ;
com_param.epsion =0.001;
com_param.iteration_time =100;
com_param.cache_size =40;
com_param.new_variable =com_param.maximum_size;
com_param.maximum_size =10;
com_param.final_test = 1;
com_param.blWriteModel=TRUE;
com_param.Running=FALSE;
com_param.Finished=TRUE;
com_param.Close=FALSE;
//prompt default values
com_pro.show_action=TRUE;
com_pro.show_compute_1=TRUE;
com_pro.show_compute_2=FALSE;
com_pro.show_compute_3=FALSE;
com_pro.show_other=FALSE;
com_pro.show_readfile=FALSE;
com_pro.show_writefile=FALSE;
com_pro.show_testresult=TRUE;
com_pro.show_trainresult=FALSE;
}