www.pudn.com > malic.rar > csuCommonSubspace.c
/* * csuCommonSubspace.c * * Authors: Kai She, J. Ross Beveridge, David Bolme and Marcio Teixeira * */ /* Copyright (c) 2003 Colorado State University Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include/* How many lines in the training file header have useful text */ #define TRAINING_HEADER_ENTRIES 10 void subspaceTrain (Subspace *s, Matrix images, ImageList *srt, int numSubjects, int dropNVectors, CutOffMode cutOffMode, double cutOff, int useLDA, int writeTextInterm) { int i; Matrix m; int n = 0; /* The number of eigen vectors to keep */ double total_energy, energy; Matrix tmp; /* Initialize structure */ s->useLDA = useLDA; s->cutOffMode = cutOffMode; s->cutOff = cutOff; s->dropNVectors = dropNVectors; s->numSubjects = numSubjects; s->numPixels = images->row_dim; /********************************************************************* * STEP ONE: Calculate the eigenbasis ********************************************************************/ /* Compute the Eigenvalues and Eigenvectors for the covariance matrix derived from the images data matrix. The image data is "centered", meaning the mean image is subtracted from all images before PCA is performed. This centering is done in place, so after this call images are centered. */ MESSAGE("Computing the PCA eigenspace."); eigentrain (&s->mean, &s->values, &s->basis, images); MESSAGE("Finished computing eigenspace."); /* Numerical roundoff errors may lead to small negative values. Strip those before saving the matrix. */ m = s->values; for (i = 0; i < m->row_dim; i++) { if (ME(m,i,0) < 0) { if (ME(m,i,0) < -1e-10) printf("WARNING: Large negative eigenvalue found %f. Truncating to zero.\n", ME(m,i,0)); ME(m,i,0) = 0; } } /********************************************************************* * STEP TWO: Drop eigenvectors from the front or truncate them from * the back ********************************************************************/ /* The following is used to filter the vectors that are retained after PCA training. The function first optionally removes the vectors from the matrix based on the argument -dropNVectors. This argument is always intrepreted in terms of absolute numbers, i.e. a value of 1 means drop the first vector, a value of 3 means drop the first three. The function then drops vectors from the tail based on -cutOffMode and -cutOff arguments. Here, the mode controls how the cutoff is performed. The possible modes are: NONE: Keep all remaining eigenvectors. SIMPLE: Keep a percentage where the percentage is specified by cutOff. ENERGY: Keep the fewest vectors are such that the sum of energy for them just exceeds cutOff times the total energy. STRETCH: Keep all eigenvectors that have eigenvalues greater than a percentage of the largest, where the percentage is specied by cutOff. CLASSES: Keep as many eigenvectors as there are LDA classes. For both Energy and Stretch, if eigen values/vectors are dropped from the front, these are not included in the determination of the total energy or the max eigen value. */ /* Drop the first vectors */ DEBUG_CHECK (s->dropNVectors < (s->basis)->col_dim, "Number of vectors to drop must be less than the number of the eigen vectors"); /* transpose eigenValues for use in this function */ tmp = transposeMatrix (s->values); freeMatrix (s->values); s->values = tmp; if (s->dropNVectors && (s->dropNVectors < (s->values)->col_dim)) { tmp = matrixCols (s->basis, s->dropNVectors, (s->basis)->col_dim-1); freeMatrix (s->basis); s->basis = tmp; tmp = matrixCols (s->values, s->dropNVectors, (s->values)->col_dim-1); freeMatrix (s->values); s->values = tmp; } /* transpose the eigenValues back to the original order. */ tmp = transposeMatrix (s->values); freeMatrix (s->values); s->values = tmp; DEBUG_CHECK((s->values)->row_dim - s->dropNVectors > 0, "Too many eigen vectors droped from front. Can not proceed."); switch (s->cutOffMode) { case CUTOFF_NONE: n = (s->basis)->col_dim; break; case CUTOFF_SIMPLE: n = (int)((s->basis)->col_dim * s->cutOff / 100.0); break; case CUTOFF_ENERGY: /* compute total energy - this will not include vectors/values dropped from front. */ total_energy = 0; for (i = 0; i < (s->values)->row_dim; i++) { total_energy += ME(s->values, i, 0); } /* compute cutoff point */ i = 0; energy = 0; while ((i < (s->values)->row_dim) && (energy < total_energy * s->cutOff / 100.0)) { energy += ME(s->values, i, 0); i++; } n = i; break; case CUTOFF_STRETCH: i = 1; while ((i < (s->values)->row_dim) && (100.0*(ME(s->values, i, 0) / ME(s->values, s->dropNVectors, 0)) > cutOff )) { i++; } n = i; break; case CUTOFF_CLASSES: n = s->numSubjects; break; default: n = 0; DEBUG_CHECK (0, "ERROR: Unkown cutoff type"); break; }; /* Never set the dimensionality of the PCA subspace below the number of LDA classes when LDA is being used. Doing so creates a horrible problem for LDA: too fee dimensions */ if (s->useLDA && (n < s->numSubjects)) n = s->numSubjects; DEBUG_CHECK (n <= (s->basis)->col_dim, "Tried to expand, not contract, PCA space."); MESSAGE1ARG ("Retaining %d eigen vectors.",n); tmp = matrixCols ( s->basis, 0 , n-1); freeMatrix (s->basis); s->basis = tmp; DEBUG_INT (1, "Number of eigen vectors kept.", n); DEBUG_CHECK ((s->basis)->col_dim > 0, "All basis vectors deleted after cutoff " "and vector drop was processed."); MESSAGE2ARG("Truncating PCA Space. Subspace projection expressed " "as %d by %d matrix.", s->basis->row_dim, s->basis->col_dim); /********************************************************************* * STEP THREE: Do the LDA if specified ********************************************************************/ if (s->useLDA) { /* Need to project original images into PCA space */ Matrix fisherBasis, fisherValues, combinedBasis; Matrix imspca = transposeMultiplyMatrixL (s->basis, images); MESSAGE("Computing Fisher Linear Discriminants for " "training images projected into PCA subspace."); fisherTrain (imspca, srt, &fisherBasis, &fisherValues, writeTextInterm); combinedBasis = multiplyMatrix (s->basis, fisherBasis); basis_normalize (combinedBasis); MESSAGE2ARG ("PCA and LDA Combined. Combined projection expressed as %d by " "%d matrix.", combinedBasis->row_dim, combinedBasis->col_dim); s->values = fisherValues; s->basis = combinedBasis; } } /** writeSubspace Writes out a training discription file that includes important parameters and command line options and all eigenvectors and eigenvalues format: text: 256 lines reserved line1: TRAINING_COMMAND = commandline line2: DATE = line3: FILE_LIST = line4: VECTOR_LENGTH = line5: USE_LDA = line6: PCA_CUTOFF = line7: BASIS_VALUE_COUNT = line8: BASIS_VECTOR_COUNT = line9: DROPPED_FROM_FRONT = line10 -> line256: RESERVED If the additional entries are added to the header, increment the variable TRAINING_HEADER_ENTRIES */ void writeSubspace (Subspace *s, char *training_filename, char *imageList, int argc, char**argv) { int i, j; FILE* file; char *cutOffModeStr; time_t ttt = time(0); switch (s->cutOffMode) { case CUTOFF_NONE: cutOffModeStr = "NONE"; break; case CUTOFF_SIMPLE: cutOffModeStr = "SIMPLE"; break; case CUTOFF_ENERGY: cutOffModeStr = "ENERGY"; break; case CUTOFF_STRETCH: cutOffModeStr = "STRETCH"; break; default: cutOffModeStr = "UNKNOWN"; break; } MESSAGE1ARG ("Saving trianing information to file %s", training_filename); file = fopen (training_filename, "wb"); if (!file) { printf ("Error: could not open file <%s>\n", training_filename); exit (1); } fprintf (file, "TRAINING_COMMAND ="); for (i = 0; i < argc; i++) fprintf (file, " %s", argv[i]); fprintf (file, "\n"); fprintf (file, "DATE = %s", ctime(&ttt)); fprintf (file, "FILE_LIST = %s\n", imageList); fprintf (file, "VECTOR_LENGTH = %d\n", s->basis->row_dim); /* numPixels */ fprintf (file, "USE_LDA = %s\n", s->useLDA ? "YES" : "NO" ); fprintf (file, "CUTOFF_MODE = %s\n", cutOffModeStr); fprintf (file, "CUTOFF_PERCENTAGE = %f\n", s->cutOff); fprintf (file, "BASIS_VALUE_COUNT = %d\n", s->values->row_dim); /* basisDim */ fprintf (file, "BASIS_VECTOR_COUNT = %d\n", s->basis->col_dim); fprintf (file, "DROPPED_FROM_FRONT = %d\n", s->dropNVectors); for (i = 11; i < 256; i++){ fprintf (file, "\n"); } /* write out the pixel count */ writeInt (file, s->mean->row_dim); /* write out the mean vector */ for (i = 0; i < s->mean->row_dim; i++) { writeDouble (file, ME(s->mean,i,0)); } /* write out the number of eigen values */ writeInt (file, s->values->row_dim); /* write out the eigen values */ for (i = 0; i < s->values->row_dim; i++) { writeDouble (file, ME(s->values,i,0)); } /* write out the number of basis vectors */ writeInt (file,s->basis->col_dim); /* write out the eigen basis. the size is "pixelcount"X"number of vectors"*/ for (i = 0; i < s->basis->col_dim; i++) { for (j = 0; j < s->basis->row_dim; j++) { writeDouble (file, ME(s->basis, j, i)); } } fclose (file); } /* The contents of the training file are described in csuSupbspaceTrain.c above the definition of writeTrainingFile. This companion function reads one of these files and creates the subspace basis vectors and eigenvalues. In the case of PCA+LDA, the eigenvalues are for the transformed, or second, of the two symmetric eigenvector problems solved. The variable TRAINING_HEADER_ENTRIES defined at the top of this file is used to control how many header lines are printed to standard out. It needs to be adjusted if new entries are added to the header. */ void readSubspace (Subspace *s, const char* trainingFile, int quiet) { int i, j, headerSize, rowDim, colDim; char junk[FILE_LINE_LENGTH], text[FILE_LINE_LENGTH]; char** header; FILE* file; headerSize = 255; header = (char**) malloc (sizeof(char*) * headerSize); assert (header); for (i = 0; i < headerSize; i++) { header[i] = (char*) malloc(sizeof(char) * FILE_LINE_LENGTH); assert (header[i]); } file = fopen (trainingFile, "rb"); if (!file) { printf("Error: could not open file <%s>\n", trainingFile); exit(1); } for (i = 0; i < headerSize; i++) fgets(header[i], FILE_LINE_LENGTH, file); if (!quiet) { printf("\nTraining Header File is:\n"); for (i = 0; i < TRAINING_HEADER_ENTRIES; i++) printf(" Line %d: %s", i, header[i]); } sscanf(header[7], "%s%s%d", junk, junk, &s->basisDim); sscanf(header[4], "%s%*s%s", junk, text); if (strcmp(text, "NO") == 0) s->useLDA = 0; else s->useLDA = 1; readInt (file,&rowDim); s->numPixels = rowDim; DEBUG_INT (3, "Vector size", rowDim); s->mean = makeMatrix(rowDim, 1); for (i = 0; i < (s->mean)->row_dim; i++) { readDouble (file, &ME(s->mean,i,0)); } readInt (file,&rowDim); s->values = makeMatrix (rowDim, 1); for (i = 0; i < (s->values)->row_dim; i++) { readDouble (file, &ME(s->values,i,0)); } rowDim = s->numPixels; readInt (file,&colDim); s->basis = makeMatrix (rowDim, colDim); for (i = 0; i < (s->basis)->col_dim; i++) { for (j = 0; j < (s->basis)->row_dim; j++) { readDouble (file, &ME(s->basis, j, i)); } } fclose(file); } void validateBasisIsOrthonormal (Matrix basis, int printlevel) { int i, j; FTYPE tolerance = 0.000001; Matrix test = transposeMultiplyMatrixL(basis, basis); for (i = 0; i < test->row_dim; i++) { for (j = 0; j < test->col_dim; j++) { if (i == j) { if (ABS(ME(test, i, j) - 1.0) > tolerance) { fprintf(stderr, "WARNING: Subspace basis failed orthonormality check at (%d, %d) value: %f\n", i, j, ME(test, i, j)); } } else { if (ABS(ME(test, i, j)) > tolerance) { fprintf(stderr, "WARNING: Subspace basis failed orthonormality check at (%d, %d) value: %f\n", i, j, ME(test, i, j)); } } } } if (printlevel > 0) printf("\nSubspace Basis Passed Orthonormality Check"); } Matrix centerThenProjectImages (Subspace *s, Matrix images) { Matrix subspims; mean_subtract_images (images, s->mean); subspims = transposeMultiplyMatrixL (s->basis, images); return subspims; } /* This function reads images in to a vector. That vector is then mean subtracted and then projected onto an optimal basis (PCA or LDA). Returned is a matrix that contains the images after they have been projected onto the subspace. */ Matrix readAndProjectImages (Subspace *s, char *imageNamesFile, char *imageDirectory, int *numImages, ImageList **srt) { int i, j; Matrix images, vector, smallVector; char name[FILE_LINE_LENGTH]; ImageList *subject, *replicate; DEBUG(1, "Reading training file names from file"); *srt = getImageNames(imageNamesFile, numImages); DEBUG_CHECK(*srt, "Error: header no imagenames found in file image list file"); /* Automatically determine number of pixels in images */ sprintf(name, "%s/%s", imageDirectory, (*srt)->filename); DEBUG(1, "Autodetecting number of pixels, i.e. vector length based on the size of image 0."); DEBUG_CHECK (autoFileLength(name) == s->numPixels, "Images sizes do not match subspace basis vector size"); DEBUG_INT(1, "Vector length", s->numPixels); DEBUG_CHECK(s->numPixels > 0, "Error positive value required for a Vector Length"); /*Images stored in the columns of a matrix */ DEBUG(1, "Allocating image matrix"); images = makeMatrix(s->basis->col_dim, *numImages); vector = makeMatrix(s->numPixels, 1); i = 0; for (subject = *srt; subject; subject = subject->next_subject) { for (replicate = subject; replicate; replicate = replicate->next_replicate) { if (debuglevel > 0) printf("%s ", replicate->filename); sprintf(name, "%s/%s", imageDirectory, replicate->filename); replicate->imageIndex = i; readFile(name, 0, vector); writeProgress("Reading images", i,*numImages); smallVector = centerThenProjectImages(s, vector); /* Copy the smaller vector into the image matrix*/ for (j = 0; j < smallVector->row_dim; j++) { ME(images, j, i) = ME(smallVector, j, 0); } freeMatrix(smallVector); i++; /* increament the image index */ } if (debuglevel > 0) printf("\n"); } return images; } /* basis_normalize Using the snapshot method, the Eigenvectors of the original problem are derived from the eigenvectors of the smaller covariance matrix by premultiplying the original eigenvectors by the data matrix. This operation give Eigenvectors that point in the proper direction, but that are not yet of unit length. So, we normalize them. */ void basis_normalize(Matrix eigenvectors) { int i, j; FTYPE sumsqr, inv_len; for (i = 0; i < eigenvectors->col_dim; i++) { sumsqr = 0.0; for (j = 0; j < eigenvectors->row_dim; j++) sumsqr += ME(eigenvectors, j, i) * ME(eigenvectors, j, i); if (sumsqr != 0) inv_len = 1.0 / sqrt(sumsqr); else inv_len = 0; for (j = 0; j < eigenvectors->row_dim; j++) ME(eigenvectors, j, i) *= inv_len; } } /* mean_subtract_images This function subtracts a mean image from a set of images in matrix images */ void mean_subtract_images (Matrix images, Matrix mean) { int i, j; for (i = 0; i < images->row_dim; i++) { for (j = 0; j < images->col_dim; j++) { ME(images, i, j) -= ME(mean, i, 0); } } } /* get_mean_image This function takes a matrix of images and returns the mean of all of the images in the matrix. */ Matrix get_mean_image (Matrix images) { int i, j; Matrix mean = makeMatrix(images->row_dim, 1); for (i = 0; i < images->row_dim; i++) { ME(mean, i, 0) = 0.0; for (j = 0; j < images->col_dim; j++) ME(mean, i, 0) += ME(images, i, j); ME(mean, i, 0) = ME(mean, i, 0) / images->col_dim; } return mean; }