www.pudn.com > tpWY.rar > tpWY.c
/* This program implements the permutation algorithm for the Westfall step-down adjusted p-values for the t-statistics, described in the paper by Dudoit S., Yang Y.H., Callow M.J., Speed T.P. (2000) "Statistical methods for identifying differentially expressed genes", UC Berkeley, technical report # 578. */ /* Created: Tue Oct 24 15:54:19 EDT 2000 */ /* Elisabetta Manduchi, CBIL, Penn Center for Bioinformatics */ #include#include #include #include "stat_util.h" #include /* argv[1]: input file name. */ /* argv[2]: output file name. */ /* argv[3]: number of rows in the input file. */ /* argv[4]: total number of columns in the input file (the first column should contain identifiers [up to 20 characters each] and should be separated by 1 tab from the next column). This number includes the indentifier column. */ /* argv[5]: number of columns in first group (in the input file these should precede the columns in the second group). */ /* argv[6]: the value (float) used to denote missing values */ /* argv[7]: can be 0 or 1; if set to 1, p-values are also computed. */ /* argv[8]: can be 0 or 1; if set to 1, all rearrangements are considered. This argument is ignored if argv[7]=0. */ /* argv[9]: number of rearrangements to use in the p-values calculation. If this number is greater than the number of all possible rearrangements, then all rearrangements are considered. This argument is ignored when argv[7]=0 or when argv[8]=1. */ FILE *ofh; char **id; float **data, *arr1, *arr2, *t_arr, *p, *adj_p, *t, *u, na; int do_p, num_rows, num_cols, num_gp1, *gp1, *rest, B, B_new, *indx; void mallocate(); int cmp(const void *v1, const void *v2); void compute_t(); void print1(); void print2(); void read_infile(char *filename); void simulation1(); void simulation2(); int update_p(); main(int argc, char *argv[]) { char *infile, *outfile; int do_all, c; time_t duration, start=time(0); if (argc < 8) { fprintf(stderr,"Must provide at least 7 command line arguments in the following order:\ninput file name\noutput file name\nnumber of rows in the input file\nnumber of columns in the input file\nnumber of columns in first group\nthe float used to denote missing values\n0 if p-values should not be computed, 1 otherwise\n"); exit(1); } infile = argv[1]; outfile = argv[2]; num_rows = atol(argv[3]); num_cols = atoi(argv[4]); num_gp1 = atoi(argv[5]); na = (float)atof(argv[6]); do_p = atoi(argv[7]); if (do_p==1 && argc<9) { fprintf(stderr, "Must specify whether to consider all rearrangements or not.\n"); exit(1); } if (do_p==1) { do_all = atoi(argv[8]); } if (do_p==1 && do_all==0 && argc<10) { fprintf(stderr, "Must provide the number of rearrangements to use in the p-values calculation.\n"); exit(1); } if (do_p==1 && do_all==0) { B = atoi(argv[9]); if (B>=bicoeff(num_cols-1, num_gp1)) { do_all = 1; fprintf(stderr, "Since the number of desired rearrangements is greater than or equal to the number\nof all possible rearrangements, all possible rearrangements are employed.\n"); } } if ((ofh=fopen(outfile,"w")) == NULL) { fprintf(stderr,"Cannot open %s for writing\n", outfile); exit(1); } mallocate(); read_infile(infile); compute_t(); for (c=0; c fabs(*(t_arr+*(int *)v2))) return -1; else return 0; } void compute_t() { int i, j; for (i=0; i =1) { *(adj_p+*(indx+i)) = (*(adj_p+*(indx+i))>=*(adj_p+*(indx+i-1))) ? *(adj_p+*(indx+i)) : *(adj_p+*(indx+i-1)); } } } void simulation2() { int i, j, h; long seed = -time(0); B_new = B; /* fprintf(stderr, "here1\n"); */ for (i=0; i =1) { *(adj_p+*(indx+i)) = (*(adj_p+*(indx+i))>=*(adj_p+*(indx+i-1))) ? *(adj_p+*(indx+i)) : *(adj_p+*(indx+i-1)); } } } int update_p() { int i, j, flag=0; for (i=0; i =fabs((*(t_arr+i)))) { *(p+i) += 1; } } *(u+num_rows-1) = *(t+*(indx+num_rows-1)); if (fabs((*(u+num_rows-1)))>=fabs((*(t_arr+*(indx+num_rows-1))))) *(adj_p+*(indx+num_rows-1)) += 1; for (i=num_rows-2; i>=0; i--) { *(u+i) = (fabs((*(u+i+1)))>= fabs((*(t+*(indx+i))))) ? *(u+i+1) : *(t+*(indx+i)); if (fabs((*(u+i)))>=fabs((*(t_arr+*(indx+i))))) *(adj_p+*(indx+i)) += 1; } } return flag; }