www.pudn.com > tpWY.rar > tpWY.c




/* This program implements the permutation algorithm for the Westfall step-down
   adjusted p-values for the t-statistics, described in the paper by Dudoit S., Yang 
   Y.H., Callow M.J., Speed T.P. (2000) "Statistical methods for identifying 
   differentially expressed genes", UC Berkeley, technical report # 578. */

/* Created: Tue Oct 24 15:54:19 EDT 2000 */
/* Elisabetta Manduchi, CBIL, Penn Center for Bioinformatics */

#include 
#include 
#include 
#include "stat_util.h"
#include 

/* argv[1]: input file name. */
/* argv[2]: output file name. */
/* argv[3]: number of rows in the input file. */
/* argv[4]: total number of columns in the input file (the first column 
            should contain identifiers [up to 20 characters each] and should
            be separated by 1 tab from the next column). This number includes
            the indentifier column. */
/* argv[5]: number of columns in first group (in the input file these should
            precede the columns in the second group). */
/* argv[6]: the value (float) used to denote missing values */
/* argv[7]: can be 0 or 1; if set to 1, p-values are also computed. */
/* argv[8]: can be 0 or 1; if set to 1, all rearrangements are considered.
            This argument is ignored if argv[7]=0. */
/* argv[9]: number of rearrangements to use in the p-values calculation. 
            If this number is greater than the number of all possible 
            rearrangements, then all rearrangements are considered. This 
            argument is ignored when argv[7]=0 or when argv[8]=1. */

FILE *ofh;
char **id;
float **data, *arr1, *arr2,  *t_arr, *p, *adj_p, *t, *u, na;
int do_p, num_rows, num_cols, num_gp1, *gp1, *rest, B, B_new, *indx;

void mallocate();
int cmp(const void *v1, const void *v2);
void compute_t();
void print1();
void print2();
void read_infile(char *filename);
void simulation1();
void simulation2();
int update_p();

main(int argc, char *argv[]) {
  char *infile, *outfile;
  int do_all, c;
  time_t duration, start=time(0);


  if (argc < 8) { 
    fprintf(stderr,"Must provide at least 7 command line arguments in the following order:\ninput file name\noutput file name\nnumber of rows in the input file\nnumber of columns in the input file\nnumber of columns in first group\nthe float used to denote missing values\n0 if p-values should not be computed, 1 otherwise\n");
    exit(1);
  }  

  infile = argv[1];
  outfile = argv[2];
  num_rows = atol(argv[3]);
  num_cols = atoi(argv[4]);
  num_gp1 = atoi(argv[5]);
  na = (float)atof(argv[6]);
  do_p = atoi(argv[7]);
  if (do_p==1 && argc<9) {
    fprintf(stderr, "Must specify whether to consider all rearrangements or not.\n");
    exit(1);
  }
  if (do_p==1) {
    do_all = atoi(argv[8]);
  }
  if (do_p==1 && do_all==0 && argc<10) {
    fprintf(stderr, "Must provide the number of rearrangements to use in the p-values calculation.\n");
    exit(1);
  }
  if (do_p==1 && do_all==0) {
    B = atoi(argv[9]);
    if (B>=bicoeff(num_cols-1, num_gp1)) {
      do_all = 1;
      fprintf(stderr, "Since the number of desired rearrangements is greater than or equal to the number\nof all possible rearrangements, all possible rearrangements are employed.\n");
    }
  }
  if ((ofh=fopen(outfile,"w")) == NULL) {
    fprintf(stderr,"Cannot open %s for writing\n", outfile);
    exit(1);
  } 

  mallocate();
  read_infile(infile);
  compute_t();

  for (c=0; cfabs(*(t_arr+*(int *)v2)))
    return -1;
  else
    return 0; 
}

void compute_t() {
  int i, j;

  for (i=0; i=1) {
      *(adj_p+*(indx+i)) = (*(adj_p+*(indx+i))>=*(adj_p+*(indx+i-1))) ? *(adj_p+*(indx+i)) : *(adj_p+*(indx+i-1));
    }

  }
}

void simulation2() {
  int i, j, h;
  long seed = -time(0);

  B_new = B;
  /* fprintf(stderr, "here1\n"); */
  for (i=0; i=1) {
      *(adj_p+*(indx+i)) = (*(adj_p+*(indx+i))>=*(adj_p+*(indx+i-1))) ? *(adj_p+*(indx+i)) : *(adj_p+*(indx+i-1));
    }
  }
}

int update_p() {  
  int i, j, flag=0;

  for (i=0; i=fabs((*(t_arr+i)))) {
	*(p+i) += 1;
      }
    }     
    *(u+num_rows-1) = *(t+*(indx+num_rows-1));
    if (fabs((*(u+num_rows-1)))>=fabs((*(t_arr+*(indx+num_rows-1)))))
      *(adj_p+*(indx+num_rows-1)) += 1;
    for (i=num_rows-2; i>=0; i--) {
      *(u+i) = (fabs((*(u+i+1)))>= fabs((*(t+*(indx+i))))) ? *(u+i+1) : *(t+*(indx+i));
      if (fabs((*(u+i)))>=fabs((*(t_arr+*(indx+i)))))
	*(adj_p+*(indx+i)) += 1;
    }
  }
  return flag;
}