www.pudn.com > weka.rar > RaceSearch.java, change:2001-03-14,size:54163b


/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RaceSearch.java
 *    Copyright (C) 2000 Mark Hall
 *
 */

package  weka.attributeSelection;

import java.io.*;
import java.util.*;
import weka.core.*;
import weka.experiment.PairedStats;
import weka.experiment.Stats;

/** 
 * Class for performing a racing search. <p>
 *
 * For more information see: <br>
 * Moore, A. W. and Lee, M. S. (1994). Efficient algorithms for minimising
 * cross validation error. Proceedings of the Eleventh International
 * Conference on Machine Learning. pp 190--198. <p>
 *
 * Valid options are:<p>
 *
 * -R <race type><br>
 * 0 = forward, 1 = backward, 2 = schemata, 3 = rank. <p>
 * 
 * -L <significance level> <br>
 * significance level to use for t-tests. <p>
 *
 * -T <threshold> <br>
 * threshold for considering mean errors of two subsets the same <p>
 *
 * -F <xval type> <br>
 * 0 = 10 fold, 1 = leave-one-out (selected automatically for schemata race
 * <p>
 *
 * -A <attribute evaluator> <br>
 * the attribute evaluator to use when doing a rank search <p>
 *
 * -Q <br>
 * produce a ranked list of attributes. Selecting this option forces
 * the race type to be forward. Racing continues until *all* attributes
 * have been selected, thus producing a ranked list of attributes. <p>
 *
 * -N <number to retain> <br>
 * Specify the number of attributes to retain. Overides any threshold. 
 * Use in conjunction with -Q. <p>
 * 
 * -J <threshold> <br>
 * Specify a threshold by which the AttributeSelection module can discard
 * attributes. Use in conjunction with -Q. <p>
 *
 * -Z <br>
 * Turn on verbose output for monitoring the search <p>
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.8 $
 */
public class RaceSearch extends ASSearch implements RankedOutputSearch, 
						    OptionHandler {

  /* the training instances */
  private Instances m_Instances = null;

  /** search types */
  private static final int FORWARD_RACE = 0;
  private static final int BACKWARD_RACE = 1;
  private static final int SCHEMATA_RACE = 2;
  private static final int RANK_RACE = 3;
  public static final Tag [] TAGS_SELECTION = {
    new Tag(FORWARD_RACE, "Forward selection race"),
    new Tag(BACKWARD_RACE, "Backward elimination race"),
    new Tag(SCHEMATA_RACE, "Schemata race"),
    new Tag(RANK_RACE, "Rank race")
      };
  
  /** the selected search type */
  private int m_raceType = FORWARD_RACE;
  
  /** xval types */
  private static final int TEN_FOLD = 0;
  private static final int LEAVE_ONE_OUT = 1;
  public static final Tag [] XVALTAGS_SELECTION = {
    new Tag(TEN_FOLD, "10 Fold"),
    new Tag(LEAVE_ONE_OUT, "Leave-one-out"),
      };

  /** the selected xval type */
  private int m_xvalType = TEN_FOLD;
  
  /** the class index */
  private int m_classIndex;

  /** the number of attributes in the data */
  private int m_numAttribs;

  /** the total number of partially/fully evaluated subsets */
  private int m_totalEvals;

  /** holds the merit of the best subset found */
  private double m_bestMerit = -Double.MAX_VALUE;

  /** the subset evaluator to use */
  private HoldOutSubsetEvaluator m_theEvaluator = null;

  /** the significance level for comparisons */
  private double m_sigLevel = 0.001;

  /** threshold for comparisons */
  private double m_delta = 0.001;

  /** the number of samples above which to begin testing for similarity
      between competing subsets */
  private int m_samples = 20;

  /** number of cross validation folds---equal to the number of instances
      for leave-one-out cv */
  private int m_numFolds = 10;

  /** the attribute evaluator to generate the initial ranking when
      doing a rank race */
  private ASEvaluation m_ASEval = new GainRatioAttributeEval();

  /** will hold the attribute ranking produced by the above attribute
      evaluator if doing a rank search */
  private int [] m_Ranking;

  /** verbose output for monitoring the search and debugging */
  private boolean m_debug = false;

  /** If true then produce a ranked list of attributes by fully traversing
      a forward hillclimb race */
  private boolean m_rankingRequested = false;

  /** The ranked list of attributes produced if m_rankingRequested is true */
  private double [][] m_rankedAtts;

  /** The number of attributes ranked so far (if ranking is requested) */
  private int m_rankedSoFar;

  /** The number of attributes to retain if a ranking is requested. -1
      indicates that all attributes are to be retained. Has precedence over
      m_threshold */
  private int m_numToSelect = -1;

  private int m_calculatedNumToSelect = -1;

  /** the threshold for removing attributes if ranking is requested */
  private double m_threshold = -Double.MAX_VALUE;

  /**
   * Returns a string describing this search method
   * @return a description of the search method suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Races the cross validation error of competing "
      +"attribute subsets. Use in conjuction with a ClassifierSubsetEval. "
      +"RaceSearch has four modes:\n\nforward selection "
      +"races all single attribute additions to a base set (initially "
      +" no attributes), selects the winner to become the new base set "
      +"and then iterates until there is no improvement over the base set. "
      +"\n\nBackward elimination is similar but the initial base set has all "
      +"attributes included and races all single attribute deletions. "
      +"\n\nSchemata search is a bit different. Each iteration a series of "
      +"races are run in parallel. Each race in a set determines whether "
      +"a particular attribute should be included or not---ie the race is "
      +"between the attribute being \"in\" or \"out\". The other attributes "
      +"for this race are included or excluded randomly at each point in the "
      +"evaluation. As soon as one race "
      +"has a clear winner (ie it has been decided whether a particular "
      +"attribute should be inor not) then the next set of races begins, "
      +"using the result of the winning race from the previous iteration as "
      +"new base set.\n\nRank race first ranks the attributes using an "
      +"attribute evaluator and then races the ranking. The race includes "
      +"no attributes, the top ranked attribute, the top two attributes, the "
      +"top three attributes, etc.\n\nIt is also possible to generate a "
      +"raked list of attributes through the forward racing process. "
      +"If generateRanking is set to true then a complete forward race will "
      +"be run---that is, racing continues until all attributes have been "
      +"selected. The order that they are added in determines a complete "
      +"ranking of all the attributes.\n\nRacing uses paired and unpaired "
      +"t-tests on cross-validation errors of competing subsets. When there "
      +"is a significant difference between the means of the errors of two "
      +"competing subsets then the poorer of the two can be eliminated from "
      +"the race. Similarly, if there is no significant difference between "
      +"the mean errors of two competing subsets and they are within some "
      +"threshold of each other, then one can be eliminated from the race. ";
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String raceTypeTipText() {
    return "Set the type of search.";
  }

  /**
   * Set the race type
   *
   * @param d the type of race
   */
  public void setRaceType (SelectedTag d) {
    
    if (d.getTags() == TAGS_SELECTION) {
      m_raceType = d.getSelectedTag().getID();
    }
    if (m_raceType == SCHEMATA_RACE && !m_rankingRequested) {
      try {
	setFoldsType(new SelectedTag(LEAVE_ONE_OUT,
				     XVALTAGS_SELECTION));
	setSignificanceLevel(0.01);
      } catch (Exception ex) {
      }
    } else {
      try {
	setFoldsType(new SelectedTag(TEN_FOLD,
				     XVALTAGS_SELECTION));
	setSignificanceLevel(0.001);
      } catch (Exception ex) {
      }
    }
  }

  /**
   * Get the race type
   *
   * @return the type of race
   */
  public SelectedTag getRaceType() {
    return new SelectedTag(m_raceType, TAGS_SELECTION);
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String significanceLevelTipText() {
    return "Set the significance level to use for t-test comparisons.";
  }

  /**
   * Sets the significance level to use
   * @param sig the significance level
   */
  public void setSignificanceLevel(double sig) {
    m_sigLevel = sig;
  }

  /**
   * Get the significance level
   * @return the current significance level
   */
  public double getSignificanceLevel() {
    return m_sigLevel;
  }
  
  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String thresholdTipText() {
    return "Set the error threshold by which to consider two subsets "
      +"equivalent.";
  }

  /**
   * Sets the threshold for comparisons
   * @param t the threshold to use
   */
  public void setThreshold(double t) {
    m_delta = t;
  }

  /**
   * Get the threshold
   * @return the current threshold
   */
  public double getThreshold() {
    return m_delta;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String foldsTipText() {
    return "Set the number of folds to use for x-val error estimation. "
      +"Leave-one-out is selected automatically for schemata search.";
  }

  /**
   * Set the xfold type
   *
   * @param d the type of xval
   */
  public void setFoldsType (SelectedTag d) {
    
    if (d.getTags() == XVALTAGS_SELECTION) {
      m_xvalType = d.getSelectedTag().getID();
    }
  }

  /**
   * Get the xfold type
   *
   * @return the type of xval
   */
  public SelectedTag getFoldsType () {
    return new SelectedTag(m_xvalType, XVALTAGS_SELECTION);
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String debugTipText() {
    return "Turn on verbose output for monitoring the search's progress.";
  }

  /**
   * Set whether verbose output should be generated.
   * @param d true if output is to be verbose.
   */
  public void setDebug(boolean d) {
    m_debug = d;
  }

  /**
   * Get whether output is to be verbose
   * @return true if output will be verbose
   */
  public boolean getDebug() {
    return m_debug;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String attributeEvaluatorTipText() {
    return "Attribute evaluator to use for generating an initial ranking. "
      +"Use in conjunction with a rank race";    
  }

  /**
   * Set the attribute evaluator to use for generating the ranking.
   * @param newEvaluator the attribute evaluator to use.
   */
  public void setAttributeEvaluator(ASEvaluation newEvaluator) {
    m_ASEval = newEvaluator;
  }

  /**
   * Get the attribute evaluator used to generate the ranking.
   * @return the evaluator used to generate the ranking.
   */
  public ASEvaluation getAttributeEvaluator() {
    return m_ASEval;
  }

    /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String generateRankingTipText() {
    return "Use the racing process to generate a ranked list of attributes. "
      +"Using this mode forces the race to be a forward type and then races "
      +"until all attributes have been added, thus giving a ranked list";
  }
  
  /**
   * Records whether the user has requested a ranked list of attributes.
   * @param doRank true if ranking is requested
   */
  public void setGenerateRanking(boolean doRank) {
    m_rankingRequested = doRank;
    if (m_rankingRequested) {
      try {
	setRaceType(new SelectedTag(FORWARD_RACE,
				    TAGS_SELECTION));
      } catch (Exception ex) {
      }
    }
  }

  /**
   * Gets whether ranking has been requested. This is used by the
   * AttributeSelection module to determine if rankedAttributes()
   * should be called.
   * @return true if ranking has been requested.
   */
  public boolean getGenerateRanking() {
    return m_rankingRequested;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numToSelectTipText() {
    return "Specify the number of attributes to retain. Use in conjunction "
      +"with generateRanking. The default value "
      +"(-1) indicates that all attributes are to be retained. Use either "
      +"this option or a threshold to reduce the attribute set.";
  }

  /**
   * Specify the number of attributes to select from the ranked list
   * (if generating a ranking). -1
   * indicates that all attributes are to be retained.
   * @param n the number of attributes to retain
   */
  public void setNumToSelect(int n) {
    m_numToSelect = n;
  }

  /**
   * Gets the number of attributes to be retained.
   * @return the number of attributes to retain
   */
  public int getNumToSelect() {
    return m_numToSelect;
  }

  /**
   * Gets the calculated number of attributes to retain. This is the
   * actual number of attributes to retain. This is the same as
   * getNumToSelect if the user specifies a number which is not less
   * than zero. Otherwise it should be the number of attributes in the
   * (potentially transformed) data.
   */
  public int getCalculatedNumToSelect() {
    if (m_numToSelect >= 0) {
      m_calculatedNumToSelect = m_numToSelect;
    }
    return m_calculatedNumToSelect;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String selectionThresholdTipText() {
    return "Set threshold by which attributes can be discarded. Default value "
      + "results in no attributes being discarded. Use in conjunction with "
      + "generateRanking";
  }

  /**
   * Set the threshold by which the AttributeSelection module can discard
   * attributes.
   * @param threshold the threshold.
   */
  public void setSelectionThreshold(double threshold) {
    m_threshold = threshold;
  }

  /**
   * Returns the threshold so that the AttributeSelection module can
   * discard attributes from the ranking.
   */
  public double getSelectionThreshold() {
    return m_threshold;
  }


  /**
   * Returns an enumeration describing the available options
   * @return an enumeration of all the available options
   **/
  public Enumeration listOptions () {
    Vector newVector = new Vector(8);
     newVector.addElement(new Option("\tType of race to perform.\n\t"
				     +"(default = 0).",
				     "R", 1 ,"-R <0 = forward | 1 = backward "
				     +"race | 2 = schemata | 3 = rank>"));
     newVector.addElement(new Option("\tSignificance level for comaparisons"
				     +"\n\t(default = 0.001(forward/backward/"
				     +"rank)/0.01(schemata)).",
				     "L",1,"-L <significance>"));
     newVector.addElement(new Option("\tThreshold for error comparison.\n\t"
				     +"(default = 0.001).",
				     "T",1,"-T <threshold>"));
     
     newVector.addElement(new Option("\tAttribute ranker to use if doing a "
			   +"\n\trank search. Place any\n\t"
			   +"evaluator options LAST on the" 
			   + "\n\tcommand line following a \"--\"." 
			   + "\n\teg. -A weka.attributeSelection."
			   +"GainRatioAttributeEval ... " 
			   + "-- -M.\n\t(default = GainRatioAttributeEval)", 
			   "A", 1, "-A <attribute evaluator>"));
    
     newVector.addElement(new Option("\tFolds for cross validation\n\t"
			    +"(default = 0 (1 if schemata race)",
			    "F",1,"-F <0 = 10 fold | 1 = leave-one-out>"));
     newVector.addElement(new Option("\tGenerate a ranked list of attributes."
				     +"\n\tForces the search to be forward\n."
				     +"\tand races until all attributes have\n"
				     +"\tselected, thus producing a ranking.",
				     "Q",0,"-Q"));

    newVector
      .addElement(new Option("\tSpecify number of attributes to retain from "
			     +"\n\tthe ranking. Overides -T. Use "
			     +"in conjunction with -Q"
			     ,"N",1
			     , "-N <num to select>"));

    newVector
      .addElement(new Option("\tSpecify a theshold by which attributes" 
			     + "\n\tmay be discarded from the ranking."
			     +"\n\tUse in conjuction with -Q","T",1
			     , "-T <threshold>"));

     newVector.addElement(new Option("\tVerbose output for monitoring the "
				     +"search.",
				     "Z",0,"-Z"));
     if ((m_ASEval != null) && 
	 (m_ASEval instanceof OptionHandler)) {
       newVector.addElement(new Option("", "", 0, "\nOptions specific to " 
				       + "evaluator " 
				       + m_ASEval.getClass().getName() 
				       + ":"));
       Enumeration enum = ((OptionHandler)m_ASEval).listOptions();
       
       while (enum.hasMoreElements()) {
	 newVector.addElement(enum.nextElement());
       }
     }
     return newVector.elements();
  }

  /**
   * Parses a given list of options.
   *
   * Valid options are:<p>
   *
   * -R <race type><br>
   * 0 = forward, 1 = backward, 2 = schemata, 3 = rank. <p>
   * 
   * -L <significance level> <br>
   * significance level to use for t-tests. <p>
   *
   * -T <threshold> <br>
   * threshold for considering mean errors of two subsets the same <p>
   *
   * -F <xval type> <br>
   * 0 = 10 fold, 1 = leave-one-out (selected automatically for schemata race
   * <p>
   *
   * -A <attribute evaluator> <br>
   * the attribute evaluator to use when doing a rank search <p>
   *
   * -Q <br>
   * produce a ranked list of attributes. Selecting this option forces
   * the race type to be forward. Racing continues until *all* attributes
   * have been selected, thus producing a ranked list of attributes. <p>
   *
   * -N <number to retain> <br>
   * Specify the number of attributes to retain. Overides any threshold. 
   * Use in conjunction with -Q. <p>
   * 
   * -J <threshold> <br>
   * Specify a threshold by which the AttributeSelection module can discard
   * attributes. Use in conjunction with -Q. <p>
   *
   * -Z <br>
   * Turn on verbose output for monitoring the search <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   *
   **/
  public void setOptions (String[] options)
    throws Exception
  {
    String optionString;
    resetOptions();
    
    optionString = Utils.getOption('R', options);
    if (optionString.length() != 0) {
      setRaceType(new SelectedTag(Integer.parseInt(optionString),
				  TAGS_SELECTION));
    }
    
    optionString = Utils.getOption('F', options);
    if (optionString.length() != 0) {
      setFoldsType(new SelectedTag(Integer.parseInt(optionString),
				  XVALTAGS_SELECTION));
    }

    optionString = Utils.getOption('L', options);
    if (optionString.length() !=0) {
      Double temp;
      temp = Double.valueOf(optionString);
      setSignificanceLevel(temp.doubleValue());
    }

    optionString = Utils.getOption('T', options);
    if (optionString.length() !=0) {
      Double temp;
      temp = Double.valueOf(optionString);
      setThreshold(temp.doubleValue());
    }

    optionString = Utils.getOption('A', options);
    if (optionString.length() != 0) {
      setAttributeEvaluator(ASEvaluation.forName(optionString, 
			    Utils.partitionOptions(options)));
    }

    setGenerateRanking(Utils.getFlag('Q', options));

    optionString = Utils.getOption('T', options);
    if (optionString.length() != 0) {
      Double temp;
      temp = Double.valueOf(optionString);
      setThreshold(temp.doubleValue());
    }
    
    optionString = Utils.getOption('N', options);
    if (optionString.length() != 0) {
      setNumToSelect(Integer.parseInt(optionString));
    }

    setDebug(Utils.getFlag('Z', options));
  }

  /**
   * Gets the current settings of BestFirst.
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    int current = 0;
    String[] evaluatorOptions = new String[0];

    if ((m_ASEval != null) && 
	(m_ASEval instanceof OptionHandler)) {
      evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
    }
    String[] options = new String[17+evaluatorOptions.length];

    options[current++] = "-R"; options[current++] = ""+m_raceType;
    options[current++] = "-L"; options[current++] = ""+getSignificanceLevel();
    options[current++] = "-T"; options[current++] = ""+getThreshold();
    options[current++] = "-F"; options[current++] = ""+m_xvalType;
    if (getGenerateRanking()) {
      options[current++] = "-Q";
    }
    options[current++] = "-N"; options[current++] = ""+getNumToSelect();
    options[current++] = "-J"; options[current++] = ""+getSelectionThreshold();
    if (getDebug()) {
      options[current++] = "-Z";
    }
    
    if (getAttributeEvaluator() != null) {
      options[current++] = "-A";
      options[current++] = getAttributeEvaluator().getClass().getName();
      options[current++] = "--";
      System.arraycopy(evaluatorOptions, 0, options, current, 
		       evaluatorOptions.length);
      current += evaluatorOptions.length;
    }

    
    while (current < options.length) {
      options[current++] = "";
    }

    return  options;
  }




  /**
   * Searches the attribute subset space by racing cross validation
   * errors of competing subsets
   *
   * @param ASEvaluator the attribute evaluator to guide the search
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @exception Exception if the search can't be completed
   */
  public int[] search (ASEvaluation ASEval, Instances data)
    throws Exception
  {
    if (!(ASEval instanceof SubsetEvaluator)) {
      throw  new Exception(ASEval.getClass().getName() 
			   + " is not a " 
			   + "Subset evaluator! (RaceSearch)");
    }

    if (ASEval instanceof UnsupervisedSubsetEvaluator) {
      throw new Exception("Can't use an unsupervised subset evaluator "
			  +"(RaceSearch).");
    }

    if (!(ASEval instanceof HoldOutSubsetEvaluator)) {
      throw new Exception("Must use a HoldOutSubsetEvaluator, eg. "
			  +"weka.attributeSelection.ClassifierSubsetEval "
			  +"(RaceSearch)");
    }

    if (!(ASEval instanceof ErrorBasedMeritEvaluator)) {
      throw new Exception("Only error based subset evaluators can be used, "
			  +"eg. weka.attributeSelection.ClassifierSubsetEval "
			  +"(RaceSearch)");
    }

    m_Instances = data;
    m_Instances.deleteWithMissingClass();
    if (m_Instances.numInstances() == 0) {
      throw new Exception("All instances have missing class! (RaceSearch)");
    }
    if (m_rankingRequested && m_numToSelect > m_Instances.numAttributes()-1) {
      throw new Exception("More attributes requested than exist in the data "
			  +"(RaceSearch).");
    }
    m_theEvaluator = (HoldOutSubsetEvaluator)ASEval;
    m_numAttribs = m_Instances.numAttributes();
    m_classIndex = m_Instances.classIndex();

    if (m_rankingRequested) {
      m_rankedAtts = new double[m_numAttribs-1][2];
      m_rankedSoFar = 0;
    }

    if (m_xvalType == LEAVE_ONE_OUT) {
      m_numFolds = data.numInstances();
    } else {
      m_numFolds = 10;
    }

    data.randomize(new Random(1));
    int [] bestSubset=null;

    switch (m_raceType) {
    case FORWARD_RACE:
    case BACKWARD_RACE: 
      bestSubset = hillclimbRace(data);
      break;
    case SCHEMATA_RACE:
      bestSubset = schemataRace(data);
      break;
    case RANK_RACE:
      bestSubset = rankRace(data);
      break;
    }

    return bestSubset;
  }

  public double [][] rankedAttributes() throws Exception {
    if (!m_rankingRequested) {
      throw new Exception("Need to request a ranked list of attributes "
			  +"before attributes can be ranked (RaceSearch).");
    }
    if (m_rankedAtts == null) {
      throw new Exception("Search must be performed before attributes "
			  +"can be ranked (RaceSearch).");
    }
    
    double [][] final_rank = new double [m_rankedSoFar][2];
    for (int i=0;i<m_rankedSoFar;i++) {
      final_rank[i][0] = m_rankedAtts[i][0];
      final_rank[i][1] = m_rankedAtts[i][1];
    }

    if (m_numToSelect <= 0) {
      if (m_threshold == -Double.MAX_VALUE) {
	m_calculatedNumToSelect = final_rank.length;
      } else {
	determineNumToSelectFromThreshold(final_rank);
      }
    }

    return final_rank;
  }

  private void determineNumToSelectFromThreshold(double [][] ranking) {
    int count = 0;
    for (int i = 0; i < ranking.length; i++) {
      if (ranking[i][1] > m_threshold) {
	count++;
      }
    }
    m_calculatedNumToSelect = count;
  }

  /**
   * Print an attribute set.
   */
  private String printSets(char [][]raceSets) {
    StringBuffer temp = new StringBuffer();
    for (int i=0;i<raceSets.length;i++) {
      for (int j=0;j<m_numAttribs;j++) {
	temp.append(raceSets[i][j]);
      }
      temp.append('\n');
    }
    return temp.toString();
  }

  /**
   * Performs a schemata race---a series of races in parallel.
   * @param data the instances to estimate accuracy over.
   * @return an array of selected attribute indices.
   */
  private int [] schemataRace(Instances data) throws Exception {
    // # races, 2 (competitors in each race), # attributes
    char [][][] parallelRaces;
    int numRaces = m_numAttribs-1;
    Random r = new Random(42);
    int numInstances = data.numInstances();
    Instances trainCV; Instances testCV;
    Instance testInstance;

    // statistics on the racers
    Stats [][] raceStats = new Stats[numRaces][2];
    
    parallelRaces = new char [numRaces][2][m_numAttribs-1];
    char [] base = new char [m_numAttribs];
    for (int i=0;i<m_numAttribs;i++) {
      base[i] = '*';
    }

    int count=0;
    // set up initial races
    for (int i=0;i<m_numAttribs;i++) {
      if (i != m_classIndex) {
	parallelRaces[count][0] = (char [])base.clone();
	parallelRaces[count][1] = (char [])base.clone();
	parallelRaces[count][0][i] = '1';
	parallelRaces[count++][1][i] = '0';
      }
    }
    
    if (m_debug) {
      System.err.println("Initial sets:\n");
      for (int i=0;i<numRaces;i++) {
	System.err.print(printSets(parallelRaces[i])+"--------------\n");
      }
    }
    
    BitSet randomB = new BitSet(m_numAttribs);
    char [] randomBC = new char [m_numAttribs];

    // notes which bit positions have been decided
    boolean [] attributeConstraints = new boolean[m_numAttribs];
    double error;
    int evaluationCount = 0;
    raceSet: while (numRaces > 0) {
      boolean won = false;
      for (int i=0;i<numRaces;i++) {
	raceStats[i][0] = new Stats();
	raceStats[i][1] = new Stats();
      }

      // keep an eye on how many test instances have been randomly sampled
      int sampleCount = 0;
      // run the current set of races
      while (!won) {
	// generate a random binary string
	for (int i=0;i<m_numAttribs;i++) {
	  if (i != m_classIndex) {
	    if (!attributeConstraints[i]) {
	      if (r.nextDouble() < 0.5) {
		randomB.set(i);
	      } else {
		randomB.clear(i);
	      }
	    } else { // this position has been decided from previous races
	      if (base[i] == '1') { 
		randomB.set(i);
	      } else {
		randomB.clear(i);
	      }
	    }
	  }
	}
	
	// randomly select an instance to test on
	int testIndex = Math.abs(r.nextInt() % numInstances);
	trainCV = data.trainCV(numInstances, testIndex);
	testCV = data.testCV(numInstances, testIndex);
	testInstance = testCV.instance(0);
	sampleCount++;
	/*	if (sampleCount > numInstances) {
	  throw new Exception("raceSchemata: No clear winner after sampling "
			      +sampleCount+" instances.");
			      } */
	
	m_theEvaluator.buildEvaluator(trainCV);
	
	// the evaluator must retrain for every test point
	error = -((HoldOutSubsetEvaluator)m_theEvaluator).
	  evaluateSubset(randomB, 
			 testInstance,
			 true);
	evaluationCount++;
	
	// see which racers match this random subset
	for (int i=0;i<m_numAttribs;i++) {
	  if (randomB.get(i)) {
	    randomBC[i] = '1';
	  } else {
	    randomBC[i] = '0';
	  }
	}
	//	System.err.println("Random subset: "+(new String(randomBC)));

        checkRaces: for (int i=0;i<numRaces;i++) {
	  // if a pair of racers has evaluated more than num instances
	  // then bail out---unlikely that having any more atts is any
	  // better than the current base set.
	  if (((raceStats[i][0].count + raceStats[i][1].count) / 2) > 
	      (numInstances)) {
	    break raceSet;
	  }
	  for (int j=0;j<2;j++) {
	    boolean matched = true;
	    for (int k =0;k<m_numAttribs;k++) {
	      if (parallelRaces[i][j][k] != '*') {
		if (parallelRaces[i][j][k] != randomBC[k]) {
		  matched = false;
		  break;
		}
	      }
	    }
	    if (matched) { // update the stats for this racer
	      //	      System.err.println("Matched "+i+" "+j);
	      raceStats[i][j].add(error);

		// does this race have a clear winner, meaning we can
		// terminate the whole set of parallel races?
		if (raceStats[i][0].count > m_samples &&
		    raceStats[i][1].count > m_samples) {
		  raceStats[i][0].calculateDerived();
		  raceStats[i][1].calculateDerived();
		  //		  System.err.println(j+" : "+(new String(parallelRaces[i][j])));
		  //		  System.err.println(raceStats[i][0]);
		  //		  System.err.println(raceStats[i][1]);
		  // check the ttest
		  double prob;
		  // first check for difference in variance
		  /*if ((prob = ftest(raceStats[i][0], raceStats[i][1])) < m_sigLevel) {
		    System.err.println("Ftest prob (diff var) : "+prob);
		    prob = tutest(raceStats[i][0], raceStats[i][1]);
		    } else { */ // use ttest for same variance
		  //		    System.err.println("Ftest prob : "+prob);
		    prob = ttest(raceStats[i][0], raceStats[i][1]);
		    // }
		    //		  System.err.println("Prob :"+prob);
		  if (prob < m_sigLevel) { // stop the races we have a winner!
		    if (raceStats[i][0].mean < raceStats[i][1].mean) {
		      base = (char [])parallelRaces[i][0].clone();
		      m_bestMerit = raceStats[i][0].mean;
		      if (m_debug) {
			System.err.println("contender 0 won ");
		      }
		    } else {
		      base = (char [])parallelRaces[i][1].clone();
		      m_bestMerit = raceStats[i][1].mean;
		      if (m_debug) {
			System.err.println("contender 1 won");
		      }
		    }
		    if (m_debug) {
		      System.err.println((new String(parallelRaces[i][0]))
				 +" "+(new String(parallelRaces[i][1])));
		      System.err.println("Means : "+raceStats[i][0].mean
					 +" vs"+raceStats[i][1].mean);
		      System.err.println("Evaluations so far : "
					 +evaluationCount);
		    }
		    won = true;
		    break checkRaces;
		  }
		}
	     
	    }
	  }
	}
      }

      numRaces--;
      // set up the next set of races if necessary
      if (numRaces > 0 && won) {
	parallelRaces = new char [numRaces][2][m_numAttribs-1];
	raceStats = new Stats[numRaces][2];
	// update the attribute constraints
	for (int i=0;i<m_numAttribs;i++) {
	  if (i != m_classIndex && !attributeConstraints[i] &&
	      base[i] != '*') {
	    attributeConstraints[i] = true;
	    break;
	  }
	}
	count=0;
	for (int i=0;i<numRaces;i++) {
	  parallelRaces[i][0] = (char [])base.clone();
	  parallelRaces[i][1] = (char [])base.clone();
	  for (int j=count;j<m_numAttribs;j++) {
	    if (j != m_classIndex && parallelRaces[i][0][j] == '*') {
	      parallelRaces[i][0][j] = '1';
	      parallelRaces[i][1][j] = '0';
	      count = j+1;
	      break;
	    }
	  }
	}
	
	if (m_debug) {
	  System.err.println("Next sets:\n");
	  for (int i=0;i<numRaces;i++) {
	    System.err.print(printSets(parallelRaces[i])+"--------------\n");
	  }
	}
      }
    }

    if (m_debug) {
      System.err.println("Total evaluations : "
			 +evaluationCount);
    }
    return attributeList(base);
  }

  // Adapted from numerical recipes
  private double gammln(double xx) {
    double x,y,tmp,ser;
    final double [] cof = {76.18009172947146,-86.50532032941677,
			  24.01409824083091,-1.231739572450155,
			  0.1208650973866179e-2,-0.5395239384953e-5};
    int j;
    
    y=x=xx;
    tmp=x+5.5;
    tmp -= (x+0.5)*Math.log(tmp);
    ser=1.000000000190015;
    for (j=0;j<=5;j++) ser += cof[j]/++y;
    return -tmp+Math.log(2.5066282746310005*ser/x);
  }

  // Adapted from numerical recipes
  private double betacf(double a, double b, double x) throws Exception {
    final int maxit = 100;
    final double eps = 3.0e-7;
    final double fpmin = 1.0e-30;
    int m,m2;
    double aa,c,d,del,h,qab,qam,qap;
    //    System.err.println("a "+a+" b "+b+" x "+x);
    qab=a+b;
    qap=a+1.0;
    qam=a-1.0;
    c=1.0;
    d=1.0-qab*x/qap;
    if (Math.abs(d) < fpmin) d=fpmin;
    d=1.0/d;
    h=d;
    for (m=1;m<=maxit;m++) {
      m2=2*m;
      aa=m*(b-m)*x/((qam+m2)*(a+m2));
      d=1.0+aa*d;
      if (Math.abs(d) < fpmin) d=fpmin;
      c=1.0+aa/c;
      if (Math.abs(c) < fpmin) c=fpmin;
      d=1.0/d;
      h *= d*c;
      aa = -(a+m)*(qab+m)*x/((a+m2)*(qap+m2));
      d=1.0+aa*d;
      if (Math.abs(d) < fpmin) d=fpmin;
      c=1.0+aa/c;
      if (Math.abs(c) < fpmin) c=fpmin;
      d=1.0/d;
      del=d*c;
      h *= del;
      if (Math.abs(del-1.0) < eps) break;
    }
    if (m > maxit) {
      throw new Exception("a or b too big, or maxit too small in betacf");
    }
    return h;
  }

  // Adapted from numerical recipes
  private double betai(double a, double b, double x) throws Exception {
    double bt;
    //    System.err.println("***a "+a+" b "+b+" x "+x);
    if (x < 0.0 || x > 1.0) {
      throw new Exception("Bad x in routine betai");
    }
    if (x == 0.0 || x == 1.0) bt=0.0;
    else
      bt=Math.exp(gammln(a+b)-gammln(a)-gammln(b)+a*Math.log(x)+b*Math.log(1.0-x));
    if (x < (a+1.0)/(a+b+2.0))
      return bt*betacf(a,b,x)/a;
    else
      return 1.0-bt*betacf(b,a,1.0-x)/b;
  }

  // ftest for differences in variance. Returns probability that difference
  // in variance is due to chance. Adapted from numerical recipes
  private double ftest(Stats c1, Stats c2) throws Exception {
    double n1 = c1.count; double n2 = c2.count;
    double var1 = c1.stdDev*c1.stdDev;
    double var2 = c2.stdDev*c2.stdDev;
    double ave1 = c1.mean;
    double ave2 = c2.mean;
    double f,df1,df2,prob;
    
    if (var1 > var2) {
      f=var1/var2;
      df1=n1-1;
      df2=n2-1;
    } else {
      f=var2/var1;
      df1=n2-1;
      df2=n1-1;
    }
    prob = 2.0*betai(0.5*df2,0.5*df1,df2/(df2+df1*(f)));
    if (prob > 1.0) prob=2.0-prob;

    return prob;
  }

  // t-test for unequal sample sizes and same variance. Returns probability
  // that observed difference in means is due to chance. Adapted from
  // numerical recipes
  private double ttest(Stats c1, Stats c2) throws Exception {
    double n1 = c1.count; double n2 = c2.count;
    double var1 = c1.stdDev*c1.stdDev;
    double var2 = c2.stdDev*c2.stdDev;
    double ave1 = c1.mean;
    double ave2 = c2.mean;
    
    double df=n1+n2-2;
    double svar=((n1-1)*var1+(n2-1)*var2)/df;
    double t=(ave1-ave2)/Math.sqrt(svar*(1.0/n1+1.0/n2));
    t = Math.abs(t);
    //    System.err.println("t : "+t);
    double prob=betai(0.5*df,0.5,df/(df+(t)*(t)));
    
    return prob;
  }

  // t-test for unequal sample sizes and different variances. Returns the 
  // probability. Adapted from numerical recipes
  private double tutest(Stats c1, Stats c2) throws Exception {
    double n1 = c1.count; double n2 = c2.count;
    double var1 = c1.stdDev*c1.stdDev;
    double var2 = c2.stdDev*c2.stdDev;
    double ave1 = c1.mean;
    double ave2 = c2.mean;

    double t=(ave1-ave2)/Math.sqrt(var1/n1+var2/n2);
    t = Math.abs(t);
    double df=Math.sqrt(var1/n1+var2/n2)/(Math.sqrt(var1/n1)/(n1-1)+Math.sqrt(var2/n2)/(n2-1));
    //    System.err.println("t : "+t);
    //    System.err.println("df : "+df);
    double prob = betai(0.5*df,0.5,df/(df+Math.sqrt(t)));
    return prob;
  }
    
  /**
   * Performs a rank race---race consisting of no attributes, the top
   * ranked attribute, the top two attributes etc. The initial ranking
   * is determined by an attribute evaluator.
   * @param data the instances to estimate accuracy over
   * @return an array of selected attribute indices.
   */
  private int [] rankRace(Instances data) throws Exception {
    char [] baseSet = new char [m_numAttribs];
    char [] bestSet;
    double bestSetError;
    for (int i=0;i<m_numAttribs;i++) {
      if (i == m_classIndex) {
	baseSet[i] = '-';
      } else {
	baseSet[i] = '0';
      }
    }

    int numCompetitors = m_numAttribs-1;
    char [][] raceSets = new char [numCompetitors+1][m_numAttribs];
    int winner;
    
    if (m_ASEval instanceof AttributeEvaluator) {
      // generate the attribute ranking first
      Ranker ranker = new Ranker();
      ((AttributeEvaluator)m_ASEval).buildEvaluator(data);
      m_Ranking = ranker.search((AttributeEvaluator)m_ASEval,data);
    } else {
      ForwardSelection fs = new ForwardSelection();
      double [][]rankres; 
      fs.setGenerateRanking(true);
      ((SubsetEvaluator)m_ASEval).buildEvaluator(data);
      fs.search(m_ASEval, data);
      rankres = fs.rankedAttributes();
      m_Ranking = new int[rankres.length];
      for (int i=0;i<rankres.length;i++) {
	m_Ranking[i] = (int)rankres[i][0];
      }
    }

    // set up the race
    raceSets[0] = (char [])baseSet.clone();
    for (int i=0;i<m_Ranking.length;i++) {
      raceSets[i+1] = (char [])raceSets[i].clone();
      raceSets[i+1][m_Ranking[i]] = '1';
    }
    
    if (m_debug) {
      System.err.println("Initial sets:\n"+printSets(raceSets));
    }
    
    // run the race
    double [] winnerInfo = raceSubsets(raceSets, data, true);
    bestSetError = winnerInfo[1];
    bestSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    m_bestMerit = bestSetError;
    return attributeList(bestSet);
  }
  
  /**
   * Performs a hill climbing race---all single attribute changes to a
   * base subset are raced in parallel. The winner is chosen and becomes
   * the new base subset and the process is repeated until there is no
   * improvement in error over the base subset.
   * @param data the instances to estimate accuracy over
   * @return an array of selected attribute indices.
   */
  private int [] hillclimbRace(Instances data) throws Exception {
    double baseSetError;
    char [] baseSet = new char [m_numAttribs];
    int rankCount = 0;

    for (int i=0;i<m_numAttribs;i++) {
      if (i != m_classIndex) {
	if (m_raceType == FORWARD_RACE) {
	  baseSet[i] = '0';
	} else {
	  baseSet[i] = '1';
	} 
      } else {
	baseSet[i] = '-';
      }
    }

    int numCompetitors = m_numAttribs-1;
    char [][] raceSets = new char [numCompetitors+1][m_numAttribs];
    int winner;

    raceSets[0] = (char [])baseSet.clone();
    int count = 1;
    // initialize each race set to 1 attribute
    for (int i=0;i<m_numAttribs;i++) {
      if (i != m_classIndex) {
	raceSets[count] = (char [])baseSet.clone();
	if (m_raceType == BACKWARD_RACE) {
	  raceSets[count++][i] = '0';
	} else {
	  raceSets[count++][i] = '1';
	}
      }
    }

    if (m_debug) {
      System.err.println("Initial sets:\n"+printSets(raceSets));
    }
    
    // race the initial sets (base set either no or all features)
    double [] winnerInfo = raceSubsets(raceSets, data, true);
    baseSetError = winnerInfo[1];
    m_bestMerit = baseSetError;
    baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    if (m_rankingRequested) {
      m_rankedAtts[m_rankedSoFar][0] = (int)(winnerInfo[0]-1);
      m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];
      m_rankedSoFar++;
    }

    boolean improved = true;
    int j;
    // now race until there is no improvement over the base set or only
    // one competitor remains
    while (improved) {
      // generate the next set of competitors
      numCompetitors--;
      if (numCompetitors == 0) { //race finished!
	break;
      }
      j=0;
      // +1. we'll race against the base set---might be able to bail out
      // of the race if none from the new set are statistically better
      // than the base set. Base set is stored in loc 0.
      raceSets = new char [numCompetitors+1][m_numAttribs];
      for (int i=0;i<numCompetitors+1;i++) {
	raceSets[i] = (char [])baseSet.clone();
	if (i > 0) {
	  for (int k=j;k<m_numAttribs;k++) {
	    if (m_raceType == 1) {
	      if (k != m_classIndex && raceSets[i][k] != '0') {
		raceSets[i][k] = '0';
		j = k+1;
		break;
	      }
	    } else {
	      if (k != m_classIndex && raceSets[i][k] != '1') {
		raceSets[i][k] = '1';
		j = k+1;
		break;
	      }
	    }
	  }
	}
      }
      
      if (m_debug) {
	System.err.println("Next set : \n"+printSets(raceSets));
      }
      improved = false;
      winnerInfo = raceSubsets(raceSets, data, true);
      String bs = new String(baseSet); 
      String win = new String(raceSets[(int)winnerInfo[0]]);
      if (bs.compareTo(win) == 0) {
	// race finished
      } else {
	if (winnerInfo[1] < baseSetError || m_rankingRequested) {
	  improved = true;
	  baseSetError = winnerInfo[1];
	  m_bestMerit = baseSetError;
	  // find which att is different
	  if (m_rankingRequested) {
	    for (int i = 0; i < baseSet.length; i++) {
	      if (win.charAt(i) != bs.charAt(i)) {
		m_rankedAtts[m_rankedSoFar][0] = i;
		m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];
		m_rankedSoFar++;
	      }
	    }
	  }
	  baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();
	} else {
	  // Will get here for a subset whose error is outside the delta
	  // threshold but is not *significantly* worse than the base
	  // subset
	  //throw new Exception("RaceSearch: problem in hillClimbRace");
	}
      }
    }
    return attributeList(baseSet);
  }

  /**
   * Convert an attribute set to an array of indices
   */
  private int [] attributeList(char [] list) {
    int count = 0;

    for (int i=0;i<m_numAttribs;i++) {
      if (list[i] == '1') {
	count++;
      }
    }

    int [] rlist = new int[count];
    count = 0;
     for (int i=0;i<m_numAttribs;i++) {
       if (list[i] == '1') {
	 rlist[count++] = i;
       }
     }

     return rlist;
  }

  /**
   * Races the leave-one-out cross validation errors of a set of
   * attribute subsets on a set of instances.
   * @param raceSets a set of attribute subset specifications
   * @param data the instances to use when cross validating
   * @param baseSetIncluded true if the first attribute set is a
   * base set generated from the previous race
   * @return the index of the winning subset
   * @exception Exception if an error occurs during cross validation
   */
  private double [] raceSubsets(char [][]raceSets, Instances data,
				boolean baseSetIncluded) 
    throws Exception {
    // the evaluators --- one for each subset
    ASEvaluation [] evaluators = 
      m_theEvaluator.makeCopies(m_theEvaluator, raceSets.length);

    // array of subsets eliminated from the race
    boolean [] eliminated = new boolean [raceSets.length];

    // individual statistics
    Stats [] individualStats = new Stats [raceSets.length];

    // pairwise statistics
    PairedStats [][] testers = 
      new PairedStats[raceSets.length][raceSets.length];

    /** do we ignore the base set or not? */
    int startPt = m_rankingRequested ? 1 : 0;

    for (int i=0;i<raceSets.length;i++) {
      individualStats[i] = new Stats();
      for (int j=i+1;j<raceSets.length;j++) {
	testers[i][j] = new PairedStats(m_sigLevel);
      }
    }
    
    BitSet [] raceBitSets = new BitSet[raceSets.length];
    for (int i=0;i<raceSets.length;i++) {
      raceBitSets[i] = new BitSet(m_numAttribs);
      for (int j=0;j<m_numAttribs;j++) {
	if (raceSets[i][j] == '1') {
	  raceBitSets[i].set(j);
	}
      }
    }

    // now loop over the data points collecting leave-one-out errors for
    // each attribute set
    Instances trainCV;
    Instances testCV;
    Instance testInst;
    double [] errors = new double [raceSets.length];
    int eliminatedCount = 0;
    int processedCount = 0;
    // if there is one set left in the race then we need to continue to
    // evaluate it for the remaining instances in order to get an
    // accurate error estimate
    Stats clearWinner = null;
    int foldSize=1;
    processedCount = 0;
    race: for (int i=0;i<m_numFolds;i++) {
      trainCV = data.trainCV(m_numFolds, i);
      testCV = data.testCV(m_numFolds, i);
      foldSize = testCV.numInstances();
      
      // loop over the surviving attribute sets building classifiers for this
      // training set
      for (int j=startPt;j<raceSets.length;j++) {
	if (!eliminated[j]) {
	  evaluators[j].buildEvaluator(trainCV);
	}
      }

      for (int z=0;z<testCV.numInstances();z++) {
	testInst = testCV.instance(z);
	processedCount++;

	// loop over surviving attribute sets computing errors for this
	// test point
	for (int zz=startPt;zz<raceSets.length;zz++) {
	  if (!eliminated[zz]) {
	    if (z == 0) {// first test instance---make sure classifier is built
	      errors[zz] = -((HoldOutSubsetEvaluator)evaluators[zz]).
		evaluateSubset(raceBitSets[zz], 
			       testInst,
			       true);
	    } else { // must be k fold rather than leave one out
	      errors[zz] = -((HoldOutSubsetEvaluator)evaluators[zz]).
		evaluateSubset(raceBitSets[zz], 
			       testInst,
			       false);
	    }
	  }
	}

	// now update the stats
	for (int j=startPt;j<raceSets.length;j++) {
	  if (!eliminated[j]) {
	    individualStats[j].add(errors[j]);
	    for (int k=j+1;k<raceSets.length;k++) {
	      if (!eliminated[k]) {
		testers[j][k].add(errors[j], errors[k]);
	      }
	    }
	  }
	}
      
	// test for near identical models and models that are significantly
	// worse than some other model
	if (processedCount > m_samples-1 && 
	    (eliminatedCount < raceSets.length-1)) {
	  for (int j=0;j<raceSets.length;j++) {
	    if (!eliminated[j]) {
	      for (int k=j+1;k<raceSets.length;k++) {
		if (!eliminated[k]) {
		  testers[j][k].calculateDerived();
		  // near identical ?
		  if ((testers[j][k].differencesSignificance == 0) && 
		      (Utils.eq(testers[j][k].differencesStats.mean, 0.0) ||
		      (Utils.gr(m_delta, Math.abs(testers[j][k].
						  differencesStats.mean))))) {
		    // if they're exactly the same and there is a base set
		    // in this race, make sure that the base set is NOT the
		    // one eliminated.
		    if (Utils.eq(testers[j][k].differencesStats.mean, 0.0)) {

		      if (baseSetIncluded) { 
			if (j != 0) {
			  eliminated[j] = true;
			} else {
			  eliminated[k] = true;
			}
			eliminatedCount++;
		      } else {
			eliminated[j] = true;
		      }
		      if (m_debug) {
			System.err.println("Eliminating (identical) "
					   +j+" "+raceBitSets[j].toString()
					   +" vs "+k+" "
					   +raceBitSets[k].toString()
					   +" after "
					   +processedCount
					   +" evaluations\n"
					   +"\nerror "+j+" : "
					   +testers[j][k].xStats.mean
					   +" vs "+k+" : "
					   +testers[j][k].yStats.mean
					   +" diff : "
					   +testers[j][k].differencesStats
					   .mean);
		      }
		    } else {
		      // eliminate the one with the higer error
		      if (testers[j][k].xStats.mean > 
			  testers[j][k].yStats.mean) {
			eliminated[j] = true;
			eliminatedCount++;
			if (m_debug) {
			  System.err.println("Eliminating (near identical) "
					   +j+" "+raceBitSets[j].toString()
					   +" vs "+k+" "
					   +raceBitSets[k].toString()
					   +" after "
					   +processedCount
					   +" evaluations\n"
					   +"\nerror "+j+" : "
					   +testers[j][k].xStats.mean
					   +" vs "+k+" : "
					   +testers[j][k].yStats.mean
					   +" diff : "
					   +testers[j][k].differencesStats
					   .mean);
			}
			break;
		      } else {
			eliminated[k] = true;
			eliminatedCount++;
			if (m_debug) {
			  System.err.println("Eliminating (near identical) "
					   +k+" "+raceBitSets[k].toString()
					   +" vs "+j+" "
					   +raceBitSets[j].toString()
					   +" after "
					   +processedCount
					   +" evaluations\n"
					   +"\nerror "+k+" : "
					   +testers[j][k].yStats.mean
					   +" vs "+j+" : "
					   +testers[j][k].xStats.mean
					   +" diff : "
					   +testers[j][k].differencesStats
					     .mean);
			}
		      }
		    }
		  } else {
		    // significantly worse ?
		    if (testers[j][k].differencesSignificance != 0) {
		      if (testers[j][k].differencesSignificance > 0) {
			eliminated[j] = true;
			eliminatedCount++;
			if (m_debug) {
			  System.err.println("Eliminating (-worse) "
					   +j+" "+raceBitSets[j].toString()
					   +" vs "+k+" "
					   +raceBitSets[k].toString()
					   +" after "
					   +processedCount
					   +" evaluations"
					   +"\nerror "+j+" : "
					   +testers[j][k].xStats.mean
					   +" vs "+k+" : "
					   +testers[j][k].yStats.mean);
			}
			break;
		      } else {
			eliminated[k] = true;
			eliminatedCount++;
			if (m_debug) {
			  System.err.println("Eliminating (worse) "
					   +k+" "+raceBitSets[k].toString()
					   +" vs "+j+" "
					   +raceBitSets[j].toString()
					   +" after "
					   +processedCount
					   +" evaluations"
					   +"\nerror "+k+" : "
					   +testers[j][k].yStats.mean
					   +" vs "+j+" : "
					   +testers[j][k].xStats.mean);
			}
		      }
		    }
		  }
		}    
	      }
	    }
	  }
	}
	// if there is a base set from the previous race and it's the
	// only remaining subset then terminate the race.
	if (eliminatedCount == raceSets.length-1 && baseSetIncluded &&
	    !eliminated[0] && !m_rankingRequested) {
	  break race;
	}
      }
    }

    if (m_debug) {
      System.err.println("*****eliminated count: "+eliminatedCount);
    }
    double bestError = Double.MAX_VALUE;
    int bestIndex=0;
    // return the index of the winner
    for (int i=startPt;i<raceSets.length;i++) {
      if (!eliminated[i]) {
	individualStats[i].calculateDerived();
	if (m_debug) {
	  System.err.println("Remaining error: "+raceBitSets[i].toString()
			     +" "+individualStats[i].mean);
	}
	if (individualStats[i].mean < bestError) {
	  bestError = individualStats[i].mean;
	  bestIndex = i;
	}
      }
    }

    double [] retInfo = new double[2];
    retInfo[0] = bestIndex;
    retInfo[1] = bestError;
    
    if (m_debug) {
      System.err.print("Best set from race : ");
      
      for (int i=0;i<m_numAttribs;i++) {
	if (raceSets[bestIndex][i] == '1') {
	  System.err.print('1');
	} else {
	  System.err.print('0');
	}
      }
      System.err.println(" :"+bestError+" Processed : "+(processedCount)
			 +"\n"+individualStats[bestIndex].toString());
    }
    return retInfo;
  }

  public String toString() {
    StringBuffer text = new StringBuffer();
    
    text.append("\tRaceSearch.\n\tRace type : ");
    switch (m_raceType) {
    case FORWARD_RACE: 
      text.append("forward selection race\n\tBase set : no attributes");
      break;
    case BACKWARD_RACE:
      text.append("backward elimination race\n\tBase set : all attributes");
      break;
    case SCHEMATA_RACE:
      text.append("schemata race\n\tBase set : no attributes");
      break;
    case RANK_RACE:
      text.append("rank race\n\tBase set : no attributes\n\t");
      text.append("Attribute evaluator : "
		  + getAttributeEvaluator().getClass().getName() +" ");
      if (m_ASEval instanceof OptionHandler) {
	String[] evaluatorOptions = new String[0];
	evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
	for (int i=0;i<evaluatorOptions.length;i++) {
	  text.append(evaluatorOptions[i]+' ');
	}
      }
      text.append("\n");
      text.append("\tAttribute ranking : \n");
      int rlength = (int)(Math.log(m_Ranking.length) / Math.log(10) + 1);
      for (int i=0;i<m_Ranking.length;i++) {
	text.append("\t "+Utils.doubleToString((double)(m_Ranking[i]+1),
					       rlength,0)
		    +" "+m_Instances.attribute(m_Ranking[i]).name()+'\n');
      }
      break;
    }
    text.append("\n\tCross validation mode : ");
    if (m_xvalType == TEN_FOLD) {
      text.append("10 fold");
    } else {
      text.append("Leave-one-out");
    }

    text.append("\n\tMerit of best subset found : ");
    int fieldwidth = 3;
    double precision = (m_bestMerit - (int)m_bestMerit);
    if (Math.abs(m_bestMerit) > 0) {
      fieldwidth = (int)Math.abs((Math.log(Math.abs(m_bestMerit)) / 
				  Math.log(10)))+2;
    }
    if (Math.abs(precision) > 0) {
      precision = Math.abs((Math.log(Math.abs(precision)) / Math.log(10)))+3;
    } else {
      precision = 2;
    }

    text.append(Utils.doubleToString(Math.abs(m_bestMerit),
				     fieldwidth+(int)precision,
				     (int)precision)+"\n");
    return text.toString();
    
  }

  /**
   * Reset the search method.
   */
  protected void resetOptions () {
    m_sigLevel = 0.001;
    m_delta = 0.001;
    m_ASEval = new GainRatioAttributeEval();
    m_Ranking = null;
    m_raceType = FORWARD_RACE;
    m_debug = false;
    m_theEvaluator = null;
    m_bestMerit = -Double.MAX_VALUE;
    m_numFolds = 10;
  }
}