www.pudn.com > weka.rar > Apriori.java, change:2001-03-14,size:31265b


/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Apriori.java
 *    Copyright (C) 1999 Eibe Frank,Mark Hall
 *
 */

package weka.associations;

import java.io.*;
import java.util.*;
import weka.core.*;
import weka.filters.Filter;
import weka.filters.AttributeFilter;

/**
 * Class implementing an Apriori-type algorithm. Iteratively reduces the minimum
 * support until it finds the required number of rules with the given minimum 
 * confidence. <p>
 *
 * Reference: R. Agrawal, R. Srikant (1994). <i>Fast algorithms for
 * mining association rules in large databases </i>. Proc
 * International Conference on Very Large Databases,
 * pp. 478-499. Santiage, Chile: Morgan Kaufmann, Los Altos, CA. <p>
 *
 * Valid options are:<p>
 *   
 * -N required number of rules <br>
 * The required number of rules (default: 10). <p>
 *
 * -T type of metric by which to sort rules <br>
 * 0 = confidence | 1 = lift | 2 = leverage | 3 = Conviction. <p>
 *
 * -C minimum confidence of a rule <br>
 * The minimum confidence of a rule (default: 0.9). <p>
 *
 * -D delta for minimum support <br>
 * The delta by which the minimum support is decreased in
 * each iteration (default: 0.05). <p>
 *
 * -U upper bound for minimum support <br>
 * The upper bound for minimum support. Don't explicitly look for 
 * rules with more than this level of support. <p>
 *
 * -M lower bound for minimum support <br>
 * The lower bound for the minimum support (default = 0.1). <p>
 *
 * -S significance level <br>
 * If used, rules are tested for significance at
 * the given level. Slower (default = no significance testing). <p>
 *
 * -R <br>
 * If set then columns that contain all missing values are removed from
 * the data.
 *
 * -I <br>
 * If set the itemsets found are also output (default = no). <p>
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.11 $ */
public class Apriori extends Associator implements OptionHandler {
  
  /** The minimum support. */
  protected double m_minSupport;

  /** The upper bound on the support */
  protected double m_upperBoundMinSupport;

  /** The lower bound for the minimum support. */
  protected double m_lowerBoundMinSupport;

  /** Metric types. */
  protected static final int CONFIDENCE = 0;
  protected static final int LIFT = 1;
  protected static final int LEVERAGE = 2;
  protected static final int CONVICTION = 3;
  public static final Tag [] TAGS_SELECTION = {
    new Tag(CONFIDENCE, "Confidence"),
    new Tag(LIFT, "Lift"),
    new Tag(LEVERAGE, "Leverage"),
    new Tag(CONVICTION, "Conviction")
      };

  /** The selected metric type. */
  protected int m_metricType = CONFIDENCE;

  /** The minimum metric score. */
  protected double m_minMetric;

  /** The maximum number of rules that are output. */
  protected int m_numRules;

  /** Delta by which m_minSupport is decreased in each iteration. */
  protected double m_delta;

  /** Significance level for optional significance test. */
  protected double m_significanceLevel;

  /** Number of cycles used before required number of rules was one. */
  protected int m_cycles;

  /** The set of all sets of itemsets L. */
  protected FastVector m_Ls;

  /** The same information stored in hash tables. */
  protected FastVector m_hashtables;

  /** The list of all generated rules. */
  protected FastVector[] m_allTheRules;

  /** The instances (transactions) to be used for generating 
      the association rules. */
  protected Instances m_instances;

  /** Output itemsets found? */
  protected boolean m_outputItemSets;

  protected boolean m_removeMissingCols;

  /** Report progress iteratively */
  protected boolean m_verbose;

  /**
   * Returns a string describing this associator
   * @return a description of the evaluator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Finds association rules.";
  }

  /**
   * Constructor that allows to sets default values for the 
   * minimum confidence and the maximum number of rules
   * the minimum confidence.
   */
  public Apriori() {

    resetOptions();
  }

  /**
   * Resets the options to the default values.
   */
  public void resetOptions() {
    
    m_removeMissingCols = false;
    m_verbose = false;
    m_delta = 0.05;
    m_minMetric = 0.90;
    m_numRules = 10;
    m_lowerBoundMinSupport = 0.1;
    m_upperBoundMinSupport = 1.0;
    m_significanceLevel = -1;
    m_outputItemSets = false;
  }

  /**
   * Removes columns that are all missing from the data
   * @param instances the instances
   * @return a new set of instances with all missing columns removed
   */
  private Instances removeMissingColumns(Instances instances) 
    throws Exception {
    int numInstances = instances.numInstances();
    StringBuffer deleteString = new StringBuffer();
    int removeCount = 0;
    boolean first = true;
    int maxCount = 0;
    
    for (int i=0;i<instances.numAttributes();i++) {
      AttributeStats as = instances.attributeStats(i);
      if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
	// see if we can decrease this by looking for the most frequent value
	int [] counts = as.nominalCounts;
	if (counts[Utils.maxIndex(counts)] > maxCount) {
	  maxCount = counts[Utils.maxIndex(counts)];
	}
      }
      if (as.missingCount == numInstances) {
	if (first) {
	  deleteString.append((i+1));
	  first = false;
	} else {
	  deleteString.append(","+(i+1));
	}
	removeCount++;
      }
    }
    if (m_verbose) {
      System.err.println("Removed : "+removeCount+" columns with all missing "
			 +"values.");
    }
    if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
      m_upperBoundMinSupport = (double)maxCount / (double)numInstances;
      if (m_verbose) {
	System.err.println("Setting upper bound min support to : "
			   +m_upperBoundMinSupport);
      }
    }

    if (deleteString.toString().length() > 0) {
      AttributeFilter af = new AttributeFilter();
      af.setAttributeIndices(deleteString.toString());
      af.setInvertSelection(false);
      af.setInputFormat(instances);
      Instances newInst = Filter.useFilter(instances, af);

      return newInst;
    }
    return instances;
  }

  /**
   * Method that generates all large itemsets with a minimum support, and from
   * these all association rules with a minimum confidence.
   *
   * @param instances the instances to be used for generating the associations
   * @exception Exception if rules can't be built successfully
   */
  public void buildAssociations(Instances instances) throws Exception {

    double[] confidences, supports;
    int[] indices;
    FastVector[] sortedRuleSet;
    int necSupport=0;

    if (instances.checkForStringAttributes()) {
      throw new Exception("Can't handle string attributes!");
    }

    if (m_removeMissingCols) {
      instances = removeMissingColumns(instances);
    }

    // Decrease minimum support until desired number of rules found.
    m_cycles = 0;
    m_minSupport = m_upperBoundMinSupport - m_delta;
    m_minSupport = (m_minSupport < m_lowerBoundMinSupport) 
      ? m_lowerBoundMinSupport 
      : m_minSupport;
    do {

      // Reserve space for variables
      m_Ls = new FastVector();
      m_hashtables = new FastVector();
      m_allTheRules = new FastVector[6];
      m_allTheRules[0] = new FastVector();
      m_allTheRules[1] = new FastVector();
      m_allTheRules[2] = new FastVector();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
	m_allTheRules[3] = new FastVector();
	m_allTheRules[4] = new FastVector();
	m_allTheRules[5] = new FastVector();
      }
      sortedRuleSet = new FastVector[6];
      sortedRuleSet[0] = new FastVector();
      sortedRuleSet[1] = new FastVector();
      sortedRuleSet[2] = new FastVector();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
	sortedRuleSet[3] = new FastVector();
	sortedRuleSet[4] = new FastVector();
	sortedRuleSet[5] = new FastVector();
      }

      // Find large itemsets and rules
      findLargeItemSets(instances);
      if (m_significanceLevel != -1 || m_metricType != CONFIDENCE) 
	findRulesBruteForce();
      else
	findRulesQuickly();
      
      // Sort rules according to their support
      supports = new double[m_allTheRules[2].size()];
      for (int i = 0; i < m_allTheRules[2].size(); i++) 
	supports[i] = (double)((ItemSet)m_allTheRules[1].elementAt(i)).support();
      indices = Utils.stableSort(supports);
      for (int i = 0; i < m_allTheRules[2].size(); i++) {
	sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
	sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
	sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
	if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
	  sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i]));
	  sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i]));
	  sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i]));
	}
      }

      // Sort rules according to their confidence
      m_allTheRules[0].removeAllElements();
      m_allTheRules[1].removeAllElements();
      m_allTheRules[2].removeAllElements();
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
	m_allTheRules[3].removeAllElements();
	m_allTheRules[4].removeAllElements();
	m_allTheRules[5].removeAllElements();
      }
      confidences = new double[sortedRuleSet[2].size()];
      int sortType = 2 + m_metricType;

      for (int i = 0; i < sortedRuleSet[2].size(); i++) 
	confidences[i] = 
	  ((Double)sortedRuleSet[sortType].elementAt(i)).doubleValue();
      indices = Utils.stableSort(confidences);
      for (int i = sortedRuleSet[0].size() - 1; 
	   (i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
	m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
	m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
	m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
	if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
	  m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
	  m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
	  m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
	}
      }
      m_minSupport -= m_delta;
      m_minSupport = (m_minSupport < m_lowerBoundMinSupport) 
	? 0 
	: m_minSupport;

      necSupport = (int)(m_minSupport * 
			 (double)instances.numInstances()+0.5);

      m_cycles++;
      if (m_verbose) {
	if (m_Ls.size() > 1) {
	  System.out.println(toString());
	}
      }
    } while ((m_allTheRules[0].size() < m_numRules) && 
	     (m_minSupport >= m_lowerBoundMinSupport)
	     /*	     (necSupport >= lowerBoundNumInstancesSupport)*/
	     /*	     (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)) */ &&     
	     (necSupport >= 1));
    m_minSupport += m_delta;
  }

  /**
   * Returns an enumeration describing the available options
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {

    String string1 = "\tThe required number of rules. (default = " + m_numRules + ")",
      string2 = 
      "\tThe minimum confidence of a rule. (default = " + m_minMetric + ")",
      string3 = "\tThe delta by which the minimum support is decreased in\n",
      string4 = "\teach iteration. (default = " + m_delta + ")",
      string5 = 
      "\tThe lower bound for the minimum support. (default = " + 
      m_lowerBoundMinSupport + ")",
      string6 = "\tIf used, rules are tested for significance at\n",
      string7 = "\tthe given level. Slower. (default = no significance testing)",
      string8 = "\tIf set the itemsets found are also output. (default = no)",
      stringType = "\tThe metric type by which to rank rules. (default = "
      +"confidence)";
    

    FastVector newVector = new FastVector(9);

    newVector.addElement(new Option(string1, "N", 1, 
				    "-N <required number of rules output>"));
    newVector.addElement(new Option(stringType, "T", 1,
				    "-T <0=confidence | 1=lift | "
				    +"2=leverage | 3=Conviction>"));
    newVector.addElement(new Option(string2, "C", 1, 
				    "-C <minimum metric score of a rule>"));
    newVector.addElement(new Option(string3 + string4, "D", 1,
				    "-D <delta for minimum support>"));
    newVector.addElement(new Option("\tUpper bound for minimum support. "
				    +"(default = 1.0)", "U", 1,
				     "-U <upper bound for minimum support>"));
    newVector.addElement(new Option(string5, "M", 1,
				    "-M <lower bound for minimum support>"));
    newVector.addElement(new Option(string6 + string7, "S", 1,
				    "-S <significance level>"));
    newVector.addElement(new Option(string8, "S", 0,
				    "-I"));
    newVector.addElement(new Option("\tRemove columns that contain "
				    +"all missing values (default = no)"
				    , "R", 0,
				    "-R"));
    newVector.addElement(new Option("\tReport progress iteratively. (default "
				    +"= no)", "V", 0,
				    "-V"));
    
    return newVector.elements();
  }

  /**
   * Parses a given list of options. Valid options are:<p>
   *   
   * -N required number of rules <br>
   * The required number of rules (default: 10). <p>
   *
   * -T type of metric by which to sort rules <br>
   * 0 = confidence | 1 = lift | 2 = leverage | 3 = Conviction. <p>
   *
   * -C minimum metric score of a rule <br>
   * The minimum confidence of a rule (default: 0.9). <p>
   *
   * -D delta for minimum support <br>
   * The delta by which the minimum support is decreased in
   * each iteration (default: 0.05).
   *
   * -U upper bound for minimum support <br>
   * The upper bound for minimum support. Don't explicitly look for 
   * rules with more than this level of support. <p>
   *
   * -M lower bound for minimum support <br>
   * The lower bound for the minimum support (default = 0.1). <p>
   *
   * -S significance level <br>
   * If used, rules are tested for significance at
   * the given level. Slower (default = no significance testing). <p>
   *
   * -I <br>
   * If set the itemsets found are also output (default = no). <p>
   *
   * -V <br>
   * If set then progress is reported iteratively during execution. <p>
   *
   * -R <br>
   * If set then columns that contain all missing values are removed from
   * the data. <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported 
   */
  public void setOptions(String[] options) throws Exception{
    
    resetOptions();
    String numRulesString = Utils.getOption('N', options),
      minConfidenceString = Utils.getOption('C', options),
      deltaString = Utils.getOption('D', options),
      maxSupportString = Utils.getOption('U', options),
      minSupportString = Utils.getOption('M', options),
      significanceLevelString = Utils.getOption('S', options);
    String metricTypeString = Utils.getOption('T', options);
    if (metricTypeString.length() != 0) {
      setMetricType(new SelectedTag(Integer.parseInt(metricTypeString),
				    TAGS_SELECTION));
    }
    
    if (numRulesString.length() != 0) {
      m_numRules = Integer.parseInt(numRulesString);
    }
    if (minConfidenceString.length() != 0) {
      m_minMetric = (new Double(minConfidenceString)).doubleValue();
    }
    if (deltaString.length() != 0) {
      m_delta = (new Double(deltaString)).doubleValue();
    }
    if (maxSupportString.length() != 0) {
      setUpperBoundMinSupport((new Double(maxSupportString)).doubleValue());
    }
    if (minSupportString.length() != 0) {
      m_lowerBoundMinSupport = (new Double(minSupportString)).doubleValue();
    }
    if (significanceLevelString.length() != 0) {
      m_significanceLevel = (new Double(significanceLevelString)).doubleValue();
    }
    m_outputItemSets = Utils.getFlag('I', options);
    m_verbose = Utils.getFlag('V', options);
    setRemoveAllMissingCols(Utils.getFlag('R', options));
  }

  /**
   * Gets the current settings of the Apriori object.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] options = new String [16];
    int current = 0;

    if (m_outputItemSets) {
      options[current++] = "-I";
    }

    if (getRemoveAllMissingCols()) {
      options[current++] = "-R";
    }

    options[current++] = "-N"; options[current++] = "" + m_numRules;
    options[current++] = "-T"; options[current++] = "" + m_metricType;
    options[current++] = "-C"; options[current++] = "" + m_minMetric;
    options[current++] = "-D"; options[current++] = "" + m_delta;
    options[current++] = "-U"; options[current++] = ""+m_upperBoundMinSupport;
    options[current++] = "-M"; options[current++] = ""+m_lowerBoundMinSupport;
    options[current++] = "-S"; options[current++] = "" + m_significanceLevel;

    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Outputs the size of all the generated sets of itemsets and the rules.
   */
  public String toString() {

    StringBuffer text = new StringBuffer();

    if (m_Ls.size() <= 1)
      return "\nNo large itemsets and rules found!\n";
    text.append("\nApriori\n=======\n\n");
    text.append("Minimum support: " 
		+ Utils.doubleToString(m_minSupport,2) + '\n');
    text.append("Minimum metric <");
    switch(m_metricType) {
    case CONFIDENCE:
      text.append("confidence>: ");
      break;
    case LIFT:
      text.append("lift>: ");
      break;
    case LEVERAGE:
      text.append("leverage>: ");
      break;
    case CONVICTION:
      text.append("conviction>: ");
      break;
    }
    text.append(Utils.doubleToString(m_minMetric,2)+'\n');
   
    if (m_significanceLevel != -1)
      text.append("Significance level: "+
		  Utils.doubleToString(m_significanceLevel,2)+'\n');
    text.append("Number of cycles performed: " + m_cycles+'\n');
    text.append("\nGenerated sets of large itemsets:\n");
    for (int i = 0; i < m_Ls.size(); i++) {
      text.append("\nSize of set of large itemsets L("+(i+1)+"): "+
		  ((FastVector)m_Ls.elementAt(i)).size()+'\n');
      if (m_outputItemSets) {
	text.append("\nLarge Itemsets L("+(i+1)+"):\n");
	for (int j = 0; j < ((FastVector)m_Ls.elementAt(i)).size(); j++)
	  text.append(((ItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).
		      toString(m_instances)+"\n");
      }
    }
    text.append("\nBest rules found:\n\n");
    for (int i = 0; i < m_allTheRules[0].size(); i++) {
      text.append(Utils.doubleToString((double)i+1, 
		  (int)(Math.log(m_numRules)/Math.log(10)+1),0)+
		  ". " + ((ItemSet)m_allTheRules[0].elementAt(i)).
		  toString(m_instances) 
		  + " ==> " + ((ItemSet)m_allTheRules[1].elementAt(i)).
		  toString(m_instances) +"    conf:("+  
		  Utils.doubleToString(((Double)m_allTheRules[2].
					elementAt(i)).doubleValue(),2)+")");
      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
	text.append((m_metricType == LIFT ? " <" : "")+" lift:("+  
		    Utils.doubleToString(((Double)m_allTheRules[3].
					  elementAt(i)).doubleValue(),2)
		    +")"+(m_metricType == LIFT ? ">" : ""));
	text.append((m_metricType == LEVERAGE ? " <" : "")+" lev:("+  
		    Utils.doubleToString(((Double)m_allTheRules[4].
					  elementAt(i)).doubleValue(),2)
		    +")");
	text.append(" ["+
		    (int)(((Double)m_allTheRules[4].elementAt(i))
			  .doubleValue() * (double)m_instances.numInstances())
		    +"]"+(m_metricType == LEVERAGE ? ">" : ""));
	text.append((m_metricType == CONVICTION ? " <" : "")+" conv:("+  
		    Utils.doubleToString(((Double)m_allTheRules[5].
					  elementAt(i)).doubleValue(),2)
		    +")"+(m_metricType == CONVICTION ? ">" : ""));
      }
      text.append('\n');
    }
    return text.toString();
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String removeAllMissingColsTipText() {
    return "Remove columns with all missing values.";
  }

  /**
   * Remove columns containing all missing values.
   * @param r true if cols are to be removed.
   */
  public void setRemoveAllMissingCols(boolean r) {
    m_removeMissingCols = r;
  }

  /**
   * Returns whether columns containing all missing values are to be removed
   * @return true if columns are to be removed.
   */
  public boolean getRemoveAllMissingCols() {
    return m_removeMissingCols;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String upperBoundMinSupportTipText() {
    return "Upper bound for minimum support. Start iteratively decreasing "
      +"minimum support from this value.";
  }

  /**
   * Get the value of upperBoundMinSupport.
   *
   * @return Value of upperBoundMinSupport.
   */
  public double getUpperBoundMinSupport() {
    
    return m_upperBoundMinSupport;
  }
  
  /**
   * Set the value of upperBoundMinSupport.
   *
   * @param v  Value to assign to upperBoundMinSupport.
   */
  public void setUpperBoundMinSupport(double v) {
    
    m_upperBoundMinSupport = v;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String lowerBoundMinSupportTipText() {
    return "Lower bound for minimum support.";
  }

  /**
   * Get the value of lowerBoundMinSupport.
   *
   * @return Value of lowerBoundMinSupport.
   */
  public double getLowerBoundMinSupport() {
    
    return m_lowerBoundMinSupport;
  }
  
  /**
   * Set the value of lowerBoundMinSupport.
   *
   * @param v  Value to assign to lowerBoundMinSupport.
   */
  public void setLowerBoundMinSupport(double v) {
    
    m_lowerBoundMinSupport = v;
  }
  
  /**
   * Get the metric type
   *
   * @return the type of metric to use for ranking rules
   */
  public SelectedTag getMetricType() {
    return new SelectedTag(m_metricType, TAGS_SELECTION);
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String metricTypeTipText() {
    return "Set the type of metric by which to rank rules. Confidence is "
      +"the proportion of the examples covered by the premise that are also "
      +"covered by the consequence. Lift is confidence divided by the "
      +"proportion of all examples that are covered by the consequence. This "
      +"is a measure of the importance of the association that is independent "
      +"of support. Leverage is the proportion of additional examples covered "
      +"by both the premise and consequence above those expected if the "
      +"premise and consequence were independent of each other. The total "
      +"number of examples that this represents is presented in brackets "
      +"following the leverage. Conviction is "
      +"another measure of departure from independence. Conviction is given "
      +"by ";
  }

  /**
   * Set the metric type for ranking rules
   *
   * @param d the type of metric
   */
  public void setMetricType (SelectedTag d) {
    
    if (d.getTags() == TAGS_SELECTION) {
      m_metricType = d.getSelectedTag().getID();
    }

    if (m_significanceLevel != -1 && m_metricType != CONFIDENCE) {
      m_metricType = CONFIDENCE;
    }

    if (m_metricType == CONFIDENCE) {
      setMinMetric(0.9);
    }

    if (m_metricType == LIFT || m_metricType == CONVICTION) {
      setMinMetric(1.1);
    }
  
    if (m_metricType == LEVERAGE) {
      setMinMetric(0.1);
    }
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String minMetricTipText() {
    return "Minimum metric score. Consider only rules with scores higher than "
      +"this value.";
  }

  /**
   * Get the value of minConfidence.
   *
   * @return Value of minConfidence.
   */
  public double getMinMetric() {
    
    return m_minMetric;
  }
  
  /**
   * Set the value of minConfidence.
   *
   * @param v  Value to assign to minConfidence.
   */
  public void setMinMetric(double v) {
    
    m_minMetric = v;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numRulesTipText() {
    return "Number of rules to find.";
  }

  /**
   * Get the value of numRules.
   *
   * @return Value of numRules.
   */
  public int getNumRules() {
    
    return m_numRules;
  }
  
  /**
   * Set the value of numRules.
   *
   * @param v  Value to assign to numRules.
   */
  public void setNumRules(int v) {
    
    m_numRules = v;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String deltaTipText() {
    return "Iteratively decrease support by this factor. Reduces support "
      +"until min support is reached or required number of rules has been "
      +"generated.";
  }
    
  /**
   * Get the value of delta.
   *
   * @return Value of delta.
   */
  public double getDelta() {
    
    return m_delta;
  }
  
  /**
   * Set the value of delta.
   *
   * @param v  Value to assign to delta.
   */
  public void setDelta(double v) {
    
    m_delta = v;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String significanceLevelTipText() {
    return "Significance level. Significance test (confidence metric only).";
  }

  /**
   * Get the value of significanceLevel.
   *
   * @return Value of significanceLevel.
   */
  public double getSignificanceLevel() {
    
    return m_significanceLevel;
  }
  
  /**
   * Set the value of significanceLevel.
   *
   * @param v  Value to assign to significanceLevel.
   */
  public void setSignificanceLevel(double v) {
    
    m_significanceLevel = v;
  }

  /** 
   * Method that finds all large itemsets for the given set of instances.
   *
   * @param the instances to be used
   * @exception Exception if an attribute is numeric
   */
  private void findLargeItemSets(Instances instances) throws Exception {
    
    FastVector kMinusOneSets, kSets;
    Hashtable hashtable;
    int necSupport, necMaxSupport,i = 0;
    
    m_instances = instances;
    
    // Find large itemsets

    // minimum support
    necSupport = (int)(m_minSupport * (double)instances.numInstances()+0.5);
    necMaxSupport = (int)(m_upperBoundMinSupport * (double)instances.numInstances()+0.5);
   
    kSets = ItemSet.singletons(instances);
    ItemSet.upDateCounters(kSets, instances);
    kSets = ItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
    if (kSets.size() == 0)
      return;
    do {
      m_Ls.addElement(kSets);
      kMinusOneSets = kSets;
      kSets = ItemSet.mergeAllItemSets(kMinusOneSets, i, instances.numInstances());
      hashtable = ItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
      m_hashtables.addElement(hashtable);
      kSets = ItemSet.pruneItemSets(kSets, hashtable);
      ItemSet.upDateCounters(kSets, instances);
      kSets = ItemSet.deleteItemSets(kSets, necSupport, necMaxSupport);
      i++;
    } while (kSets.size() > 0);
  }  

  /** 
   * Method that finds all association rules and performs significance test.
   *
   * @exception Exception if an attribute is numeric
   */
  private void findRulesBruteForce() throws Exception {

    FastVector[] rules;

    // Build rules
    for (int j = 1; j < m_Ls.size(); j++) {
      FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
      Enumeration enumItemSets = currentItemSets.elements();
      while (enumItemSets.hasMoreElements()) {
	ItemSet currentItemSet = (ItemSet)enumItemSets.nextElement();
	rules=currentItemSet.
	  generateRulesBruteForce(m_minMetric,m_metricType,
				  m_hashtables,j+1,
				  m_instances.numInstances(),
				  m_significanceLevel);
	for (int k = 0; k < rules[0].size(); k++) {
	  m_allTheRules[0].addElement(rules[0].elementAt(k));
	  m_allTheRules[1].addElement(rules[1].elementAt(k));
	  m_allTheRules[2].addElement(rules[2].elementAt(k));

	  m_allTheRules[3].addElement(rules[3].elementAt(k));
	  m_allTheRules[4].addElement(rules[4].elementAt(k));
	  m_allTheRules[5].addElement(rules[5].elementAt(k));
	}
      }
    }
  }

  /** 
   * Method that finds all association rules.
   *
   * @exception Exception if an attribute is numeric
   */
  private void findRulesQuickly() throws Exception {

    FastVector[] rules;

    // Build rules
    for (int j = 1; j < m_Ls.size(); j++) {
      FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
      Enumeration enumItemSets = currentItemSets.elements();
      while (enumItemSets.hasMoreElements()) {
	ItemSet currentItemSet = (ItemSet)enumItemSets.nextElement();
	rules = currentItemSet.generateRules(m_minMetric, m_hashtables, j + 1);
	for (int k = 0; k < rules[0].size(); k++) {
	  m_allTheRules[0].addElement(rules[0].elementAt(k));
	  m_allTheRules[1].addElement(rules[1].elementAt(k));
	  m_allTheRules[2].addElement(rules[2].elementAt(k));
	}
      }
    }
  }

  /**
   * Main method for testing this class.
   */
  public static void main(String[] options) {

    String trainFileString;
    StringBuffer text = new StringBuffer();
    Apriori apriori = new Apriori();
    Reader reader;

    try {
      text.append("\n\nApriori options:\n\n");
      text.append("-t <training file>\n");
      text.append("\tThe name of the training file.\n");
      Enumeration enum = apriori.listOptions();
      while (enum.hasMoreElements()) {
	Option option = (Option)enum.nextElement();
	text.append(option.synopsis()+'\n');
	text.append(option.description()+'\n');
      }
      trainFileString = Utils.getOption('t', options);
      if (trainFileString.length() == 0) 
	throw new Exception("No training file given!");
      apriori.setOptions(options);
      reader = new BufferedReader(new FileReader(trainFileString));
      apriori.buildAssociations(new Instances(reader));
      System.out.println(apriori);
    } catch(Exception e) {
      e.printStackTrace();
      System.out.println("\n"+e.getMessage()+text);
    }
  }
}